From 1c9ce20f4a5b21f9cd6e828cfcc1303723e925a1 Mon Sep 17 00:00:00 2001 From: renzhiyuan <465386466@qq.com> Date: Sun, 26 Apr 2026 00:48:50 +0800 Subject: [PATCH] 3232 --- doubao_test.go | 2 +- internal/collect/doubao.go | 29 ++++----------------- internal/collect/utils.go | 4 +-- internal/collect/utils_test.go | 2 +- internal/data/model/collect_task.gen.go | 1 + internal/service/collect.go | 34 +++++++++++++++++-------- 6 files changed, 34 insertions(+), 38 deletions(-) diff --git a/doubao_test.go b/doubao_test.go index aae3953..016e6e4 100644 --- a/doubao_test.go +++ b/doubao_test.go @@ -57,7 +57,7 @@ func TestDoubaoCollector_AskQuestion(t *testing.T) { } // 定义提问内容 - question := "四川房地产软件排名" + question := "今天天气怎么样" t.Logf("向豆包提问: %s", question) // 调用管理器提问并获取答案 diff --git a/internal/collect/doubao.go b/internal/collect/doubao.go index 30349d4..9220d35 100644 --- a/internal/collect/doubao.go +++ b/internal/collect/doubao.go @@ -127,6 +127,7 @@ func (c *DoubaoCollector) AskQuestion(question string) (*CollectResult, error) { return nil, fmt.Errorf("获取答案失败: %v", err) } answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords) + // 获取分享链接 shareLink := c.getShareLink() @@ -287,27 +288,8 @@ func (c *DoubaoCollector) waitForAnswer() (string, error) { for time.Since(startTime).Seconds() < float64(timeout) { // 尝试多种方式查找答案容器 answerSelectors := []string{ - "[class*='message-content']", - "[class*='response-text']", - "[class*='assistant'] [class*='content']", - "[class*='bot'] [class*='message']", - ".chat-message.bot", - ".answer-box", - "[class*='answer']", - "[class*='reply']", - "[data-testid*='message']", - // 豆包特定的选择器 - "[class*='bubble']", - "[class*='chat-bubble']", - "[class*='msg-content']", - "[class*='text-content']", - ".markdown-body", - "[class*='markdown']", - // 更通用的选择器 - "div[class*='content']", - "div[class*='text']", - "article", - "section", + "div[data-message-id]", + "div[data-message-id*='']", } var answerText string @@ -419,7 +401,7 @@ func (c *DoubaoCollector) getShareLink() string { c.LogInfo("尝试获取分享链接...") // 步骤1: 找到class包含message-action-button-main的div - actionDiv, err := c.Page.Element("div[class*='message-action-button-main']") + actionDiv, err := c.Page.Element("div[data-foundation-type*='receive-message-action-bar']") if err != nil || actionDiv == nil { c.LogInfo("未找到message-action-button-main元素") return "" @@ -439,9 +421,8 @@ func (c *DoubaoCollector) getShareLink() string { return "" } - // 取倒数第二个button shareBtn := buttons[len(buttons)-3] - c.LogInfo(fmt.Sprintf("找到分享按钮(倒数第2个,共%d个button)", len(buttons))) + c.LogInfo(fmt.Sprintf("找到分享按钮(共%d个button)", len(buttons))) // 检查是否可点击,如果pointer-events为none,使用JavaScript点击 visible, _ := shareBtn.Visible() diff --git a/internal/collect/utils.go b/internal/collect/utils.go index 2a53ee7..4aa62d5 100644 --- a/internal/collect/utils.go +++ b/internal/collect/utils.go @@ -129,9 +129,9 @@ func HighlightKeywordsInHTML(htmlContent string, pointKeys []string) (string, bo // textContent: 纯文本内容 // pointKeys: 需要高亮的关键词列表 // 返回带有高亮标记的HTML内容 -func HighlightKeywordsInText(textContent string, pointKeys []string) string { +func HighlightKeywordsInText(textContent string, pointKeys []string) (string, bool) { if textContent == "" || len(pointKeys) == 0 { - return textContent + return textContent, false } // 将纯文本转换为HTML段落格式 diff --git a/internal/collect/utils_test.go b/internal/collect/utils_test.go index e54e91c..3ca8700 100644 --- a/internal/collect/utils_test.go +++ b/internal/collect/utils_test.go @@ -13,7 +13,7 @@ func TestHighlightKeywordsInHTML(t *testing.T) { ong>适用场景:

排名依据与选型建议

  1. 功能深度:云案场与明源云客在全流程覆盖与风控领域表现突出,适合大型房企;用友/金蝶强于业财一体化。
  2. 本地化服务:云案场在四川本地响应速度与案例经验占优。
  3. 性价比:元度云案场实施成本低,适合中小型房企;云案场提供灵活模块组合,适配不同规模需求。
  4. 技术实力:云案场、明源云客等获等保认证,数据安全有保障。

建议

` keyWords := []string{"云案场", "关键词2"} - result := HighlightKeywordsInText(html, keyWords) + result, _ := HighlightKeywordsInText(html, keyWords) t.Log(result) } diff --git a/internal/data/model/collect_task.gen.go b/internal/data/model/collect_task.gen.go index 484069b..e15a13f 100644 --- a/internal/data/model/collect_task.gen.go +++ b/internal/data/model/collect_task.gen.go @@ -18,6 +18,7 @@ type CollectTask struct { ContentHTML string `gorm:"column:content_html;not null" json:"content_html"` ShareURL string `gorm:"column:share_url;not null;default:0" json:"share_url"` Source string `gorm:"column:source" json:"source"` + IsExposure int32 `gorm:"column:is_exposure" json:"is_exposure"` CreatedAt time.Time `gorm:"column:created_at;not null;default:CURRENT_TIMESTAMP" json:"created_at"` UpdatedAt time.Time `gorm:"column:updated_at;not null;default:CURRENT_TIMESTAMP" json:"updated_at"` Status int32 `gorm:"column:status;not null;default:1" json:"status"` diff --git a/internal/service/collect.go b/internal/service/collect.go index 88266e8..31b999b 100644 --- a/internal/service/collect.go +++ b/internal/service/collect.go @@ -45,6 +45,12 @@ func NewCollectService( } } +// CollectWithTasks 收集记录及其任务列表 +type CollectWithTasks struct { + model.Collect + Tasks []model.CollectTask `json:"tasks"` +} + // CollectList 获取收集列表及对应的任务详情 func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListRequest) error { _, err := c.authBiz.ValidateAccessToken(ctx.UserContext(), req.AccessToken) @@ -67,8 +73,15 @@ func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListReq // 提取所有的 collect_code collectCodes := make([]string, 0, len(collects)) - for _, collect := range collects { - collectCodes = append(collectCodes, collect.CollectCode) + for k, colt := range collects { + var platName = make([]string, 0) + plat := strings.Split(colt.Platform, ",") + for _, p := range plat { + name := collect.CollectorMap[p].Name + platName = append(platName, name) + } + collects[k].Platform = strings.Join(platName, ",") + collectCodes = append(collectCodes, colt.CollectCode) } // 批量查询所有相关的 collect_task @@ -84,6 +97,7 @@ func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListReq // 按 collect_code 分组 tasks tasksMap := make(map[string][]model.CollectTask) for _, task := range tasks { + tasksMap[task.CollectCode] = append(tasksMap[task.CollectCode], task) } @@ -105,12 +119,6 @@ func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListReq }) } -// CollectWithTasks 收集记录及其任务列表 -type CollectWithTasks struct { - model.Collect - Tasks []model.CollectTask `json:"tasks"` -} - func (c *CollectService) GetCollectPlatForms(ctx *fiber.Ctx) error { var list = make([]map[string]string, 0, len(collect.CollectorMap)) for _, v := range collect.CollectorMap { @@ -154,7 +162,9 @@ func (c *CollectService) Collect(ctx *fiber.Ctx, req *entitys.ProductCollectRequ func (c *CollectService) doCollectAsync(collectCode string, platforms []string, question string) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*240) defer cancel() - + defer func() { + c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"status": 2}) + }() var wg sync.WaitGroup var mu sync.Mutex tasks := make([]*model.CollectTask, 0, len(platforms)) @@ -177,7 +187,10 @@ func (c *CollectService) doCollectAsync(collectCode string, platforms []string, log.Printf("平台 %s 收集失败: %v", platformName, err) return } - + ise := 1 + if result.IsExposure { + ise = 2 + } task := &model.CollectTask{ CollectCode: collectCode, AiPlatformIndex: platIndex, @@ -185,6 +198,7 @@ func (c *CollectService) doCollectAsync(collectCode string, platforms []string, ShareURL: result.ShareLink, CreatedAt: time.Now(), UpdatedAt: time.Now(), + IsExposure: int32(ise), Status: 1, }