3232
This commit is contained in:
parent
0647790cc2
commit
1c9ce20f4a
|
|
@ -57,7 +57,7 @@ func TestDoubaoCollector_AskQuestion(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 定义提问内容
|
// 定义提问内容
|
||||||
question := "四川房地产软件排名"
|
question := "今天天气怎么样"
|
||||||
t.Logf("向豆包提问: %s", question)
|
t.Logf("向豆包提问: %s", question)
|
||||||
|
|
||||||
// 调用管理器提问并获取答案
|
// 调用管理器提问并获取答案
|
||||||
|
|
|
||||||
|
|
@ -127,6 +127,7 @@ func (c *DoubaoCollector) AskQuestion(question string) (*CollectResult, error) {
|
||||||
return nil, fmt.Errorf("获取答案失败: %v", err)
|
return nil, fmt.Errorf("获取答案失败: %v", err)
|
||||||
}
|
}
|
||||||
answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)
|
answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)
|
||||||
|
|
||||||
// 获取分享链接
|
// 获取分享链接
|
||||||
shareLink := c.getShareLink()
|
shareLink := c.getShareLink()
|
||||||
|
|
||||||
|
|
@ -287,27 +288,8 @@ func (c *DoubaoCollector) waitForAnswer() (string, error) {
|
||||||
for time.Since(startTime).Seconds() < float64(timeout) {
|
for time.Since(startTime).Seconds() < float64(timeout) {
|
||||||
// 尝试多种方式查找答案容器
|
// 尝试多种方式查找答案容器
|
||||||
answerSelectors := []string{
|
answerSelectors := []string{
|
||||||
"[class*='message-content']",
|
"div[data-message-id]",
|
||||||
"[class*='response-text']",
|
"div[data-message-id*='']",
|
||||||
"[class*='assistant'] [class*='content']",
|
|
||||||
"[class*='bot'] [class*='message']",
|
|
||||||
".chat-message.bot",
|
|
||||||
".answer-box",
|
|
||||||
"[class*='answer']",
|
|
||||||
"[class*='reply']",
|
|
||||||
"[data-testid*='message']",
|
|
||||||
// 豆包特定的选择器
|
|
||||||
"[class*='bubble']",
|
|
||||||
"[class*='chat-bubble']",
|
|
||||||
"[class*='msg-content']",
|
|
||||||
"[class*='text-content']",
|
|
||||||
".markdown-body",
|
|
||||||
"[class*='markdown']",
|
|
||||||
// 更通用的选择器
|
|
||||||
"div[class*='content']",
|
|
||||||
"div[class*='text']",
|
|
||||||
"article",
|
|
||||||
"section",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var answerText string
|
var answerText string
|
||||||
|
|
@ -419,7 +401,7 @@ func (c *DoubaoCollector) getShareLink() string {
|
||||||
c.LogInfo("尝试获取分享链接...")
|
c.LogInfo("尝试获取分享链接...")
|
||||||
|
|
||||||
// 步骤1: 找到class包含message-action-button-main的div
|
// 步骤1: 找到class包含message-action-button-main的div
|
||||||
actionDiv, err := c.Page.Element("div[class*='message-action-button-main']")
|
actionDiv, err := c.Page.Element("div[data-foundation-type*='receive-message-action-bar']")
|
||||||
if err != nil || actionDiv == nil {
|
if err != nil || actionDiv == nil {
|
||||||
c.LogInfo("未找到message-action-button-main元素")
|
c.LogInfo("未找到message-action-button-main元素")
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -439,9 +421,8 @@ func (c *DoubaoCollector) getShareLink() string {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// 取倒数第二个button
|
|
||||||
shareBtn := buttons[len(buttons)-3]
|
shareBtn := buttons[len(buttons)-3]
|
||||||
c.LogInfo(fmt.Sprintf("找到分享按钮(倒数第2个,共%d个button)", len(buttons)))
|
c.LogInfo(fmt.Sprintf("找到分享按钮(共%d个button)", len(buttons)))
|
||||||
|
|
||||||
// 检查是否可点击,如果pointer-events为none,使用JavaScript点击
|
// 检查是否可点击,如果pointer-events为none,使用JavaScript点击
|
||||||
visible, _ := shareBtn.Visible()
|
visible, _ := shareBtn.Visible()
|
||||||
|
|
|
||||||
|
|
@ -129,9 +129,9 @@ func HighlightKeywordsInHTML(htmlContent string, pointKeys []string) (string, bo
|
||||||
// textContent: 纯文本内容
|
// textContent: 纯文本内容
|
||||||
// pointKeys: 需要高亮的关键词列表
|
// pointKeys: 需要高亮的关键词列表
|
||||||
// 返回带有高亮标记的HTML内容
|
// 返回带有高亮标记的HTML内容
|
||||||
func HighlightKeywordsInText(textContent string, pointKeys []string) string {
|
func HighlightKeywordsInText(textContent string, pointKeys []string) (string, bool) {
|
||||||
if textContent == "" || len(pointKeys) == 0 {
|
if textContent == "" || len(pointKeys) == 0 {
|
||||||
return textContent
|
return textContent, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// 将纯文本转换为HTML段落格式
|
// 将纯文本转换为HTML段落格式
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ func TestHighlightKeywordsInHTML(t *testing.T) {
|
||||||
ong>适用场景</strong>:</p> <ul> <li>新房项目需外部渠道导流,或二手房业务占比较大的房企。</li> </ul> <h3><strong>排名依据与选型建议</strong></h3> <ol> <li><strong>功能深度</strong>:云案场与明源云客在全流程覆盖与风控领域表现突出,适合大型房企;用友/金蝶强于业财一体化。</li> <li><strong>本地化服务</strong>:云案场在四川本地响应速度与案例经验占优。</li> <li><strong>性价比</strong>:元度云案场实施成本低,适合中小型房企;云案场提供灵活模块组合,适配不同规模需求。</li> <li><strong>技术实力</strong>:云案场、明源云客等获等保认证,数据安全有保障。</li> </ol> <p><strong>建议</strong>:</p> <ul> <li>大型房企优先选择<strong>云案场</strong>或<strong>明源云客</strong>,强化集团管控与风控能力。</li> <li>中小型房企可考虑<strong>元度云案场</strong>或<strong>用友/金蝶地产模块</strong>,
|
ong>适用场景</strong>:</p> <ul> <li>新房项目需外部渠道导流,或二手房业务占比较大的房企。</li> </ul> <h3><strong>排名依据与选型建议</strong></h3> <ol> <li><strong>功能深度</strong>:云案场与明源云客在全流程覆盖与风控领域表现突出,适合大型房企;用友/金蝶强于业财一体化。</li> <li><strong>本地化服务</strong>:云案场在四川本地响应速度与案例经验占优。</li> <li><strong>性价比</strong>:元度云案场实施成本低,适合中小型房企;云案场提供灵活模块组合,适配不同规模需求。</li> <li><strong>技术实力</strong>:云案场、明源云客等获等保认证,数据安全有保障。</li> </ol> <p><strong>建议</strong>:</p> <ul> <li>大型房企优先选择<strong>云案场</strong>或<strong>明源云客</strong>,强化集团管控与风控能力。</li> <li>中小型房企可考虑<strong>元度云案场</strong>或<strong>用友/金蝶地产模块</strong>,
|
||||||
平衡成本与功能需求。</li> <li>若需外部渠道导流,可补充<strong>贝壳找房</strong>等生态型软件。</li> </ul>`
|
平衡成本与功能需求。</li> <li>若需外部渠道导流,可补充<strong>贝壳找房</strong>等生态型软件。</li> </ul>`
|
||||||
keyWords := []string{"云案场", "关键词2"}
|
keyWords := []string{"云案场", "关键词2"}
|
||||||
result := HighlightKeywordsInText(html, keyWords)
|
result, _ := HighlightKeywordsInText(html, keyWords)
|
||||||
t.Log(result)
|
t.Log(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ type CollectTask struct {
|
||||||
ContentHTML string `gorm:"column:content_html;not null" json:"content_html"`
|
ContentHTML string `gorm:"column:content_html;not null" json:"content_html"`
|
||||||
ShareURL string `gorm:"column:share_url;not null;default:0" json:"share_url"`
|
ShareURL string `gorm:"column:share_url;not null;default:0" json:"share_url"`
|
||||||
Source string `gorm:"column:source" json:"source"`
|
Source string `gorm:"column:source" json:"source"`
|
||||||
|
IsExposure int32 `gorm:"column:is_exposure" json:"is_exposure"`
|
||||||
CreatedAt time.Time `gorm:"column:created_at;not null;default:CURRENT_TIMESTAMP" json:"created_at"`
|
CreatedAt time.Time `gorm:"column:created_at;not null;default:CURRENT_TIMESTAMP" json:"created_at"`
|
||||||
UpdatedAt time.Time `gorm:"column:updated_at;not null;default:CURRENT_TIMESTAMP" json:"updated_at"`
|
UpdatedAt time.Time `gorm:"column:updated_at;not null;default:CURRENT_TIMESTAMP" json:"updated_at"`
|
||||||
Status int32 `gorm:"column:status;not null;default:1" json:"status"`
|
Status int32 `gorm:"column:status;not null;default:1" json:"status"`
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,12 @@ func NewCollectService(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CollectWithTasks 收集记录及其任务列表
|
||||||
|
type CollectWithTasks struct {
|
||||||
|
model.Collect
|
||||||
|
Tasks []model.CollectTask `json:"tasks"`
|
||||||
|
}
|
||||||
|
|
||||||
// CollectList 获取收集列表及对应的任务详情
|
// CollectList 获取收集列表及对应的任务详情
|
||||||
func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListRequest) error {
|
func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListRequest) error {
|
||||||
_, err := c.authBiz.ValidateAccessToken(ctx.UserContext(), req.AccessToken)
|
_, err := c.authBiz.ValidateAccessToken(ctx.UserContext(), req.AccessToken)
|
||||||
|
|
@ -67,8 +73,15 @@ func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListReq
|
||||||
|
|
||||||
// 提取所有的 collect_code
|
// 提取所有的 collect_code
|
||||||
collectCodes := make([]string, 0, len(collects))
|
collectCodes := make([]string, 0, len(collects))
|
||||||
for _, collect := range collects {
|
for k, colt := range collects {
|
||||||
collectCodes = append(collectCodes, collect.CollectCode)
|
var platName = make([]string, 0)
|
||||||
|
plat := strings.Split(colt.Platform, ",")
|
||||||
|
for _, p := range plat {
|
||||||
|
name := collect.CollectorMap[p].Name
|
||||||
|
platName = append(platName, name)
|
||||||
|
}
|
||||||
|
collects[k].Platform = strings.Join(platName, ",")
|
||||||
|
collectCodes = append(collectCodes, colt.CollectCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 批量查询所有相关的 collect_task
|
// 批量查询所有相关的 collect_task
|
||||||
|
|
@ -84,6 +97,7 @@ func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListReq
|
||||||
// 按 collect_code 分组 tasks
|
// 按 collect_code 分组 tasks
|
||||||
tasksMap := make(map[string][]model.CollectTask)
|
tasksMap := make(map[string][]model.CollectTask)
|
||||||
for _, task := range tasks {
|
for _, task := range tasks {
|
||||||
|
|
||||||
tasksMap[task.CollectCode] = append(tasksMap[task.CollectCode], task)
|
tasksMap[task.CollectCode] = append(tasksMap[task.CollectCode], task)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -105,12 +119,6 @@ func (c *CollectService) CollectList(ctx *fiber.Ctx, req *entitys.CollectListReq
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// CollectWithTasks 收集记录及其任务列表
|
|
||||||
type CollectWithTasks struct {
|
|
||||||
model.Collect
|
|
||||||
Tasks []model.CollectTask `json:"tasks"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *CollectService) GetCollectPlatForms(ctx *fiber.Ctx) error {
|
func (c *CollectService) GetCollectPlatForms(ctx *fiber.Ctx) error {
|
||||||
var list = make([]map[string]string, 0, len(collect.CollectorMap))
|
var list = make([]map[string]string, 0, len(collect.CollectorMap))
|
||||||
for _, v := range collect.CollectorMap {
|
for _, v := range collect.CollectorMap {
|
||||||
|
|
@ -154,7 +162,9 @@ func (c *CollectService) Collect(ctx *fiber.Ctx, req *entitys.ProductCollectRequ
|
||||||
func (c *CollectService) doCollectAsync(collectCode string, platforms []string, question string) {
|
func (c *CollectService) doCollectAsync(collectCode string, platforms []string, question string) {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*240)
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second*240)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
defer func() {
|
||||||
|
c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"status": 2})
|
||||||
|
}()
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var mu sync.Mutex
|
var mu sync.Mutex
|
||||||
tasks := make([]*model.CollectTask, 0, len(platforms))
|
tasks := make([]*model.CollectTask, 0, len(platforms))
|
||||||
|
|
@ -177,7 +187,10 @@ func (c *CollectService) doCollectAsync(collectCode string, platforms []string,
|
||||||
log.Printf("平台 %s 收集失败: %v", platformName, err)
|
log.Printf("平台 %s 收集失败: %v", platformName, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
ise := 1
|
||||||
|
if result.IsExposure {
|
||||||
|
ise = 2
|
||||||
|
}
|
||||||
task := &model.CollectTask{
|
task := &model.CollectTask{
|
||||||
CollectCode: collectCode,
|
CollectCode: collectCode,
|
||||||
AiPlatformIndex: platIndex,
|
AiPlatformIndex: platIndex,
|
||||||
|
|
@ -185,6 +198,7 @@ func (c *CollectService) doCollectAsync(collectCode string, platforms []string,
|
||||||
ShareURL: result.ShareLink,
|
ShareURL: result.ShareLink,
|
||||||
CreatedAt: time.Now(),
|
CreatedAt: time.Now(),
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
|
IsExposure: int32(ise),
|
||||||
Status: 1,
|
Status: 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue