This commit is contained in:
renzhiyuan 2026-04-26 23:57:44 +08:00
parent 2a1a3d4418
commit f88977c98f
25 changed files with 1228 additions and 365 deletions

View File

@ -48,7 +48,7 @@ func InitializeApp(configConfig *config.Config, allLogger log.AllLogger) (*serve
collectTaskImpl := impl.NewCollectTaskImpl(db)
collectBiz := biz.NewCollectBiz(context.Background(), configConfig, allLogger)
productService := service.NewProductService(configConfig, productImpl, authBiz, productBiz, aiBiz)
collectService := service.NewCollectService(configConfig, collectBiz, collectImpl, collectTaskImpl, authBiz)
collectService := service.NewCollectService(configConfig, collectBiz, collectImpl, collectTaskImpl, authBiz, productBiz)
productSourceService := service.NewProductSourceService(configConfig, productImpl, authBiz, aiBiz, productBiz, productSourceImpl, publishBiz, articleTypeImpl)
appModule := router.NewAppModule(configConfig, appService, loginService, publishService, productService, productSourceService, collectService)
routerServer := router.NewRouterServer(appModule)

194
deepseek_test.go Normal file
View File

@ -0,0 +1,194 @@
package collect
import (
"context"
"geo/internal/collect"
"geo/internal/config"
"github.com/gofiber/fiber/v2/log"
"testing"
"time"
)
// Package-level fixtures shared by every DeepSeek test in this file.
//
// deepseekCfg is the configuration returned by config.LoadConfig, and
// deepseekManager is the collect manager built from it with a background
// context and fiber's default logger.
//
// NOTE(review): the error returned by config.LoadConfig is discarded here, so
// a failed load is not reported before the manager is constructed — confirm
// that NewCollectManager tolerates whatever value LoadConfig returns on error.
var (
deepseekCfg, _ = config.LoadConfig()
deepseekManager = collect.NewCollectManager(context.Background(), deepseekCfg, log.DefaultLogger())
)
// TestDeepseekCollector_WaitLogin exercises the DeepSeek login flow through
// the shared collect manager. It is skipped under -short because it needs a
// person to complete the login in a browser.
func TestDeepseekCollector_WaitLogin(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// Headless stays false so the browser window is shown and the user can
	// scan the QR code or type credentials.
	loginParams := new(collect.CollectParams)
	loginParams.Headless = false
	loginParams.RequestID = "test_deepseek_login_001"
	loginParams.Platform = "deepseek"

	t.Log("开始测试DeepSeek登录...")
	t.Log("请在打开的浏览器窗口中完成DeepSeek账号登录扫码或输入账号密码")

	ok, detail := deepseekManager.WaitLogin("deepseek", loginParams)
	if ok {
		t.Logf("DeepSeek登录成功: %s", detail)
		t.Log("Cookie已保存后续测试可以使用已登录状态")
		return
	}

	// Login did not succeed: record the failure with the manager's message.
	t.Errorf("DeepSeek登录失败: %s", detail)
}
// TestDeepseekCollector_AskQuestion sends a single question to DeepSeek via
// the shared collect manager and checks that a non-empty answer comes back.
// Per the original author's note, it needs a valid logged-in state. It is
// skipped under -short because it drives a browser.
func TestDeepseekCollector_AskQuestion(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	const prompt = "四川房地产软件排名"

	// The browser is kept visible (Headless false) to make debugging easier.
	askParams := new(collect.CollectParams)
	askParams.Headless = false
	askParams.RequestID = "test_deepseek_001"
	askParams.Platform = "deepseek"

	t.Logf("向DeepSeek提问: %s", prompt)

	// Ask through the manager and collect the answer.
	reply, askErr := deepseekManager.AskQuestion("deepseek", askParams, prompt)
	if askErr != nil {
		t.Errorf("提问失败: %v", askErr)
		return
	}

	t.Logf("获取到答案:\n%s", reply.Answer)
	t.Logf("分享链接: %s", reply.ShareLink)

	// The answer must not be empty.
	if reply.Answer == "" {
		t.Error("答案为空")
	}
}
// TestDeepseekCollector_MultipleQuestions asks DeepSeek several questions in
// a row through the shared collect manager, reusing one parameter set, and
// logs the length and a short preview of every answer. A failed question is
// reported and the loop moves on to the next one. The test is skipped under
// -short because it drives a browser.
func TestDeepseekCollector_MultipleQuestions(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// maxPreviewRunes caps the logged preview, counted in runes.
	const maxPreviewRunes = 100

	// Headless mode is used here to speed the run up.
	params := &collect.CollectParams{
		Headless:  true,
		RequestID: "test_deepseek_multi_001",
		Platform:  "deepseek",
	}

	questions := []string{
		"什么是人工智能?",
		"如何学习Go语言",
		"推荐几个优秀的开源项目",
	}

	t.Logf("开始测试DeepSeek多次提问共 %d 个问题", len(questions))

	for i, question := range questions {
		t.Logf("[%d/%d] 提问: %s", i+1, len(questions), question)

		result, err := deepseekManager.AskQuestion("deepseek", params, question)
		if err != nil {
			t.Errorf("第 %d 个问题提问失败: %v", i+1, err)
			continue
		}

		// Truncate on rune boundaries: slicing the string by byte offset can
		// split a multi-byte character (the answers are Chinese text) and
		// emit invalid UTF-8 into the test log.
		preview := []rune(result.Answer)
		if len(preview) > maxPreviewRunes {
			preview = preview[:maxPreviewRunes]
		}
		t.Logf("第 %d 个回答长度: %d, 预览: %s...", i+1, len(result.Answer), string(preview))
	}

	t.Log("多次提问测试完成")
}
// TestDeepseekCollector_SpeedTest measures how long one DeepSeek question
// takes end to end through the shared collect manager. A slow response only
// produces a warning in the log; it does not fail the test. Skipped under
// -short because it drives a browser.
func TestDeepseekCollector_SpeedTest(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// Responses slower than this are flagged in the log.
	const slowThreshold = 60 * time.Second
	const prompt = "用一句话介绍你自己"

	speedParams := new(collect.CollectParams)
	speedParams.Headless = true
	speedParams.RequestID = "test_deepseek_speed_001"
	speedParams.Platform = "deepseek"

	t.Logf("测试DeepSeek响应速度问题: %s", prompt)

	// Time only the AskQuestion call itself.
	began := time.Now()
	reply, askErr := deepseekManager.AskQuestion("deepseek", speedParams, prompt)
	took := time.Since(began)

	if askErr != nil {
		t.Errorf("提问失败: %v", askErr)
		return
	}

	t.Logf("响应时间: %v", took)
	t.Logf("答案长度: %d 字符", len(reply.Answer))
	t.Logf("答案: %s", reply.Answer)

	// Warn, but do not fail, when the response took longer than the threshold.
	if took > slowThreshold {
		t.Logf("警告: 响应时间过长: %v", took)
	}
}
// TestDeepseekCollector_BrowserStorage checks that browser state saved by a
// login can be reused by a later question. Step 1 performs an interactive
// login with a visible browser; step 2 asks a question in headless mode with a
// fresh parameter set and logs a short preview of the answer. The test is
// skipped under -short because it drives a browser.
func TestDeepseekCollector_BrowserStorage(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// maxPreviewRunes caps the logged preview, counted in runes.
	const maxPreviewRunes = 50

	t.Log("测试DeepSeek浏览器存储保存和加载功能")
	t.Log("此测试将验证Cookies、LocalStorage和SessionStorage的保存和加载")

	// Step 1: log in so the browser storage gets saved.
	loginParams := &collect.CollectParams{
		Headless:  false,
		RequestID: "test_deepseek_storage_001",
		Platform:  "deepseek",
	}

	t.Log("步骤1: 请登录DeepSeek账号...")
	success, msg := deepseekManager.WaitLogin("deepseek", loginParams)
	if !success {
		t.Errorf("登录失败: %s", msg)
		return
	}
	t.Logf("登录成功: %s", msg)

	// Step 2: ask a question relying on the storage saved in step 1.
	askParams := &collect.CollectParams{
		Headless:  true,
		RequestID: "test_deepseek_storage_002",
		Platform:  "deepseek",
	}

	question := "你好"
	t.Logf("步骤2: 使用保存的浏览器存储提问: %s", question)

	result, err := deepseekManager.AskQuestion("deepseek", askParams, question)
	if err != nil {
		t.Errorf("提问失败: %v", err)
		return
	}

	t.Logf("提问成功,答案长度: %d 字符", len(result.Answer))

	// Truncate on rune boundaries: slicing the string by byte offset can split
	// a multi-byte character and emit invalid UTF-8 into the test log.
	preview := []rune(result.Answer)
	if len(preview) > maxPreviewRunes {
		preview = preview[:maxPreviewRunes]
	}
	t.Logf("预览: %s...", string(preview))

	t.Log("浏览器存储测试完成")
}

View File

@ -57,7 +57,7 @@ func TestDoubaoCollector_AskQuestion(t *testing.T) {
}
// 定义提问内容
question := "今天天气怎么样"
question := "云案场怎么样"
t.Logf("向豆包提问: %s", question)
// 调用管理器提问并获取答案

View File

@ -5,6 +5,11 @@ import (
"fmt"
"geo/internal/collect"
"geo/internal/config"
"geo/internal/data/model"
"geo/pkg"
volmodle "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
"github.com/volcengine/volcengine-go-sdk/volcengine"
"strings"
"github.com/gofiber/fiber/v2/log"
)
@ -31,11 +36,12 @@ func NewCollectBiz(ctx context.Context, cfg *config.Config, logger log.AllLogger
// requestID: 请求ID
// question: 问题内容
// headless: 是否无头模式
func (b *CollectBiz) AskAIQuestion(platform string, requestID, question string, headless bool) (*collect.CollectResult, error) {
func (b *CollectBiz) AskAIQuestion(platform string, requestID, question string, headless bool, keywords []string) (*collect.CollectResult, error) {
params := &collect.CollectParams{
Headless: headless,
RequestID: requestID,
Platform: platform,
KeyWords: keywords,
}
result, err := b.manager.AskQuestion(platform, params, question)
@ -62,25 +68,44 @@ func (b *CollectBiz) ListAIPlatforms() []string {
return b.manager.ListPlatforms()
}
// AskMultipleAI 向多个AI平台提问并收集答案
func (b *CollectBiz) AskMultipleAI(platforms []string, requestID, question string, headless bool) map[string]*collect.CollectResult {
results := make(map[string]*collect.CollectResult)
for _, platform := range platforms {
// 为每个平台生成唯一的 requestID
platformRequestID := requestID + "_" + platform
result, err := b.AskAIQuestion(platform, platformRequestID, question, headless)
if err != nil {
b.logger.Errorf("向%s提问失败: %v", platform, err)
// 创建一个包含错误信息的结果
results[platform] = &collect.CollectResult{
Answer: fmt.Sprintf("错误: %v", err),
ShareLink: "",
}
} else {
results[platform] = result
}
}
return results
// AnaProject is the JSON payload that CreateAndPrompt serialises and sends as
// the content of the user chat message.
type AnaProject struct {
// Ques is the question text, copied from the collect record's Question.
Ques string `json:"ques"`
// Keywords is the collect record's comma-separated Keywords field, split into a list.
Keywords []string `json:"keywords"`
// Tasks holds one entry per collect task passed to CreateAndPrompt.
Tasks []*Task `json:"tasks"`
}
// Task is one platform's result inside an AnaProject payload.
type Task struct {
// ContentHtml is the task's ContentHTML value.
ContentHtml string `json:"content_html"`
// PlatName is the Name of the collect.CollectorMap entry for the task's platform index.
PlatName string `json:"plat_name"`
// IsExposure is true when the source task's IsExposure field equals 2.
// NOTE(review): this JSON key is camelCase while the sibling keys are snake_case — confirm consumers expect "isExposure".
IsExposure bool `json:"isExposure"`
}
// CreateAndPrompt builds the chat messages used to analyse one collect run.
//
// The question, its keywords and one Task per element of tasks (answer HTML,
// platform display name, exposure flag) are packed into an AnaProject,
// serialised with pkg.JsonStringIgonErr, and returned as a single user-role
// message.
//
// ctx is currently unused. collectInfo must be non-nil. The returned slice
// always holds exactly one message.
func (b *CollectBiz) CreateAndPrompt(ctx context.Context, collectInfo *model.Collect, tasks []model.CollectTask) []*volmodle.ChatCompletionMessage {
	// Split the comma-separated keyword list, trimming whitespace and dropping
	// blank entries. strings.Split on an empty string returns [""], which
	// would otherwise put a meaningless empty keyword into the prompt.
	rawKeywords := strings.Split(collectInfo.Keywords, ",")
	keywords := make([]string, 0, len(rawKeywords))
	for _, kw := range rawKeywords {
		if kw = strings.TrimSpace(kw); kw != "" {
			keywords = append(keywords, kw)
		}
	}

	items := make([]*Task, 0, len(tasks))
	for i := range tasks {
		task := &tasks[i]

		// Resolve the platform's display name. A platform index that is not
		// registered in CollectorMap yields a nil entry, so fall back to the
		// raw index instead of dereferencing nil and panicking.
		platName := task.AiPlatformIndex
		if info, ok := collect.CollectorMap[task.AiPlatformIndex]; ok && info != nil {
			platName = info.Name
		}

		items = append(items, &Task{
			ContentHtml: task.ContentHTML,
			PlatName:    platName,
			// NOTE(review): 2 is presumably the stored "exposed" state — confirm against the model.
			IsExposure: task.IsExposure == 2,
		})
	}

	payload := &AnaProject{
		Ques:     collectInfo.Question,
		Keywords: keywords,
		Tasks:    items,
	}
	payloadJSON := pkg.JsonStringIgonErr(payload)

	return []*volmodle.ChatCompletionMessage{
		{
			Role: volmodle.ChatMessageRoleUser,
			Content: &volmodle.ChatCompletionMessageContent{
				StringValue: volcengine.String(payloadJSON),
			},
		},
	}
}

View File

@ -299,7 +299,8 @@ func (b *BaseCollector) SafeElement(selector string) (*rod.Element, error) {
// cookiesDir 获取cookie目录 - 按平台区分
func (b *BaseCollector) cookiesDir() string {
dir := filepath.Join(b.config.Sys.CookiesDir, b.Platform)
// 将cookie存储在 cookies/platform/{Platform} 目录下
dir := filepath.Join(b.config.Sys.PlatformCookieDir, b.Platform)
os.MkdirAll(dir, 0755)
return dir
}

View File

@ -189,7 +189,7 @@ func (c *DeepseekCollector) WaitLogin() (bool, string) {
}
defer c.Close()
c.Page.MustNavigate(c.ChatURL)
c.Page.MustNavigate(c.LoginURL)
c.Sleep(3)
if c.CheckLoginStatus() {
@ -197,6 +197,9 @@ func (c *DeepseekCollector) WaitLogin() (bool, string) {
return true, "already_logged_in"
}
c.LogInfo("等待用户登录...")
// 最多等待300秒
for i := 0; i < 300; i++ {
if c.CheckLoginStatus() {
c.Sleep(2)
@ -204,6 +207,11 @@ func (c *DeepseekCollector) WaitLogin() (bool, string) {
return true, "login_success"
}
time.Sleep(1 * time.Second)
// 每30秒提醒一次
if (i+1)%30 == 0 {
c.LogInfo(fmt.Sprintf("仍在等待登录... 已等待 %d 秒", i+1))
}
}
return false, "登录超时"
@ -230,6 +238,8 @@ func (c *DeepseekCollector) InitPage() error {
// AskQuestion 提问并获取答案
func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) {
c.LogInfo("开始提问流程...")
if err := c.SetupDriver(); err != nil {
return nil, fmt.Errorf("浏览器启动失败: %v", err)
}
@ -251,15 +261,18 @@ func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error)
if err != nil {
return nil, fmt.Errorf("获取答案失败: %v", err)
}
answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
return &CollectResult{
Answer: answer,
ShareLink: "",
Answer: answerStr,
ShareLink: "",
IsExposure: isExposure,
}, nil
}
// inputQuestion 输入问题
func (c *DeepseekCollector) inputQuestion(question string) error {
c.LogInfo("输入问题...")
// DeepSeek的输入框选择器
inputSelectors := []string{
"textarea[placeholder*='Message DeepSeek']",
@ -271,6 +284,7 @@ func (c *DeepseekCollector) inputQuestion(question string) error {
for _, selector := range inputSelectors {
inputBox, err = c.WaitForElementVisible(selector, 10)
if err == nil && inputBox != nil {
c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
break
}
}
@ -296,12 +310,12 @@ func (c *DeepseekCollector) inputQuestion(question string) error {
inputBox.Input(question)
}
c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
c.SleepMs(1000)
return nil
}
// clickSendButton 点击发送按钮
func (c *DeepseekCollector) clickSendButton() error {
// 使用JavaScript直接找到input的父级下的第三个div并点击
clickJS := `
@ -361,49 +375,85 @@ func (c *DeepseekCollector) clickSendButton() error {
// waitForAnswer 等待并获取答案
func (c *DeepseekCollector) waitForAnswer() (string, error) {
timeout := 120 // 最大等待时间(秒)
c.LogInfo("等待AI回答...")
timeout := 180 // 最大等待时间(秒)
startTime := time.Now()
lastAnswerLength := 0
var lastAnswer string
var stableCount int // 稳定计数器
const requiredStableCount = 3 // 需要连续3次内容不变才认为完成
isAnswering := false // 标记是否正在回答中
for time.Since(startTime).Seconds() < float64(timeout) {
// 查找答案区域
// 查找答案区域 - DeepSeek 使用 ds-markdown 类
answerSelectors := []string{
"div[class='ds-markdown']",
".message-content",
".response-text",
"[class*='assistant'] [class*='content']",
"[class*='ai'] [class*='message']",
".chat-message.ai",
".answer-content",
"div[data-message-id]", // 通用的消息ID选择器
}
var answerHTML string
for _, selector := range answerSelectors {
answerElements, err := c.Page.Elements(selector)
if err == nil && len(answerElements) > 0 {
// 获取最后一个答案元素
lastAnswer := answerElements[len(answerElements)-1]
lastAnswerElem := answerElements[len(answerElements)-1]
visible, _ := lastAnswer.Visible()
visible, _ := lastAnswerElem.Visible()
if visible {
text, err := lastAnswer.Text()
if err == nil && len(strings.TrimSpace(text)) > 0 {
// 检查是否正在生成
isGenerating := strings.Contains(text, "正在") ||
strings.Contains(text, "思考") ||
strings.Contains(text, "generating")
if !isGenerating {
// 检查答案是否还在增长
currentLength := len(text)
if currentLength == lastAnswerLength && currentLength > 10 {
// 答案不再增长,认为已完成
return strings.TrimSpace(text), nil
}
lastAnswerLength = currentLength
}
// 直接获取原始HTML内容不做任何处理
htmlContent, err := lastAnswerElem.HTML()
if err == nil && htmlContent != "" {
answerHTML = strings.TrimSpace(htmlContent)
c.LogInfo(fmt.Sprintf("找到答案容器: %s, HTML长度: %d", selector, len(answerHTML)))
break
}
}
}
}
c.SleepMs(1500)
// 检查是否获取到答案
if answerHTML != "" {
if !isAnswering {
c.LogInfo("检测到AI开始回答...")
isAnswering = true
}
// 检查内容是否稳定(流式输出完成)
if answerHTML == lastAnswer {
stableCount++
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))
// 如果内容稳定足够次数,说明回答完成
if stableCount >= requiredStableCount {
c.LogInfo(fmt.Sprintf("✓ AI回答完成最终HTML长度: %d 字符", len(answerHTML)))
return answerHTML, nil
}
} else {
// 内容还在变化,重置计数器
stableCount = 0
lastAnswer = answerHTML
c.LogInfo(fmt.Sprintf("检测到流式输出当前HTML长度: %d 字符", len(answerHTML)))
}
}
c.SleepMs(1000) // 每1秒检查一次
// 每10秒输出一次等待状态
elapsed := int(time.Since(startTime).Seconds())
if elapsed > 0 && elapsed%10 == 0 {
c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
}
}
return "", fmt.Errorf("等待答案超时")
return "", fmt.Errorf("等待答案超时%d秒", timeout)
}
// SafeElement 安全地获取元素

View File

@ -126,7 +126,9 @@ func (c *DoubaoCollector) AskQuestion(question string) (*CollectResult, error) {
if err != nil {
return nil, fmt.Errorf("获取答案失败: %v", err)
}
answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)
// 直接使用原始HTML格式不进行关键词高亮处理
answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
//// 获取分享链接
shareLink := ""
@ -148,10 +150,6 @@ func (c *DoubaoCollector) inputQuestion(question string) error {
// 豆包的输入框选择器 - 使用精确的class匹配
inputSelectors := []string{
"textarea[placeholder*='发消息...']",
"[class*='input'] textarea",
"textarea.semi-input-textarea",
"textarea[placeholder='发消息...']",
"textarea[class*='semi-input-textarea']",
}
var inputBox *rod.Element
@ -174,11 +172,6 @@ func (c *DoubaoCollector) inputQuestion(question string) error {
return fmt.Errorf("点击输入框失败: %v", err)
}
// 清空输入框(如果失败也继续)
if err := c.ClearInput(inputBox); err != nil {
c.LogInfo(fmt.Sprintf("清空输入框失败: %v", err))
}
// 使用原生Input方法输入更稳定
inputBox.Input(question)
c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
@ -206,7 +199,7 @@ func (c *DoubaoCollector) clickSendButton() error {
if classAttr != nil && (strings.Contains(strings.ToLower(*classAttr), "send") ||
strings.Contains(strings.ToLower(*classAttr), "submit")) {
sendBtn = btn
c.LogInfo(fmt.Sprintf("通过class找到发送按钮: class=%s", *classAttr))
break
}
@ -249,114 +242,56 @@ func (c *DoubaoCollector) waitForAnswer() (string, error) {
var lastAnswer string
var stableCount int // 稳定计数器
const requiredStableCount = 5 // 需要连续5次内容不变才认为完成
const requiredStableCount = 3 // 需要连续3次内容不变才认为完成减少到3次以更快响应
isAnswering := false // 标记是否正在回答中
for time.Since(startTime).Seconds() < float64(timeout) {
// 尝试多种方式查找答案容器
answerSelectors := []string{
"div[data-message-id]",
"div[data-message-id*='']",
}
// 直接查找包含 data-message-id 的元素,这是豆包答案的标准标识
answerElements, err := c.Page.Elements("div[data-message-id]")
if err == nil && len(answerElements) > 0 {
// 取最后一个元素(最新的回答)
lastAnswerElem := answerElements[len(answerElements)-1]
var answerText string
visible, _ := lastAnswerElem.Visible()
if visible {
// 直接获取原始HTML内容不做任何处理
htmlContent, err := lastAnswerElem.HTML()
if err == nil && htmlContent != "" {
answerHTML := strings.TrimSpace(htmlContent)
for _, selector := range answerSelectors {
answerElements, err := c.Page.Elements(selector)
if err == nil && len(answerElements) > 0 {
// 取最后一个元素(最新的回答)
lastAnswerElem := answerElements[len(answerElements)-1]
visible, _ := lastAnswerElem.Visible()
if visible {
// 尝试获取HTML内容
htmlContent, err := lastAnswerElem.HTML()
if err == nil && len(strings.TrimSpace(htmlContent)) > 30 {
// 清理HTML标签只保留纯文本
answerText = CleanHTMLTags(htmlContent)
c.LogInfo(fmt.Sprintf("找到答案容器: %s, 清理后文本长度: %d", selector, len(answerText)))
break
if !isAnswering && answerHTML != "" {
c.LogInfo("检测到AI开始回答...")
isAnswering = true
}
// 如果HTML获取失败尝试获取文本
text, err := lastAnswerElem.Text()
if err == nil && len(strings.TrimSpace(text)) > 30 {
answerText = strings.TrimSpace(text)
c.LogInfo(fmt.Sprintf("找到答案容器: %s, 文本长度: %d", selector, len(answerText)))
break
}
}
}
}
// 检查内容是否稳定(流式输出完成)
if answerHTML == lastAnswer && answerHTML != "" {
stableCount++
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))
// 如果常规方法没找到尝试查找所有包含较多文本的div
if answerText == "" {
allDivs, _ := c.Page.Elements("div")
for _, div := range allDivs {
visible, _ := div.Visible()
if !visible {
continue
}
text, err := div.Text()
if err == nil {
trimmedText := strings.TrimSpace(text)
// 查找包含较多文本且不是输入框的div
if len(trimmedText) > 50 && len(trimmedText) < 5000 {
// 排除输入框相关的div
classAttr, _ := div.Attribute("class")
if classAttr != nil {
classLower := strings.ToLower(*classAttr)
if strings.Contains(classLower, "input") ||
strings.Contains(classLower, "textarea") ||
strings.Contains(classLower, "send") {
continue
}
// 如果内容稳定足够次数,说明回答完成
if stableCount >= requiredStableCount {
c.LogInfo(fmt.Sprintf("✓ AI回答完成最终HTML长度: %d 字符", len(answerHTML)))
return answerHTML, nil
}
} else {
// 内容还在变化,重置计数器
stableCount = 0
lastAnswer = answerHTML
if answerHTML != "" {
c.LogInfo(fmt.Sprintf("检测到流式输出当前HTML长度: %d 字符", len(answerHTML)))
}
answerText = CleanHTMLTags(trimmedText)
c.LogInfo(fmt.Sprintf("通过遍历div找到答案文本长度: %d", len(answerText)))
break
}
}
}
}
// 检查是否获取到答案
if answerText != "" && len(answerText) > 30 {
if !isAnswering {
c.LogInfo("检测到AI开始回答...")
isAnswering = true
}
// 检查内容是否稳定(流式输出完成)
if answerText == lastAnswer {
stableCount++
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerText)))
// 如果内容稳定足够次数,说明回答完成
if stableCount >= requiredStableCount {
c.LogInfo(fmt.Sprintf("✓ AI回答完成最终长度: %d 字符", len(answerText)))
return answerText, nil
}
} else {
// 内容还在变化,重置计数器
stableCount = 0
lastAnswer = answerText
c.LogInfo(fmt.Sprintf("检测到流式输出,当前长度: %d 字符", len(answerText)))
}
}
c.SleepMs(1500) // 每1.5秒检查一次
c.SleepMs(1000) // 每1秒检查一次
// 每10秒输出一次等待状态
elapsed := int(time.Since(startTime).Seconds())
if elapsed > 0 && elapsed%10 == 0 {
c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
// 截图帮助调试
if elapsed%30 == 0 {
c.Screenshot(fmt.Sprintf("doubao_wait_answer_%d", elapsed))
}
}
}

View File

@ -71,4 +71,10 @@ var CollectorMap = map[string]*CollectorValue{
Platform: "qianwen",
Icon: "https://attachment-public.oss-cn-hangzhou.aliyuncs.com/geo/platform/qianwen.png",
},
"yuanbao": {
Name: "元宝",
InitMethod: NewYuanbaoCollector,
Platform: "yuanbao",
Icon: "https://attachment-public.oss-cn-hangzhou.aliyuncs.com/geo/platform/yuanbao.png",
},
}

View File

@ -32,24 +32,19 @@ func NewQianwenCollector(ctx context.Context, params *CollectParams, cfg *config
// CheckLoginStatus 检查登录状态
func (c *QianwenCollector) CheckLoginStatus() bool {
currentURL := c.GetCurrentURL()
// 检查是否在通义千问页面
if strings.Contains(currentURL, "tongyi.aliyun.com") {
// 查找用户信息元素
userInfo, err := c.SafeElement(".user-avatar, .avatar, [class*='user'], [class*='profile']")
if err == nil && userInfo != nil {
return true
}
// 检查是否有输入框
inputBox, err := c.SafeElement("textarea, [contenteditable='true']")
if err == nil && inputBox != nil {
return true
// 检查页面上是否存在内容为"登录"或"Login"的button如果存在说明未登录
loginButtons, err := c.Page.Elements("button")
if err == nil {
for _, btn := range loginButtons {
text, _ := btn.Text()
trimmedText := strings.TrimSpace(text)
if trimmedText == "登录" || trimmedText == "Login" {
c.LogInfo(fmt.Sprintf("检测到页面上有'%s'按钮,说明未登录", trimmedText))
return false
}
}
}
return false
return true
}
// WaitLogin 等待登录
@ -59,21 +54,32 @@ func (c *QianwenCollector) WaitLogin() (bool, string) {
}
defer c.Close()
c.LogInfo(fmt.Sprintf("正在导航至通义千问: %s", c.ChatURL))
c.Page.MustNavigate(c.ChatURL)
c.Sleep(3)
if c.CheckLoginStatus() {
c.LogInfo("检测到已登录状态")
c.SaveCookies()
return true, "already_logged_in"
}
c.LogInfo("未检测到登录状态,等待用户登录...")
// 最多等待300秒
for i := 0; i < 300; i++ {
if c.CheckLoginStatus() {
c.LogInfo("检测到登录成功")
c.Sleep(2)
c.SaveCookies()
return true, "login_success"
}
time.Sleep(1 * time.Second)
// 每30秒提醒一次
if (i+1)%30 == 0 {
c.LogInfo(fmt.Sprintf("仍在等待登录... 已等待 %d 秒", i+1))
}
}
return false, "登录超时"
@ -81,6 +87,8 @@ func (c *QianwenCollector) WaitLogin() (bool, string) {
// AskQuestion 提问并获取答案
func (c *QianwenCollector) AskQuestion(question string) (*CollectResult, error) {
c.LogInfo("开始提问流程...")
if err := c.SetupDriver(); err != nil {
return nil, fmt.Errorf("浏览器启动失败: %v", err)
}
@ -90,8 +98,6 @@ func (c *QianwenCollector) AskQuestion(question string) (*CollectResult, error)
return nil, fmt.Errorf("页面初始化失败: %v", err)
}
c.Sleep(3)
if err := c.inputQuestion(question); err != nil {
return nil, fmt.Errorf("输入问题失败: %v", err)
}
@ -104,20 +110,21 @@ func (c *QianwenCollector) AskQuestion(question string) (*CollectResult, error)
if err != nil {
return nil, fmt.Errorf("获取答案失败: %v", err)
}
answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
return &CollectResult{
Answer: answer,
ShareLink: "",
Answer: answerStr,
ShareLink: "",
IsExposure: isExposure,
}, nil
}
// inputQuestion 输入问题
func (c *QianwenCollector) inputQuestion(question string) error {
c.LogInfo("开始输入问题...")
// 通义千问的输入框选择器
inputSelectors := []string{
"textarea[placeholder*='输入']",
"textarea[placeholder*='问']",
"textarea",
"[contenteditable='true']",
".chat-input textarea",
"#chat-input",
@ -131,48 +138,44 @@ func (c *QianwenCollector) inputQuestion(question string) error {
for _, selector := range inputSelectors {
inputBox, err = c.WaitForElementVisible(selector, 10)
if err == nil && inputBox != nil {
c.LogInfo(fmt.Sprintf("找到输入框,使用选择器: %s", selector))
break
}
}
if inputBox == nil {
c.LogError("未找到输入框")
return fmt.Errorf("未找到输入框")
}
// 点击获取焦点
c.LogInfo("点击输入框获取焦点...")
if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
c.LogError(fmt.Sprintf("点击输入框失败: %v", err))
return fmt.Errorf("点击输入框失败: %v", err)
}
c.SleepMs(500)
// 清空输入框
if err := c.ClearInput(inputBox); err != nil {
// Ignore clear error
}
c.SleepMs(300)
c.LogInfo("清空输入框...")
// 输入问题
if err := c.SetInputValue(inputBox, question); err != nil {
inputBox.Input(question)
}
c.LogInfo(fmt.Sprintf("正在输入问题: %s", question))
inputBox.Input(question)
c.SleepMs(1000)
c.LogInfo("问题输入完成")
return nil
}
// clickSendButton 点击发送按钮
func (c *QianwenCollector) clickSendButton() error {
c.LogInfo("开始点击发送按钮...")
// 发送按钮选择器
sendSelectors := []string{
"button[class*='send']",
"button[class*='submit']",
".send-btn",
".submit-btn",
"button svg[path*='send']",
"[aria-label*='发送']",
".send-icon",
".submit-icon",
}
var sendBtn *rod.Element
@ -181,25 +184,32 @@ func (c *QianwenCollector) clickSendButton() error {
for _, selector := range sendSelectors {
sendBtn, err = c.WaitForElementClickable(selector, 5)
if err == nil && sendBtn != nil {
c.LogInfo(fmt.Sprintf("找到发送按钮,使用选择器: %s", selector))
break
}
}
if sendBtn == nil {
c.LogInfo("未通过常规选择器找到发送按钮,尝试查找 SVG 图标...")
// 尝试通过SVG图标查找
sendBtn, err = c.Page.Element("button svg")
if err != nil {
c.LogError("未找到发送按钮或相关图标")
return fmt.Errorf("未找到发送按钮")
}
c.LogInfo("找到 SVG 图标作为发送按钮")
}
c.SleepMs(500)
// 点击发送按钮
c.LogInfo("执行点击发送操作...")
if err := c.JSClick(sendBtn); err != nil {
c.LogError(fmt.Sprintf("点击发送按钮失败: %v", err))
return fmt.Errorf("点击发送按钮失败: %v", err)
}
c.LogInfo("发送按钮点击完成,等待响应...")
c.SleepMs(2000)
return nil
@ -207,56 +217,67 @@ func (c *QianwenCollector) clickSendButton() error {
// waitForAnswer 等待并获取答案
func (c *QianwenCollector) waitForAnswer() (string, error) {
timeout := 120 // 最大等待时间(秒)
c.LogInfo("等待AI回答...")
timeout := 180 // 最大等待时间(秒)
startTime := time.Now()
lastAnswerLength := 0
var lastAnswer string
var stableCount int // 稳定计数器
const requiredStableCount = 5 // 需要连续5次内容不变才认为完成
isAnswering := false // 标记是否正在回答中
for time.Since(startTime).Seconds() < float64(timeout) {
// 查找答案区域
answerSelectors := []string{
".message-content",
".response-text",
"[class*='assistant'] [class*='content']",
"[class*='ai'] [class*='message']",
".chat-message.ai",
".answer-content",
".qianwen-answer",
// 直接通过ID查找答案容器
answerElem, err := c.Page.Element("#qk-markdown-react")
var answerHTML string
if err == nil && answerElem != nil {
// 获取整个HTML内容
htmlContent, err := answerElem.HTML()
if err == nil && htmlContent != "" {
answerHTML = strings.TrimSpace(htmlContent)
c.LogInfo(fmt.Sprintf("找到答案容器 #qk-markdown-reactHTML长度: %d", len(answerHTML)))
}
} else {
c.LogInfo("未找到#qk-markdown-react元素")
}
for _, selector := range answerSelectors {
answerElements, err := c.Page.Elements(selector)
if err == nil && len(answerElements) > 0 {
// 获取最后一个答案元素
lastAnswer := answerElements[len(answerElements)-1]
// 检查是否获取到答案
if answerHTML != "" {
if !isAnswering {
c.LogInfo("检测到AI开始回答...")
isAnswering = true
}
visible, _ := lastAnswer.Visible()
if visible {
text, err := lastAnswer.Text()
if err == nil && len(strings.TrimSpace(text)) > 0 {
// 检查是否正在生成
isGenerating := strings.Contains(text, "正在") ||
strings.Contains(text, "思考中") ||
strings.Contains(text, "typing") ||
strings.Contains(text, "生成中")
// 检查内容是否稳定(流式输出完成)
if answerHTML == lastAnswer {
stableCount++
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))
if !isGenerating {
// 检查答案是否还在增长
currentLength := len(text)
if currentLength == lastAnswerLength && currentLength > 10 {
// 答案不再增长,认为已完成
return strings.TrimSpace(text), nil
}
lastAnswerLength = currentLength
}
}
// 如果内容稳定足够次数,说明回答完成
if stableCount >= requiredStableCount {
c.LogInfo(fmt.Sprintf("✓ AI回答完成最终HTML长度: %d 字符", len(answerHTML)))
return answerHTML, nil
}
} else {
// 内容还在变化,重置计数器
stableCount = 0
lastAnswer = answerHTML
c.LogInfo(fmt.Sprintf("检测到流式输出当前HTML长度: %d 字符", len(answerHTML)))
}
}
c.SleepMs(1500)
c.SleepMs(1500) // 每1.5秒检查一次
// 每10秒输出一次等待状态
elapsed := int(time.Since(startTime).Seconds())
if elapsed > 0 && elapsed%10 == 0 {
c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
}
}
return "", fmt.Errorf("等待答案超时")
return "", fmt.Errorf("等待答案超时%d秒", timeout)
}
// SafeElement 安全地获取元素

View File

@ -69,9 +69,10 @@ func CleanDivTags(html string) string {
// pointKeys: 需要高亮的关键词列表
// 返回处理后的HTML内容每个关键词会被不同颜色的span标签包裹
func HighlightKeywordsInHTML(htmlContent string, pointKeys []string) (string, bool) {
var isExposure bool
if htmlContent == "" || len(pointKeys) == 0 {
return htmlContent, isExposure
return htmlContent, false
}
// 预定义的颜色列表使用CSS颜色值
@ -130,12 +131,13 @@ func HighlightKeywordsInHTML(htmlContent string, pointKeys []string) (string, bo
// pointKeys: 需要高亮的关键词列表
// 返回带有高亮标记的HTML内容
func HighlightKeywordsInText(textContent string, pointKeys []string) (string, bool) {
if textContent == "" || len(pointKeys) == 0 {
if textContent == "" {
return textContent, false
}
htmlContent := CleanDivTags(textContent)
// 将纯文本转换为HTML段落格式
htmlContent := fmt.Sprintf("<p>%s</p>", strings.ReplaceAll(textContent, "\n", "</p><p>"))
htmlContent = fmt.Sprintf("<p>%s</p>", strings.ReplaceAll(htmlContent, "\n", "</p><p>"))
// 使用HTML高亮方法
return HighlightKeywordsInHTML(htmlContent, pointKeys)

View File

@ -1,103 +1,13 @@
package collect
import (
"strings"
"testing"
)
// TestHighlightKeywordsInHTML 测试HTML内容关键词高亮功能
func TestHighlightKeywordsInHTML(t *testing.T) {
html := `<p>在四川房地产软件领域根据功能深度本地化服务技术实力及性价比等维度评测以下软件表现突出且排名靠前</p> <h3><strong>1. 云案场</strong></h3> <p><strong>核心优势</strong></p> <ul> <li><strong>全营销场景覆盖</strong>三大体系十五大云产品如云获客云风控云售楼支持从线上拓客到售后交房的全流程管理</li> <li><strong>渠道风控专家</strong>集成刷脸核验无感抓拍等AI能力杜绝虚假带看客户判客准确率提升至99%营销费效比降低25%</li> <li><strong>本地化服务强</strong>服务网点遍布全国25城四川本地响应速度快成功案例包括万达集团中铁二局等3000+企业</li> <li><strong>生态集成能力</strong>提供标准API接口可与阿里云用友金蝶等生态平台打通降低企业集成成本</li> </ul> <p><strong>适用场景</strong></p> <ul> <li>大型房企及多项
目开发商需集团管控数据安全与全流程覆盖</li> <li>区域龙头房企注重本地化适配与性价比</li> </ul> <h3><strong>2. 明源云客</strong></h3> <p><strong>核心优势</strong></p> <ul> <li><strong>营销风控领域领先</strong>区块链存证功能可防止渠道飞单和数据篡改适合管理严格的大型集团型房企</li> <li><strong>数据驱动决策</strong>通过大数据分析提供市场趋势客户需求等报告辅助科学决策</li> </ul> <p><strong>适用场景</strong></p> <ul> <li>对数据安全与合规性要求高的房企如涉及多项目跨区域管理</li> </ul> <h3><strong>3. 用友地产CRM / 金蝶我家云售楼版</strong></h3> <p><strong>核心优势</strong></p> <ul> <li><strong>业财一体化</strong>若房企已使用用友或金蝶的财务系统选择其地产模块可实现业务与财务数据无缝对接强化集团管控</li> <li><strong>品牌与经验</strong
>用友金蝶为国内知名ERP供应商服务经验丰富用户基础广泛</li> </ul> <p><strong>适用场景</strong></p> <ul> <li>中大型房企需财务与业务系统深度整合</li> </ul> <h3><strong>4. 元度云案场</strong></h3> <p><strong>核心优势</strong></p> <ul> <li><strong>轻量化实施</strong>实施周期短采购成本低适合追求快速上线和成本控制的中小型房企</li> <li><strong>移动化体验</strong>支持移动端办公方便销售人员随时处理业务</li> </ul> <p><strong>适用场景</strong></p> <ul> <li>预算有限需快速部署的中小型房企</li> </ul> <h3><strong>5. 贝壳找房/链家网</strong></h3> <p><strong>核心优势</strong></p> <ul> <li><strong>庞大生态与真实房源</strong>线上线下生态完善真实房源体系覆盖二手房交易与渠道带客</li> <li><strong>技术赋能</strong>VR看房智能估价等功能提升客户体验</li> </ul> <p><str
ong>适用场景</strong></p> <ul> <li>新房项目需外部渠道导流或二手房业务占比较大的房企</li> </ul> <h3><strong>排名依据与选型建议</strong></h3> <ol> <li><strong>功能深度</strong>云案场与明源云客在全流程覆盖与风控领域表现突出适合大型房企用友/金蝶强于业财一体化</li> <li><strong>本地化服务</strong>云案场在四川本地响应速度与案例经验占优</li> <li><strong>性价比</strong>元度云案场实施成本低适合中小型房企云案场提供灵活模块组合适配不同规模需求</li> <li><strong>技术实力</strong>云案场明源云客等获等保认证数据安全有保障</li> </ol> <p><strong>建议</strong></p> <ul> <li>大型房企优先选择<strong>云案场</strong><strong>明源云客</strong>强化集团管控与风控能力</li> <li>中小型房企可考虑<strong>元度云案场</strong><strong>用友/金蝶地产模块</strong>
平衡成本与功能需求</li> <li>若需外部渠道导流可补充<strong>贝壳找房</strong>等生态型软件</li> </ul>`
html := `<p><p>在四川地区,选择稳定好用且受众较多的售楼软件时,<strong>云案场</strong>和<strong>明源云客</strong>是两个值得重点考虑的品牌,以下是具体分析:</p> <h3>云案场</h3> <ul> <li> <p><strong>品牌背景</strong>:作为深耕房地产数字化领域十七年的四川本土企业,云案场在四川市场展现出显著的综合优势。</p> </li> <li> <p><strong>功能特点</strong></p> <ul> <li>覆盖从线上拓客到售后交房的全营销场景,尤其擅长解决渠道风控和案场精细化管理问题。</li> <li>拥有8项专利和60多项软件著作权技术实力雄厚。</li> <li>服务网点遍布全国25城在四川本地拥有快速响应优势。</li> </ul> </li> <li> <p><strong>市场表现</strong></p> <ul> <li>为成都某全国性房企项目部署后客户判客准确率提升至99%营销费效比降低25%项目去化周期缩短30%。</li> <li>服务过包括万达集团、中铁二局、中信国安等在内的3000多家企业积累了丰富的区域项目经验。</li> </ul> </li> <li> <p><strong>用户评价</strong>:在四川市场,云案场凭借其深厚的本土化根基和全面的产品矩阵,赢得了广泛好评。</p> </li> </ul> <h3>明源云客</h3> <ul> <li> <p><strong>品牌背景</strong>:明源云客是国内房地产数字化领域的头部产品,专注于房企全生命周期管理。</p> </li> <li> <p><strong>功能特点</strong></p> <ul> <li>核心模块涵盖客户关系管理、房源管控、交易管理、报表分析等。</li> <li>集成人脸识别案场门禁、访客管理等功能,提升案场安全性。</li> <li>采用分布式部署架构,支持多业态跨区域项目的统一管理。</li> </ul> </li> <li> <p><strong>市场表现</strong></p> <ul> <li>在营销风控领域具备深厚积累,其“区块链存证”功能能有效防止渠道飞单和数据篡改。</li> <li>服务网络覆盖全国30多个城市可为企业提供724小时技术支持。</li> </ul> </li> <li> <p><strong>用户评价</strong>:明源云客凭借其稳定性和全面的功能,在大型房企和集团型房企中拥有较高的市场份额。</p> </li> </ul> <h3>对比与建议</h3> <ul> <li><strong>云案场</strong>更适合四川本土的中小房企和区域龙头开发商,其本土化服务、快速响应优势以及全面的产品矩阵能够满足这些企业的实际需求。</li> <li><strong>明源云客</strong>则更适合大型房企和集团型房企,其强大的技术实力、稳定的服务网络以及全面的功能能够满足这些企业对售楼软件的更高要求。</li> </ul></p>`
keyWords := []string{"云案场", "关键词2"}
result, _ := HighlightKeywordsInText(html, keyWords)
t.Log(result)
}
// TestHighlightKeywordsInHTML_ColorAssignment 测试颜色分配逻辑
func TestHighlightKeywordsInHTML_ColorAssignment(t *testing.T) {
// 创建一个包含所有关键词的HTML内容
keywords := make([]string, 20)
htmlParts := make([]string, 20)
for i := 0; i < 20; i++ {
keyword := "关键词" + string(rune('A'+i))
keywords[i] = keyword
htmlParts[i] = "<p>" + keyword + "</p>"
}
htmlContent := strings.Join(htmlParts, "")
result := HighlightKeywordsInHTML(htmlContent, keywords)
// 验证所有关键词都被处理应该都有span标签
spanCount := strings.Count(result, `<span style="color:`)
if spanCount != len(keywords) {
t.Errorf("期望有 %d 个span标签实际有 %d 个", len(keywords), spanCount)
}
// 验证使用了多种不同的颜色
colors := []string{
"#FF6B6B", "#4ECDC4", "#45B7D1", "#FFA07A", "#98D8C8",
"#F7DC6F", "#BB8FCE", "#85C1E2", "#F8B739", "#52B788",
"#E63946", "#457B9D", "#2A9D8F", "#E9C46A", "#F4A261",
}
foundColors := make(map[string]bool)
for _, color := range colors {
if strings.Contains(result, color) {
foundColors[color] = true
}
}
// 由于有20个关键词循环使用15种颜色应该能找到至少10种不同颜色
if len(foundColors) < 10 {
t.Errorf("期望找到至少10种不同颜色实际找到 %d 种", len(foundColors))
}
}
// TestHighlightKeywordsInHTML_NoDuplicateHighlight runs the highlighter twice
// over the same content and checks that the second pass does not lose spans.
func TestHighlightKeywordsInHTML_NoDuplicateHighlight(t *testing.T) {
	pointKeys := []string{"人工智能"}

	// Pass 1 highlights the raw HTML; pass 2 re-highlights the output of pass 1.
	firstPass := HighlightKeywordsInHTML("<p>人工智能技术</p>", pointKeys)
	secondPass := HighlightKeywordsInHTML(firstPass, pointKeys)

	firstSpans := strings.Count(firstPass, `<span`)
	secondSpans := strings.Count(secondPass, `<span`)

	// Already-highlighted text may be matched again, which is accepted here.
	// The test mainly guards against a crash and against spans disappearing.
	if secondSpans < firstSpans {
		t.Errorf("第二次高亮后span数量不应减少: 第一次=%d, 第二次=%d", firstSpans, secondSpans)
	}
}
// BenchmarkHighlightKeywordsInHTML measures highlighting five keywords inside
// a single HTML paragraph.
func BenchmarkHighlightKeywordsInHTML(b *testing.B) {
	const paragraph = "<p>人工智能和机器学习是计算机科学的重要分支,深度学习是机器学习的一个子领域。自然语言处理也是人工智能的重要应用方向。</p>"
	terms := []string{"人工智能", "机器学习", "深度学习", "自然语言处理", "计算机科学"}

	// Exclude the setup above from the measured time.
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		HighlightKeywordsInHTML(paragraph, terms)
	}
}
// BenchmarkHighlightKeywordsInText measures highlighting five keywords inside
// a plain-text paragraph.
func BenchmarkHighlightKeywordsInText(b *testing.B) {
	const paragraph = "人工智能和机器学习是计算机科学的重要分支,深度学习是机器学习的一个子领域。自然语言处理也是人工智能的重要应用方向。"
	terms := []string{"人工智能", "机器学习", "深度学习", "自然语言处理", "计算机科学"}

	// Exclude the setup above from the measured time.
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		HighlightKeywordsInText(paragraph, terms)
	}
}

View File

@ -122,7 +122,7 @@ func (c *WenxinCollector) AskQuestion(question string) (*CollectResult, error) {
if err != nil {
return nil, fmt.Errorf("获取答案失败: %v", err)
}
answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)
answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
// 获取分享链接
shareLink := ""
//link, _ := c.getShareLink()

301
internal/collect/yuanbao.go Normal file
View File

@ -0,0 +1,301 @@
package collect
import (
"context"
"fmt"
"geo/internal/config"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
"github.com/gofiber/fiber/v2/log"
)
// YuanbaoCollector is the collector for the Yuanbao chat site
// (yuanbao.tencent.com). It embeds *BaseCollector and uses the page, logging
// and cookie helpers that the embedded collector provides.
type YuanbaoCollector struct {
*BaseCollector
}
// NewYuanbaoCollector builds a Yuanbao collector on top of a fresh base
// collector and points both its login and chat URLs at the Yuanbao chat page.
func NewYuanbaoCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
	// Yuanbao uses the same page for logging in and for chatting.
	const yuanbaoChatURL = "https://yuanbao.tencent.com/chat"

	base := NewBaseCollector(ctx, params, cfg, logger)
	base.LoginURL = yuanbaoChatURL
	base.ChatURL = yuanbaoChatURL

	return &YuanbaoCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the current page looks logged in.
//
// The page counts as logged out when any <button> has the trimmed text "登录"
// or "Log In". In every other case, including when the buttons cannot be
// listed, the method returns true.
func (c *YuanbaoCollector) CheckLoginStatus() bool {
	buttons, err := c.Page.Elements("button")
	if err != nil {
		// Nothing to inspect; same outcome as finding no login button.
		return true
	}

	for _, button := range buttons {
		raw, _ := button.Text()
		switch label := strings.TrimSpace(raw); label {
		case "登录", "Log In":
			c.LogInfo(fmt.Sprintf("检测到页面上有'%s'按钮，说明未登录", label))
			return false
		}
	}

	// No login button found: treat the session as logged in.
	return true
}
// WaitLogin opens the Yuanbao login page and waits for the user to log in.
//
// Returns:
//   - (true, "already_logged_in") when the page is already logged in,
//   - (true, "login_success") when a login is detected while waiting,
//   - (false, reason) when the browser cannot start, the login page cannot be
//     opened, or no login is detected within the waiting period.
//
// Cookies are saved on both success paths. The browser is closed on return.
func (c *YuanbaoCollector) WaitLogin() (bool, string) {
	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Navigate reports failures as an error; MustNavigate would panic instead,
	// bypassing this method's (false, reason) failure path.
	if err := c.Page.Navigate(c.LoginURL); err != nil {
		return false, fmt.Sprintf("打开登录页面失败: %v", err)
	}
	c.Sleep(3)

	if c.CheckLoginStatus() {
		c.SaveCookies()
		return true, "already_logged_in"
	}

	c.LogInfo("等待用户登录...")

	const (
		maxAttempts    = 300 // one status check per second
		remindInterval = 30  // log a reminder every 30 checks
	)
	for i := 0; i < maxAttempts; i++ {
		if c.CheckLoginStatus() {
			c.LogInfo("检测到登录成功")
			// Give the page a moment to settle before persisting cookies.
			c.Sleep(2)
			c.SaveCookies()
			return true, "login_success"
		}
		time.Sleep(1 * time.Second)

		if (i+1)%remindInterval == 0 {
			c.LogInfo(fmt.Sprintf("仍在等待登录... 已等待 %d 秒", i+1))
		}
	}

	return false, "登录超时"
}
// AskQuestion sends question to Yuanbao and returns the collected answer.
//
// It starts the browser, initializes the page, types the question, submits
// it, waits for the answer to finish, and then runs HighlightKeywordsInText
// over the answer with the collector's keywords. ShareLink is always empty.
// The browser is closed on return.
//
// Each failing step is returned wrapped with %w so that callers can still
// inspect the underlying error with errors.Is / errors.As.
func (c *YuanbaoCollector) AskQuestion(question string) (*CollectResult, error) {
	c.LogInfo("开始提问流程...")

	if err := c.SetupDriver(); err != nil {
		return nil, fmt.Errorf("浏览器启动失败: %w", err)
	}
	defer c.Close()

	if err := c.InitPage(); err != nil {
		return nil, fmt.Errorf("页面初始化失败: %w", err)
	}
	if err := c.inputQuestion(question); err != nil {
		return nil, fmt.Errorf("输入问题失败: %w", err)
	}
	if err := c.clickSendButton(); err != nil {
		return nil, fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return nil, fmt.Errorf("获取答案失败: %w", err)
	}

	answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
	return &CollectResult{
		Answer:     answerStr,
		ShareLink:  "",
		IsExposure: isExposure,
	}, nil
}
// inputQuestion locates the chat input box and types question into it.
//
// Focusing and clearing the box are best-effort: failures are logged and the
// method continues. Writing the text is not: if SetInputValue fails and the
// fallback element Input call fails too, an error is returned so the caller
// does not submit an empty message.
func (c *YuanbaoCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题...")

	// Candidate selectors for the input box, tried in order.
	inputSelectors := []string{
		"[contenteditable='true']",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		el, err := c.WaitForElementClickable(selector, 10)
		if err == nil && el != nil {
			inputBox = el
			c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
			break
		}
	}
	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to give the box focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		c.LogError(fmt.Sprintf("点击输入框失败: %v", err))
	}
	c.SleepMs(500)

	// Clear any existing content; a failure here is logged, not fatal.
	if err := c.ClearInput(inputBox); err != nil {
		c.LogError(fmt.Sprintf("清空输入框失败: %v", err))
	}
	c.SleepMs(300)

	// Primary path is SetInputValue; fall back to the element's own Input.
	if err := c.SetInputValue(inputBox, question); err != nil {
		if inputErr := inputBox.Input(question); inputErr != nil {
			return fmt.Errorf("输入问题失败: %w", inputErr)
		}
	}

	c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
	c.SleepMs(1000)
	c.LogInfo("问题输入完成")
	return nil
}
// clickSendButton submits the question that was typed into the input box.
//
// It first looks for the send button and clicks it through JSClick. If no
// send button is found it falls back to sending a newline to the input box.
// The fallback selector list ends with the contenteditable selector that
// inputQuestion types into, so the fallback can reach the same element.
func (c *YuanbaoCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	// Candidate selectors for the send button, tried in order.
	sendSelectors := []string{
		"a[id='yuanbao-send-btn']",
	}

	var sendBtn *rod.Element
	for _, selector := range sendSelectors {
		btn, err := c.WaitForElementClickable(selector, 5)
		if err == nil && btn != nil {
			sendBtn = btn
			c.LogInfo(fmt.Sprintf("找到发送按钮: %s", selector))
			break
		}
	}

	if sendBtn != nil {
		if err := c.JSClick(sendBtn); err != nil {
			return fmt.Errorf("点击发送按钮失败: %w", err)
		}
		c.LogInfo("发送按钮点击完成")
		return nil
	}

	// No send button: try to submit by sending a newline to the input box.
	c.LogInfo("未找到发送按钮，尝试按回车发送")
	inputSelectors := []string{
		"textarea[placeholder*='问']",
		"textarea[placeholder*='输入']",
		"textarea",
		"[contenteditable='true']", // the element inputQuestion writes to
	}
	for _, selector := range inputSelectors {
		inputBox, err := c.WaitForElement(selector, 5)
		if err != nil || inputBox == nil {
			continue
		}
		// NOTE(review): whether a "\n" inserted this way submits the message
		// depends on the page's key handling — confirm against the live site.
		if err := inputBox.Input("\n"); err != nil {
			return fmt.Errorf("按回车发送失败: %w", err)
		}
		c.LogInfo("按回车发送成功")
		return nil
	}

	return fmt.Errorf("未找到输入框来按回车发送")
}
// waitForAnswer polls the page until the AI answer stops changing and returns
// the answer container's HTML.
//
// The page is sampled every 1.5 seconds. The answer is considered complete
// once the same non-empty HTML has been seen requiredStableCount times in a
// row after it last changed. If that does not happen within the timeout an
// error is returned.
func (c *YuanbaoCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待AI回答...")

	const (
		timeoutSeconds      = 180 // maximum total wait
		requiredStableCount = 5   // identical samples needed to call it done
		progressEverySecs   = 10  // cadence of the "still waiting" log line
	)

	// Candidate selectors for the answer container; built once, not per poll.
	answerSelectors := []string{
		"div[data-trace-id]",
	}

	var (
		lastAnswer      string
		stableCount     int
		isAnswering     bool
		lastProgressLog int // elapsed seconds at the last progress log
	)

	startTime := time.Now()
	for time.Since(startTime).Seconds() < float64(timeoutSeconds) {
		answerHTML := c.latestAnswerHTML(answerSelectors)

		if answerHTML != "" {
			if !isAnswering {
				c.LogInfo("检测到AI开始回答...")
				isAnswering = true
			}

			if answerHTML == lastAnswer {
				// Unchanged since the previous sample: streaming may be done.
				stableCount++
				c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))
				if stableCount >= requiredStableCount {
					c.LogInfo(fmt.Sprintf("✓ AI回答完成最终HTML长度: %d 字符", len(answerHTML)))
					return answerHTML, nil
				}
			} else {
				// Still streaming: restart the stability count.
				stableCount = 0
				lastAnswer = answerHTML
				c.LogInfo(fmt.Sprintf("检测到流式输出当前HTML长度: %d 字符", len(answerHTML)))
			}
		}

		c.SleepMs(1500)

		// Log progress once per interval. Comparing against the last logged
		// time works even though samples do not land on exact multiples of
		// the interval (a plain elapsed%10 == 0 check fires erratically).
		elapsed := int(time.Since(startTime).Seconds())
		if elapsed-lastProgressLog >= progressEverySecs {
			lastProgressLog = elapsed - elapsed%progressEverySecs
			c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
		}
	}

	return "", fmt.Errorf("等待答案超时(%d秒)", timeoutSeconds)
}

// latestAnswerHTML returns the trimmed HTML of the last element matched by
// the first selector that yields a visible, non-empty element, or "" if none.
func (c *YuanbaoCollector) latestAnswerHTML(selectors []string) string {
	for _, selector := range selectors {
		elements, err := c.Page.Elements(selector)
		if err != nil || len(elements) == 0 {
			continue
		}

		// The last match is taken as the most recent message.
		newest := elements[len(elements)-1]
		if visible, _ := newest.Visible(); !visible {
			continue
		}

		htmlContent, err := newest.HTML()
		if err != nil || htmlContent == "" {
			continue
		}

		trimmed := strings.TrimSpace(htmlContent)
		c.LogInfo(fmt.Sprintf("找到答案容器: %s, HTML长度: %d", selector, len(trimmed)))
		return trimmed
	}
	return ""
}

View File

@ -23,6 +23,7 @@ type Collect struct {
// AiBot holds the bot identifiers used for the AI-assisted features. Each
// field is populated from the config key named in its mapstructure tag.
type AiBot struct {
// Article is read from the "article" key.
Article string `mapstructure:"article"`
// ProductInfo is read from the "product_info" key.
ProductInfo string `mapstructure:"product_info"`
// CollectInfo is read from the "collectInfo" key.
// NOTE(review): this key is camelCase while ProductInfo uses snake_case —
// confirm the key spelling used in the config file.
CollectInfo string `mapstructure:"collectInfo"`
}
type Oss struct {
@ -64,6 +65,7 @@ type Sys struct {
VideosDir string `mapstructure:"videosDir"`
DocsDir string `mapstructure:"docsDir"`
CookiesDir string `mapstructure:"cookiesDir"`
PlatformCookieDir string `mapstructure:"platformCookieDir"`
QrcodesDir string `mapstructure:"qrcodesDir"`
ChromePath string `mapstructure:"chromePath"`
ChromeDataDir string `mapstructure:"chromeDataDir"`
@ -93,6 +95,7 @@ func LoadConfig() (*Config, error) {
VideosDir: filepath.Join(BaseDir, "videos"),
DocsDir: filepath.Join(BaseDir, "docs"),
CookiesDir: filepath.Join(BaseDir, "cookies"),
PlatformCookieDir: filepath.Join(BaseDir, "plat_cookies"),
QrcodesDir: filepath.Join(BaseDir, "qrcodes"),
ChromePath: filepath.Join(BaseDir, "chrome", "chrome.exe"),
ChromeDataDir: filepath.Join(BaseDir, "chrome_data"),
@ -112,6 +115,7 @@ func LoadConfig() (*Config, error) {
AiBot: AiBot{
Article: "bot-20260413000114-8bw62",
ProductInfo: "bot-20260422010906-hvtbd",
CollectInfo: "bot-20260426225808-wcxs7",
},
Collect: Collect{
ApiKey: "sk_7bac5df901aa8933a238fcfec363f4a0",

View File

@ -14,3 +14,7 @@ type ProductInfo struct {
ServiceScope string `json:"service_scope" zh:"服务范围"`
TargetAudience string `json:"target_audience" zh:"目标客户群体"`
}
// CollectInfo carries a single required product name, serialized as "name".
// NOTE(review): presumably the structured payload for the collect analysis
// bot — confirm against its callers.
type CollectInfo struct {
// Name is the product name; marked required for validation.
Name string `json:"name" validate:"required" zh:"产品名称"`
}

View File

@ -210,6 +210,11 @@ type (
Limit int `json:"limit" zh:"每页数量"`
}
// CollectAnaRequest is the request body for the collect analysis endpoint.
// Both fields are marked required for validation.
CollectAnaRequest struct {
// AccessToken is the caller's access token.
AccessToken string `json:"access_token" validate:"required" zh:"access_token"`
// CollectId identifies the collect record to analyze.
CollectId int32 `json:"collect_id" validate:"required" zh:"分析id"`
}
// PageRequest 分页请求
PageRequest struct {
AccessToken string `json:"access_token" validate:"required" zh:"access_token"`

View File

@ -76,4 +76,5 @@ func (m *AppModule) Register(router fiber.Router) {
router.Post("/collect/create", vali(m.collectService.Collect, &entitys.ProductCollectRequest{}))
router.Get("/collect/platforms", m.collectService.GetCollectPlatForms)
router.Post("/collect/list", vali(m.collectService.CollectList, &entitys.CollectListRequest{}))
router.Post("/collect/ana", vali(m.collectService.CollectAan, &entitys.CollectAnaRequest{}))
}

View File

@ -3,17 +3,23 @@ package service
import (
"context"
"fmt"
"geo/internal/ai_tool"
"geo/internal/biz"
"geo/internal/collect"
"geo/internal/config"
"geo/internal/data/impl"
"geo/internal/data/model"
"geo/internal/entitys"
"geo/pkg"
"geo/tmpl/dataTemp"
"geo/tmpl/errcode"
"github.com/gofiber/fiber/v2"
"log"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"xorm.io/builder"
@ -25,6 +31,7 @@ type CollectService struct {
collectBiz *biz.CollectBiz
collect *impl.CollectImpl
collectTask *impl.CollectTaskImpl
productBiz *biz.ProductBiz
authBiz *biz.AuthBiz
}
@ -35,6 +42,7 @@ func NewCollectService(
collect *impl.CollectImpl,
collectTask *impl.CollectTaskImpl,
authBiz *biz.AuthBiz,
productBiz *biz.ProductBiz,
) *CollectService {
return &CollectService{
cfg: cfg,
@ -42,6 +50,7 @@ func NewCollectService(
collect: collect,
collectTask: collectTask,
authBiz: authBiz,
productBiz: productBiz,
}
}
@ -131,7 +140,6 @@ func (c *CollectService) GetCollectPlatForms(ctx *fiber.Ctx) error {
return ctx.JSON(list)
}
// Collect 创建收集任务
func (c *CollectService) Collect(ctx *fiber.Ctx, req *entitys.ProductCollectRequest) error {
_, err := c.authBiz.ValidateAccessToken(ctx.UserContext(), req.AccessToken)
if err != nil {
@ -146,6 +154,7 @@ func (c *CollectService) Collect(ctx *fiber.Ctx, req *entitys.ProductCollectRequ
Platform: strings.Join(req.PlatformIndex, ","),
Question: req.Question,
CreatedAt: time.Now(),
Status: 0, // 0:处理中
}
err = c.collect.Add(ctx.UserContext(), collectData)
@ -153,66 +162,172 @@ func (c *CollectService) Collect(ctx *fiber.Ctx, req *entitys.ProductCollectRequ
return err
}
go c.doCollectAsync(collectCode, req.PlatformIndex, req.Question)
// 启动异步任务
go c.doCollectAsync(collectCode, req.PlatformIndex, req.Question, req.Keywords)
return ctx.JSON(fiber.Map{"message": "收录生成中"})
}
// doCollectAsync 异步执行收集任务
func (c *CollectService) doCollectAsync(collectCode string, platforms []string, question string) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*240)
defer cancel()
// doCollectAsync 异步执行收集任务(简化稳定版)
func (c *CollectService) doCollectAsync(collectCode string, platforms []string, question string, keywords []string) {
defer func() {
c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"status": 2})
if r := recover(); r != nil {
log.Printf("任务panic [%s]: %v", collectCode, r)
c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"status": 3})
}
}()
totalPlatforms := len(platforms)
if totalPlatforms == 0 {
c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"status": 2, "progress": 100})
return
}
// 初始化进度
c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"status": 1, "progress": 10})
var completed int32
var wg sync.WaitGroup
var mu sync.Mutex
tasks := make([]*model.CollectTask, 0, len(platforms))
for _, platIndex := range platforms {
wg.Add(1)
go func(platIndex string) {
defer wg.Done()
platformName, exist := collect.CollectorMap[platIndex]
if !exist {
log.Printf("未知的平台索引: %d", platIndex)
return
}
c.processOnePlatform(collectCode, platIndex, question, keywords)
requestID := fmt.Sprintf("%s_%s", collectCode, platIndex)
result, err := c.collectBiz.AskAIQuestion(platIndex, requestID, question, true)
if err != nil {
log.Printf("平台 %s 收集失败: %v", platformName, err)
return
}
ise := 1
if result.IsExposure {
ise = 2
}
task := &model.CollectTask{
CollectCode: collectCode,
AiPlatformIndex: platIndex,
ContentHTML: result.Answer,
ShareURL: result.ShareLink,
CreatedAt: time.Now(),
UpdatedAt: time.Now(),
IsExposure: int32(ise),
Status: 1,
}
// 原子操作增加计数
done := atomic.AddInt32(&completed, 1)
mu.Lock()
tasks = append(tasks, task)
mu.Unlock()
// 计算并更新进度
progress := 10 + int(float64(done)/float64(totalPlatforms)*90)
c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"progress": progress})
}(platIndex)
}
wg.Wait()
if len(tasks) > 0 {
if err := c.collectTask.Add(ctx, tasks); err != nil {
log.Printf("保存收集任务失败: %v", err)
// 全部完成
c.collect.UpdateByKey(context.Background(), "collect_code", collectCode, map[string]interface{}{"status": 2, "progress": 100})
}
// processOnePlatform asks one AI platform the question and stores the answer
// as a collect task row.
//
// The question is asked in a separate goroutine and awaited for at most 120
// seconds. Unknown platforms, timeouts, failed or empty answers, and save
// failures are logged and end the call without storing anything.
//
// A panic inside the asking goroutine is recovered and reported as an error:
// the caller's recover cannot see panics from this nested goroutine, so an
// unrecovered one would terminate the whole process.
func (c *CollectService) processOnePlatform(collectCode, platIndex, question string, keywords []string) {
	// Reject platform indexes that are not registered.
	platformName, exist := collect.CollectorMap[platIndex]
	if !exist {
		log.Printf("未知平台: %s", platIndex)
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
	defer cancel()

	type result struct {
		data *collect.CollectResult
		err  error
	}
	// Capacity 1 lets the worker finish its send even after this function
	// has already returned on timeout, so the goroutine never blocks forever.
	resultChan := make(chan result, 1)

	go func() {
		defer func() {
			if r := recover(); r != nil {
				// Nothing has been sent yet when a panic reaches this point.
				resultChan <- result{err: fmt.Errorf("panic: %v", r)}
			}
		}()
		requestID := fmt.Sprintf("%s_%s", collectCode, platIndex)
		res, err := c.collectBiz.AskAIQuestion(platIndex, requestID, question, true, keywords)
		resultChan <- result{data: res, err: err}
	}()

	var res result
	select {
	case <-ctx.Done():
		log.Printf("平台 %s 超时", platformName.Name)
		return
	case res = <-resultChan:
	}

	if res.err != nil {
		log.Printf("平台 %s 失败: %v", platformName.Name, res.err)
		return
	}
	if res.data == nil {
		// Guard against a nil result returned together with a nil error.
		log.Printf("平台 %s 返回空结果", platformName.Name)
		return
	}

	// IsExposure is stored as 2 when the answer was flagged as exposure, else 1.
	ise := 1
	if res.data.IsExposure {
		ise = 2
	}

	now := time.Now()
	task := &model.CollectTask{
		CollectCode:     collectCode,
		AiPlatformIndex: platIndex,
		ContentHTML:     res.data.Answer,
		ShareURL:        res.data.ShareLink,
		CreatedAt:       now,
		UpdatedAt:       now,
		IsExposure:      int32(ise),
		Status:          1,
	}
	if err := c.collectTask.Add(context.Background(), task); err != nil {
		log.Printf("保存失败: %v", err)
		return
	}

	log.Printf("平台 %s 完成", platformName.Name)
}
// CollectAan builds an AI analysis report for one collect record and stores
// the uploaded report URL in the record's "end_file" column.
//
// Steps: validate the access token, load the collect record and its tasks,
// build the prompt, request the analysis from the bot, write it to a
// temporary Markdown file, convert that to .docx, upload the .docx, and save
// the returned URL. Both temporary files are removed before returning.
//
// Returns an error when the token is invalid, the record does not exist, or
// any later step fails. A failed task query is only logged; the analysis
// then runs with whatever tasks were loaded.
func (c *CollectService) CollectAan(ctx *fiber.Ctx, req *entitys.CollectAnaRequest) error {
	userCtx := ctx.UserContext()

	// Validate the token the same way the sibling Collect handler does;
	// the request marks it required but it was never checked here.
	if _, err := c.authBiz.ValidateAccessToken(userCtx, req.AccessToken); err != nil {
		return err
	}

	var collectInfo model.Collect
	if err := c.collect.GetByKey(userCtx, c.collect.PrimaryKey(), req.CollectId, &collectInfo); err != nil {
		return err
	}
	if collectInfo.ID == 0 {
		return errcode.NotFound("未找到收录计划")
	}

	var tasks []model.CollectTask
	taskCond := builder.NewCond().And(builder.In("collect_code", collectInfo.CollectCode))
	if _, err := c.collectTask.GetListToStruct(userCtx, &taskCond, nil, &tasks, "created_at ASC"); err != nil {
		log.Printf("查询 collect_task 失败: %v", err)
	}

	mes := c.collectBiz.CreateAndPrompt(userCtx, &collectInfo, tasks)
	content, err := ai_tool.NewHsyq().RequestHsyqBot(userCtx, c.cfg.Hsyq.ApiKey, c.cfg.AiBot.CollectInfo, mes)
	if err != nil {
		return err
	}
	if content == nil {
		// Avoid dereferencing a nil result below.
		return fmt.Errorf("分析内容为空")
	}

	// The question is free text, so make it safe to use as a file name.
	fileBaseName := fmt.Sprintf("%s分析报告_%d", sanitizeReportName(collectInfo.Question), time.Now().UnixNano())
	mdAbs := filepath.Join(c.cfg.Sys.MdDir, fileBaseName+".md")

	// WriteFile closes the file before the converter reads it.
	if err := os.WriteFile(mdAbs, []byte(*content), 0o644); err != nil {
		return fmt.Errorf("写入文件失败: %w", err)
	}
	defer os.Remove(mdAbs)

	// Md2wordFix's result is used as the directory holding the .docx.
	docxDir, err := pkg.Md2wordFix(mdAbs, c.cfg.Sys.MdDir, nil)
	if err != nil {
		return err
	}
	docxName := fileBaseName + ".docx"
	docxAbs := filepath.Join(docxDir, docxName)
	// Remove the generated file itself, not the directory that contains it.
	defer os.Remove(docxAbs)

	fileByte, err := pkg.ReadDocxToBytes(docxAbs)
	if err != nil {
		return err
	}

	url, err := c.productBiz.SourceUpload(userCtx, fileByte, docxName)
	if err != nil {
		return fmt.Errorf("上传文件失败: %w", err)
	}

	return c.collect.UpdateByKey(userCtx, c.collect.PrimaryKey(), req.CollectId, map[string]interface{}{"end_file": url})
}

// sanitizeReportName turns free text into a safe file-name fragment: control
// characters and characters that are invalid in file names on common
// platforms become "_", and the result is capped at 50 runes.
func sanitizeReportName(text string) string {
	const maxRunes = 50

	cleaned := strings.Map(func(r rune) rune {
		if r < 0x20 || strings.ContainsRune(`\/:*?"<>|`, r) {
			return '_'
		}
		return r
	}, strings.TrimSpace(text))

	if runes := []rune(cleaned); len(runes) > maxRunes {
		cleaned = string(runes[:maxRunes])
	}
	return cleaned
}

View File

@ -0,0 +1 @@
[{"name":"ds_session_id","value":"644b29e2a27d4c528e45ae229fadf68a","domain":"chat.deepseek.com","path":"/","expires":-1,"size":45,"httpOnly":true,"secure":true,"session":true,"sameSite":"Strict","priority":"Medium","sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"smidV2","value":"20260426163159e83e346d1e3f9f3f696020661e04d7ac002897e2fae729310","domain":"chat.deepseek.com","path":"/","expires":1811752319.803222,"size":69,"httpOnly":false,"secure":false,"session":false,"priority":"Medium","sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"HWWAFSESID","value":"88aaa6580f59abcca71","domain":"chat.deepseek.com","path":"/","expires":-1,"size":29,"httpOnly":true,"secure":true,"session":true,"priority":"Medium","sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"HWWAFSESTIME","value":"1777192585249","domain":"chat.deepseek.com","path":"/","expires":-1,"size":25,"httpOnly":true,"secure":true,"session":true,"priority":"Medium","sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":".thumbcache_6b2e5483f9d858d7c661c5e276b6a6ae","value":"mMx4TCkinObzkZQYK751bEXCVzaHjqw7amvBHBsPfD6zeLQf+06wldJ1zAYFQwEg2TY6M/dO2FIkwLvEQlfbdA%3D%3D","domain":"chat.deepseek.com","path":"/","expires":1811752320.000822,"size":136,"httpOnly":false,"secure":false,"session":false,"priority":"Medium","sameParty":false,"sourceScheme":"Secure","sourcePort":443}]

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"__tea_session_id_20006317":"{\"sessionId\":\"2c53e899-d750-474f-bd2f-2be348aef3cf\",\"timestamp\":1777192326117}"}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

142
qianwen_test.go Normal file
View File

@ -0,0 +1,142 @@
package collect
import (
"context"
"testing"
"time"
"geo/internal/collect"
"geo/internal/config"
"github.com/gofiber/fiber/v2/log"
)
// Package-level fixtures shared by the Qianwen tests in this file.
var (
// qianwenCfg is the loaded application configuration.
// NOTE(review): the LoadConfig error is discarded, so a failed load only
// shows up later in the tests — confirm that this is acceptable.
qianwenCfg, _ = config.LoadConfig()
// qianwenManager is the collect manager every Qianwen test goes through.
qianwenManager = collect.NewCollectManager(context.Background(), qianwenCfg, log.DefaultLogger())
)
// TestQianwenCollector_WaitLogin exercises the interactive Qianwen login flow.
// It is skipped in -short mode because it needs a visible browser and a user.
func TestQianwenCollector_WaitLogin(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// The browser window must be visible so that the user can log in.
	loginParams := &collect.CollectParams{
		Platform:  "qianwen",
		Headless:  false,
		RequestID: "test_qianwen_login_001",
	}

	ok, msg := qianwenManager.WaitLogin("qianwen", loginParams)
	if ok {
		t.Logf("千问登录成功: %s", msg)
		return
	}
	t.Errorf("千问登录失败: %s", msg)
}
// TestQianwenCollector_AskQuestion asks Qianwen a single question and checks
// that a non-empty answer comes back. Skipped in -short mode.
func TestQianwenCollector_AskQuestion(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Platform:  "qianwen",
		Headless:  false, // run with a visible browser window
		RequestID: "test_qianwen_ask_001",
	}

	question := "为什么你会说你是扎着丸子头"

	result, err := qianwenManager.AskQuestion("qianwen", params, question)
	if err != nil {
		t.Fatalf("千问提问失败: %v", err)
	}
	if result == nil {
		t.Fatal("千问返回了空结果")
	}

	if result.Answer == "" {
		t.Error("千问返回的答案为空")
	} else {
		t.Logf("千问回答长度: %d 字符", len(result.Answer))
		// Preview the first 200 characters. Truncating by rune keeps
		// multi-byte UTF-8 characters intact; byte slicing could split one.
		preview := []rune(result.Answer)
		if len(preview) > 200 {
			preview = preview[:200]
		}
		t.Logf("千问回答预览: %s...", string(preview))
	}

	if result.ShareLink != "" {
		t.Logf("千问分享链接: %s", result.ShareLink)
	}
}
// TestQianwenCollector_MultipleQuestions asks Qianwen several questions in a
// row and checks that each one gets a non-empty answer. Skipped in -short mode.
func TestQianwenCollector_MultipleQuestions(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Platform:  "qianwen",
		Headless:  true,
		RequestID: "test_qianwen_multi_001",
	}

	questions := []string{
		"Python中如何定义一个函数",
		"Go语言的特点是什么",
		"解释一下机器学习的基本概念。",
	}

	for i, question := range questions {
		t.Logf("第%d次提问: %s", i+1, question)

		result, err := qianwenManager.AskQuestion("qianwen", params, question)
		if err != nil {
			t.Errorf("第%d次提问失败: %v", i+1, err)
			continue
		}

		if result == nil || result.Answer == "" {
			t.Errorf("第%d次提问返回的答案为空", i+1)
		} else {
			t.Logf("第%d次回答长度: %d 字符", i+1, len(result.Answer))
			// Preview the first 150 characters, truncated by rune so that
			// multi-byte UTF-8 characters are never split.
			preview := []rune(result.Answer)
			if len(preview) > 150 {
				preview = preview[:150]
			}
			t.Logf("第%d次回答预览: %s...", i+1, string(preview))
		}

		// Pause two seconds between questions to avoid hammering the site.
		time.Sleep(2 * time.Second)
	}
}
// TestQianwenCollector_SpeedTest measures how long Qianwen takes to answer a
// trivial question. Skipped in -short mode.
func TestQianwenCollector_SpeedTest(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Platform:  "qianwen",
		Headless:  true,
		RequestID: "test_qianwen_speed_001",
	}

	question := "1+1等于多少"

	startTime := time.Now()
	result, err := qianwenManager.AskQuestion("qianwen", params, question)
	duration := time.Since(startTime)

	if err != nil {
		t.Fatalf("千问速度测试失败: %v", err)
	}

	if result == nil || result.Answer == "" {
		t.Error("千问速度测试返回的答案为空")
		return
	}

	t.Logf("千问响应时间: %v, 回答长度: %d 字符", duration, len(result.Answer))
	// Preview the first 100 characters, truncated by rune so that multi-byte
	// UTF-8 characters are never split.
	preview := []rune(result.Answer)
	if len(preview) > 100 {
		preview = preview[:100]
	}
	t.Logf("回答预览: %s...", string(preview))
}

142
yuanbao_test.go Normal file
View File

@ -0,0 +1,142 @@
package collect
import (
"context"
"testing"
"time"
"geo/internal/collect"
"geo/internal/config"
"github.com/gofiber/fiber/v2/log"
)
// Package-level fixtures shared by the Yuanbao tests in this file.
var (
// yuanbaoCfg is the loaded application configuration.
// NOTE(review): the LoadConfig error is discarded, so a failed load only
// shows up later in the tests — confirm that this is acceptable.
yuanbaoCfg, _ = config.LoadConfig()
// yuanbaoManager is the collect manager every Yuanbao test goes through.
yuanbaoManager = collect.NewCollectManager(context.Background(), yuanbaoCfg, log.DefaultLogger())
)
// TestYuanbaoCollector_WaitLogin exercises the interactive Yuanbao login flow.
// It is skipped in -short mode because it needs a visible browser and a user.
func TestYuanbaoCollector_WaitLogin(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// The browser window must be visible so that the user can log in.
	loginParams := &collect.CollectParams{
		Platform:  "yuanbao",
		Headless:  false,
		RequestID: "test_yuanbao_login_001",
	}

	ok, msg := yuanbaoManager.WaitLogin("yuanbao", loginParams)
	if ok {
		t.Logf("元宝登录成功: %s", msg)
		return
	}
	t.Errorf("元宝登录失败: %s", msg)
}
// TestYuanbaoCollector_AskQuestion asks Yuanbao a single question and checks
// that a non-empty answer comes back. Skipped in -short mode.
func TestYuanbaoCollector_AskQuestion(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Platform:  "yuanbao",
		Headless:  false, // run with a visible browser window
		RequestID: "test_yuanbao_ask_001",
	}

	question := "你好，请介绍一下你自己。"

	result, err := yuanbaoManager.AskQuestion("yuanbao", params, question)
	if err != nil {
		t.Fatalf("元宝提问失败: %v", err)
	}
	if result == nil {
		t.Fatal("元宝返回了空结果")
	}

	if result.Answer == "" {
		t.Error("元宝返回的答案为空")
	} else {
		t.Logf("元宝回答长度: %d 字符", len(result.Answer))
		// Preview the first 200 characters. Truncating by rune keeps
		// multi-byte UTF-8 characters intact; byte slicing could split one.
		preview := []rune(result.Answer)
		if len(preview) > 200 {
			preview = preview[:200]
		}
		t.Logf("元宝回答预览: %s...", string(preview))
	}

	if result.ShareLink != "" {
		t.Logf("元宝分享链接: %s", result.ShareLink)
	}
}
// TestYuanbaoCollector_MultipleQuestions asks Yuanbao several questions in a
// row and checks that each one gets a non-empty answer. Skipped in -short mode.
func TestYuanbaoCollector_MultipleQuestions(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Platform:  "yuanbao",
		Headless:  true,
		RequestID: "test_yuanbao_multi_001",
	}

	questions := []string{
		"Python中如何定义一个函数",
		"Go语言的特点是什么",
		"解释一下机器学习的基本概念。",
	}

	for i, question := range questions {
		t.Logf("第%d次提问: %s", i+1, question)

		result, err := yuanbaoManager.AskQuestion("yuanbao", params, question)
		if err != nil {
			t.Errorf("第%d次提问失败: %v", i+1, err)
			continue
		}

		if result == nil || result.Answer == "" {
			t.Errorf("第%d次提问返回的答案为空", i+1)
		} else {
			t.Logf("第%d次回答长度: %d 字符", i+1, len(result.Answer))
			// Preview the first 150 characters, truncated by rune so that
			// multi-byte UTF-8 characters are never split.
			preview := []rune(result.Answer)
			if len(preview) > 150 {
				preview = preview[:150]
			}
			t.Logf("第%d次回答预览: %s...", i+1, string(preview))
		}

		// Pause two seconds between questions to avoid hammering the site.
		time.Sleep(2 * time.Second)
	}
}
// TestYuanbaoCollector_SpeedTest measures how long Yuanbao takes to answer a
// trivial question. Skipped in -short mode.
func TestYuanbaoCollector_SpeedTest(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Platform:  "yuanbao",
		Headless:  true,
		RequestID: "test_yuanbao_speed_001",
	}

	question := "1+1等于多少"

	startTime := time.Now()
	result, err := yuanbaoManager.AskQuestion("yuanbao", params, question)
	duration := time.Since(startTime)

	if err != nil {
		t.Fatalf("元宝速度测试失败: %v", err)
	}

	if result == nil || result.Answer == "" {
		t.Error("元宝速度测试返回的答案为空")
		return
	}

	t.Logf("元宝响应时间: %v, 回答长度: %d 字符", duration, len(result.Answer))
	// Preview the first 100 characters, truncated by rune so that multi-byte
	// UTF-8 characters are never split.
	preview := []rune(result.Answer)
	if len(preview) > 100 {
		preview = preview[:100]
	}
	t.Logf("回答预览: %s...", string(preview))
}