diff --git a/.gitignore b/.gitignore index e473800..bc8bdcd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ chrome/* chrome_data/* cookies/* docs/* -logs/* \ No newline at end of file +logs/* +md/* +cmd/server/server.exe \ No newline at end of file diff --git a/cmd/server/server.exe b/cmd/server/server.exe new file mode 100644 index 0000000..97ce45e Binary files /dev/null and b/cmd/server/server.exe differ diff --git a/example_test.go b/example_test.go index 0cdf980..4a95e1d 100644 --- a/example_test.go +++ b/example_test.go @@ -2,15 +2,11 @@ package collect import ( "context" - "fmt" "geo/internal/collect" "geo/internal/config" "log" "os" - "strings" "testing" - - "github.com/go-rod/rod/lib/proto" ) var ( @@ -81,97 +77,6 @@ func TestWenxinCollector_WaitLogin(t *testing.T) { t.Log("Cookie已保存,后续测试可以使用已登录状态") } -// TestWenxinCollector_SimpleAsk 简单测试文心一言提问 -func TestWenxinCollector_SimpleAsk(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") - } - - params := &collect.CollectParams{ - Headless: false, // 显示浏览器以便观察 - UserIndex: "test_user", - PlatIndex: "wenxin", - RequestID: "test_wenxin_simple_001", - Platform: "wenxin", - } - - t.Log("=== 简单测试文心一言提问 ===") - - // 获取收集器 - collector, err := manager.GetCollector("wenxin", params) - if err != nil { - t.Fatalf("获取收集器失败: %v", err) - } - - wenxinCollector := collector.(*collect.WenxinCollector) - - // 初始化浏览器 - if err := wenxinCollector.SetupDriver(); err != nil { - t.Fatalf("启动浏览器失败: %v", err) - } - defer wenxinCollector.Close() - - // 加载Cookie - if err := wenxinCollector.LoadCookies(); err != nil { - t.Logf("未找到Cookie文件: %v", err) - } - - // 导航到聊天页面 - wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL) - wenxinCollector.Sleep(5) - - // 检查登录状态 - isLoggedIn := wenxinCollector.CheckLoginStatus() - t.Logf("登录状态: %v", isLoggedIn) - - if !isLoggedIn { - t.Fatal("未登录,请先调用WaitLogin登录") - } - - // 手动输入问题 - question := "你好" - t.Logf("准备输入问题: %s", question) - - // 查找输入框 - inputBox, err := wenxinCollector.WaitForElementVisible("[contenteditable='true']", 10) - if err != nil { - t.Fatalf("未找到输入框: %v", err) - } - t.Log("✓ 找到输入框") - - // 点击输入框 - inputBox.Click(proto.InputMouseButtonLeft, 1) - wenxinCollector.SleepMs(500) - - // 清空输入框 - wenxinCollector.ClearInput(inputBox) - wenxinCollector.SleepMs(300) - - // 使用键盘输入 - t.Log("正在输入问题...") - inputBox.Input(question) - wenxinCollector.SleepMs(1000) - t.Log("✓ 问题已输入") - - // 查找并点击发送按钮 - sendBtn, err := wenxinCollector.Page.Element("button") - if err != nil { - t.Fatalf("未找到发送按钮: %v", err) - } - t.Log("✓ 找到发送按钮") - - t.Log("正在点击发送按钮...") - sendBtn.Click(proto.InputMouseButtonLeft, 1) - wenxinCollector.SleepMs(3000) - t.Log("✓ 已点击发送按钮") - - t.Log("\n请观察浏览器窗口,查看是否成功发送问题并收到回答") - t.Log("测试将在10秒后结束...") - wenxinCollector.Sleep(10) - - t.Log("=== 测试完成 ===") -} - // TestWenxinCollector_AskQuestion 测试文心一言提问功能 // 注意:此测试需要有效的登录状态 func TestWenxinCollector_AskQuestion(t *testing.T) { @@ -189,7 +94,7 @@ func TestWenxinCollector_AskQuestion(t *testing.T) { } // 定义提问内容 - question := "请用一句话介绍Go语言" + question := "四川房地产软件排名" t.Logf("向文心一言提问: %s", question) // 调用管理器提问并获取答案 @@ -206,330 +111,3 @@ func TestWenxinCollector_AskQuestion(t *testing.T) { t.Error("答案为空") } } - -// TestMultiplePlatforms_Compare 测试多平台对比 -func TestMultiplePlatforms_Compare(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") - } - - question := "什么是人工智能?" - platforms := []string{"wenxin", "deepseek"} - - results := make(map[string]string) - - for _, platform := range platforms { - params := &collect.CollectParams{ - Headless: true, - UserIndex: "test_user", - PlatIndex: platform, - RequestID: fmt.Sprintf("test_%s", platform), - Platform: platform, - } - - t.Logf("正在向%s提问...", platform) - answer, err := manager.AskQuestion(platform, params, question) - if err != nil { - t.Logf("%s提问失败: %v", platform, err) - results[platform] = fmt.Sprintf("错误: %v", err) - continue - } - - results[platform] = answer - t.Logf("%s回答完成,长度: %d", platform, len(answer)) - } - - // 输出对比结果 - t.Log("\n===== 多平台回答对比 =====") - for platform, answer := range results { - t.Logf("\n[%s]:\n%s\n", platform, answer) - } -} - -// TestWenxinCollector_DebugPageStructure 调试页面结构 -func TestWenxinCollector_DebugPageStructure(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") - } - - params := &collect.CollectParams{ - Headless: false, - UserIndex: "test_user", - PlatIndex: "wenxin", - RequestID: "test_wenxin_debug_001", - Platform: "wenxin", - } - - t.Log("=== 调试文心一言页面结构 ===") - - // 获取收集器 - collector, err := manager.GetCollector("wenxin", params) - if err != nil { - t.Fatalf("获取收集器失败: %v", err) - } - - wenxinCollector := collector.(*collect.WenxinCollector) - if err := wenxinCollector.SetupDriver(); err != nil { - t.Fatalf("启动浏览器失败: %v", err) - } - defer wenxinCollector.Close() - - // 加载Cookie - if err := wenxinCollector.LoadCookies(); err != nil { - t.Logf("未找到Cookie文件: %v", err) - } - - // 导航到聊天页面 - wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL) - wenxinCollector.Sleep(5) - - // 检查登录状态 - isLoggedIn := wenxinCollector.CheckLoginStatus() - t.Logf("登录状态: %v", isLoggedIn) - - if !isLoggedIn { - t.Fatal("未登录,请先调用WaitLogin登录") - } - - // 查找所有可能的输入框 - t.Log("\n=== 查找输入框 ===") - inputSelectors := []string{ - "textarea", - "[contenteditable='true']", - "input[type='text']", - ".input-box", - "#chat-input", - "[placeholder]", - } - - for _, selector := range inputSelectors { - elements, err := wenxinCollector.Page.Elements(selector) - if err == nil && len(elements) > 0 { - t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector) - for i, elem := range elements { - if i >= 3 { - break // 只显示前3个 - } - text, _ := elem.Text() - tagName, _ := elem.Property("tagName") - class, _ := elem.Attribute("class") - id, _ := elem.Attribute("id") - placeholder, _ := elem.Attribute("placeholder") - - idStr := "" - if id != nil { - idStr = *id - } - classStr := "" - if class != nil { - classStr = *class - } - placeholderStr := "" - if placeholder != nil { - placeholderStr = *placeholder - } - - t.Logf(" [%d] tag=%s, id=%s, class=%s, placeholder=%s, text=%s", - i, tagName.Str(), idStr, classStr, placeholderStr, text[:min(50, len(text))]) - } - } else { - t.Logf("✗ 未找到元素: %s", selector) - } - } - - // 查找所有按钮 - t.Log("\n=== 查找发送按钮 ===") - buttonSelectors := []string{ - "button", - "svg", - "[aria-label]", - } - - for _, selector := range buttonSelectors { - elements, err := wenxinCollector.Page.Elements(selector) - if err == nil && len(elements) > 0 { - t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector) - for i, elem := range elements { - if i >= 5 { - break - } - text, _ := elem.Text() - tagName, _ := elem.Property("tagName") - class, _ := elem.Attribute("class") - ariaLabel, _ := elem.Attribute("aria-label") - ariaLabelText := "" - if ariaLabel != nil { - ariaLabelText = *ariaLabel - } - - classStr := "" - if class != nil { - classStr = *class - } - - trimmedText := strings.TrimSpace(text) - if trimmedText != "" || ariaLabelText != "" { - t.Logf(" [%d] tag=%s, class=%s, aria-label=%s, text=%s", - i, tagName.Str(), classStr, ariaLabelText, trimmedText[:min(30, len(trimmedText))]) - } - } - } - } - - t.Log("\n=== 调试完成 ===") - t.Log("请保持浏览器窗口打开,手动检查页面结构") - - // 等待用户观察 - select {} -} - -// TestWenxinCollector_DebugAnswer 调试答案区域 -func TestWenxinCollector_DebugAnswer(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") - } - - params := &collect.CollectParams{ - Headless: false, - UserIndex: "test_user", - PlatIndex: "wenxin", - RequestID: "test_wenxin_debug_answer", - Platform: "wenxin", - } - - t.Log("=== 调试文心一言答案区域 ===") - - collector, err := manager.GetCollector("wenxin", params) - if err != nil { - t.Fatalf("获取收集器失败: %v", err) - } - - wenxinCollector := collector.(*collect.WenxinCollector) - if err := wenxinCollector.SetupDriver(); err != nil { - t.Fatalf("启动浏览器失败: %v", err) - } - defer wenxinCollector.Close() - - if err := wenxinCollector.LoadCookies(); err != nil { - t.Logf("未找到Cookie文件: %v", err) - } - - wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL) - wenxinCollector.Sleep(5) - - if !wenxinCollector.CheckLoginStatus() { - t.Fatal("未登录") - } - - // 手动输入问题并发送 - t.Log("请在浏览器中手动输入问题并等待AI回答完成") - t.Log("然后按回车键继续...") - fmt.Scanln() - - // 查找所有可能的答案容器 - t.Log("\n=== 查找答案容器 ===") - - // 方式1: 查找包含answer/response/message的元素 - containers, _ := wenxinCollector.Page.Elements("[class*='answer'], [class*='response'], [class*='message']") - t.Logf("找到 %d 个容器元素", len(containers)) - for i, container := range containers { - text, _ := container.Text() - classAttr, _ := container.Attribute("class") - tagName, _ := container.Property("tagName") - - classStr := "" - if classAttr != nil { - classStr = *classAttr - } - - if len(strings.TrimSpace(text)) > 20 { - t.Logf("[%d] tag=%s, class=%s, text长度=%d, 前100字符=%s", - i, tagName.Str(), classStr, len(text), text[:min(100, len(text))]) - } - } - - // 方式2: 查找所有div,显示较长的文本 - t.Log("\n=== 查找长文本div ===") - allDivs, _ := wenxinCollector.Page.Elements("div") - var longTextDivs []struct{ - index int - text string - class string - } - - for i, div := range allDivs { - text, _ := div.Text() - if len(strings.TrimSpace(text)) > 50 { - classAttr, _ := div.Attribute("class") - classStr := "" - if classAttr != nil { - classStr = *classAttr - } - longTextDivs = append(longTextDivs, struct{ - index int - text string - class string - }{i, text, classStr}) - } - } - - t.Logf("找到 %d 个长文本div", len(longTextDivs)) - for _, item := range longTextDivs { - t.Logf("[%d] class=%s, 长度=%d, 前150字符=%s", - item.index, item.class, len(item.text), item.text[:min(150, len(item.text))]) - } - - t.Log("\n=== 调试完成,请保持浏览器打开以便观察 ===") - select {} -} - -// BenchmarkWenxinCollector 性能测试(仅供参考) -func BenchmarkWenxinCollector(b *testing.B) { - b.Skip("跳过性能测试") -} - -// ExampleCollectManager 使用示例 -func ExampleCollectManager() { - - // 列出支持的平台 - platforms := manager.ListPlatforms() - fmt.Printf("支持的平台: %v\n", platforms) - - // 设置参数 - params := &collect.CollectParams{ - Headless: true, - UserIndex: "user_001", - PlatIndex: "wenxin", - RequestID: "req_001", - Platform: "wenxin", - } - - // 向文心一言提问 - answer, err := manager.AskQuestion("wenxin", params, "什么是人工智能?") - if err != nil { - fmt.Printf("错误: %v\n", err) - return - } - - fmt.Printf("答案: %s\n", answer) -} - -// ExampleWenxinCollector_WaitLogin 文心一言登录示例 -func ExampleWenxinCollector_WaitLogin() { - params := &collect.CollectParams{ - Headless: false, // 登录时需要显示浏览器 - UserIndex: "user_001", - PlatIndex: "wenxin", - RequestID: "example_login_001", - Platform: "wenxin", - } - - fmt.Println("正在打开文心一言登录页面...") - success, msg := manager.WaitLogin("wenxin", params) - if success { - fmt.Printf("登录成功: %s\n", msg) - fmt.Println("Cookie已保存,下次可以自动登录") - } else { - fmt.Printf("登录失败: %s\n", msg) - } -} diff --git a/go.mod b/go.mod index 3b9a971..04625e5 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( require ( filippo.io/edwards25519 v1.1.0 // indirect github.com/andybalholm/brotli v1.1.0 // indirect + github.com/atotto/clipboard v0.1.4 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/gabriel-vasile/mimetype v1.4.13 // indirect diff --git a/go.sum b/go.sum index 1600f3b..c2ba6f7 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible h1:8psS8a+wKfiLt1iVDX79F github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= +github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= diff --git a/internal/ai_tool/collect.go b/internal/ai_tool/collect.go index e363cf7..41953ee 100644 --- a/internal/ai_tool/collect.go +++ b/internal/ai_tool/collect.go @@ -64,7 +64,7 @@ type CreateReq struct { // 品牌词,多个用英文逗号隔开 Keywords string `json:"keywords"` // 平台,1-deepseek,2-豆包,3-元宝,4-千问,5-文心一言,6-纳米,7-kimi,8-智普 - Platform int64 `json:"platform"` + Platform int `json:"platform"` // 问题 Question string `json:"question"` // 建议填第三方的用户id。方便查单 @@ -110,7 +110,7 @@ type CheckTaskRes struct { } `json:"data"` } -func (s *Collect) CheckTask(requestId string) (*CheckTaskRes, error) { +func (s *Collect) CheckTask(ctx context.Context, requestId string) (*CheckTaskRes, error) { url := "http://8.138.187.158:8082/api/geo/check_task" request := map[string]interface{}{ "request_id": requestId, diff --git a/internal/collect/base.go b/internal/collect/base.go index a8bf8ff..8782080 100644 --- a/internal/collect/base.go +++ b/internal/collect/base.go @@ -110,16 +110,25 @@ func (b *BaseCollector) SetupDriver() error { l.UserDataDir(userDataDir) l.Set("window-size", "1920,1080") + + // 设置中文语言环境 l.Set("lang", "zh-CN") + l.Set("accept-lang", "zh-CN,zh;q=0.9,en;q=0.8") l.Set("force-device-scale-factor", "1") + // 设置时区为中国 + l.Set("timezone", "Asia/Shanghai") + url, err := l.Launch() if err != nil { return fmt.Errorf("启动浏览器失败: %v", err) } b.Browser = rod.New().Context(b.ctx).ControlURL(url).MustConnect() + + // 创建新页面 b.Page = b.Browser.MustPage() + return nil } @@ -302,6 +311,11 @@ func (b *BaseCollector) InitPage() error { b.Page.MustNavigate(b.ChatURL) b.WaitForPageReady(5) b.Sleep(2) + } else { + // 首次访问,先导航到页面 + b.Page.MustNavigate(b.ChatURL) + b.WaitForPageReady(5) + b.Sleep(2) } b.SaveCookies() diff --git a/internal/collect/utils.go b/internal/collect/utils.go index ceda5ba..58fbd02 100644 --- a/internal/collect/utils.go +++ b/internal/collect/utils.go @@ -1,6 +1,7 @@ package collect import ( + "fmt" "regexp" "strings" ) @@ -26,10 +27,110 @@ func CleanHTMLTags(html string) string { // 去除多余的空格和换行 cleaned = strings.TrimSpace(cleaned) - + // 将多个连续空格替换为单个空格 multipleSpaces := regexp.MustCompile(`\s+`) cleaned = multipleSpaces.ReplaceAllString(cleaned, " ") return cleaned } + +// CleanDivTags 只清理div标签,保留其他HTML标签和纯文本内容 +// 这个函数会移除所有
标签,但保留标签内的内容 +func CleanDivTags(html string) string { + if html == "" { + return "" + } + + // 移除所有
标签(不区分大小写) + re := regexp.MustCompile(`(?i)]*>`) + cleaned := re.ReplaceAllString(html, "") + + // 解码常见的HTML实体 + cleaned = strings.ReplaceAll(cleaned, " ", " ") + cleaned = strings.ReplaceAll(cleaned, "<", "<") + cleaned = strings.ReplaceAll(cleaned, ">", ">") + cleaned = strings.ReplaceAll(cleaned, "&", "&") + cleaned = strings.ReplaceAll(cleaned, """, "\"") + cleaned = strings.ReplaceAll(cleaned, "'", "'") + + // 去除多余的空格和换行 + cleaned = strings.TrimSpace(cleaned) + + // 将多个连续空格替换为单个空格 + multipleSpaces := regexp.MustCompile(`\s+`) + cleaned = multipleSpaces.ReplaceAllString(cleaned, " ") + + return cleaned +} + +// HighlightKeywordsInHTML 在HTML内容中高亮显示指定的关键词 +// htmlContent: 原始HTML内容 +// pointKeys: 需要高亮的关键词列表 +// 返回处理后的HTML内容,每个关键词会被不同颜色的span标签包裹 +func HighlightKeywordsInHTML(htmlContent string, pointKeys []string) string { + if htmlContent == "" || len(pointKeys) == 0 { + return htmlContent + } + + // 预定义的颜色列表(使用CSS颜色值) + colors := []string{ + "#FF6B6B", // 红色 + "#4ECDC4", // 青色 + "#45B7D1", // 蓝色 + "#FFA07A", // 浅橙色 + "#98D8C8", // 薄荷绿 + "#F7DC6F", // 黄色 + "#BB8FCE", // 紫色 + "#85C1E2", // 天蓝色 + "#F8B739", // 橙色 + "#52B788", // 绿色 + "#E63946", // 深红色 + "#457B9D", // 深蓝色 + "#2A9D8F", // 蓝绿色 + "#E9C46A", // 金黄色 + "#F4A261", // 橘色 + } + + result := htmlContent + + // 遍历每个关键词,为其分配颜色并添加高亮标记 + for index, keyword := range pointKeys { + if keyword == "" { + continue + } + + // 选择颜色(循环使用颜色列表) + color := colors[index%len(colors)] + + // 转义关键词中的特殊正则字符 + escapedKeyword := regexp.QuoteMeta(keyword) + + // 创建匹配模式,确保只匹配完整的词(避免部分匹配) + // 使用大小写不敏感匹配 + pattern := fmt.Sprintf(`(?i)(%s)`, escapedKeyword) + re := regexp.MustCompile(pattern) + + // 替换匹配的关键词为带颜色的span标签 + replacement := fmt.Sprintf(`$1`, color) + result = re.ReplaceAllString(result, replacement) + } + + return result +} + +// HighlightKeywordsInText 在纯文本中高亮显示指定的关键词(先转换为HTML) +// textContent: 纯文本内容 +// pointKeys: 需要高亮的关键词列表 +// 返回带有高亮标记的HTML内容 +func HighlightKeywordsInText(textContent string, pointKeys []string) string { + if textContent == "" || len(pointKeys) == 0 { + return textContent + } + + // 将纯文本转换为HTML段落格式 + htmlContent := fmt.Sprintf("

%s

", strings.ReplaceAll(textContent, "\n", "

")) + + // 使用HTML高亮方法 + return HighlightKeywordsInHTML(htmlContent, pointKeys) +} diff --git a/internal/collect/wenxin.go b/internal/collect/wenxin.go index 703efae..65e4e84 100644 --- a/internal/collect/wenxin.go +++ b/internal/collect/wenxin.go @@ -8,10 +8,20 @@ import ( "strings" "time" + "github.com/atotto/clipboard" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/proto" + "regexp" ) +// Source 文章引用来源结构体 +type Source struct { + Title string `json:"name"` + Url string `json:"url"` + PlatformName string `json:"platform"` + PlatformIcon string `json:"Platform_icon"` +} + // WenxinCollector 文心一言收集器 type WenxinCollector struct { *BaseCollector @@ -30,6 +40,35 @@ func NewWenxinCollector(ctx context.Context, params *CollectParams, cfg *config. return collector } +// SetupDriver 重写父类方法,添加中文语言设置 +func (c *WenxinCollector) SetupDriver() error { + if err := c.BaseCollector.SetupDriver(); err != nil { + return err + } + + // 通过 JavaScript 设置 navigator.language 为中文 + jsCode := ` + (function() { + Object.defineProperty(navigator, 'language', { + get: function() { return 'zh-CN'; }, + configurable: true + }); + Object.defineProperty(navigator, 'languages', { + get: function() { return ['zh-CN', 'zh', 'en']; }, + configurable: true + }); + })(); + ` + + if _, err := c.Page.Eval(jsCode); err != nil { + c.LogInfo(fmt.Sprintf("设置语言失败: %v", err)) + } else { + c.LogInfo("已设置浏览器语言为中文 (zh-CN)") + } + + return nil +} + // CheckLoginStatus 检查登录状态 func (c *WenxinCollector) CheckLoginStatus() bool { currentURL := c.GetCurrentURL() @@ -150,6 +189,24 @@ func (c *WenxinCollector) AskQuestion(question string) (string, error) { } c.LogInfo(fmt.Sprintf("成功获取答案,长度: %d 字符", len(answer))) + + // 获取分享链接 + _, shareErr := c.getShareLink() + if shareErr != nil { + c.LogInfo(fmt.Sprintf("分享链接获取状态: %v", shareErr)) + } + + // 获取引用来源 + sources, sourcesErr := c.GetSources() + if sourcesErr != nil { + c.LogInfo(fmt.Sprintf("引用来源获取失败: %v", sourcesErr)) + } else if len(sources) > 0 { + c.LogInfo(fmt.Sprintf("成功获取 %d 个引用来源", len(sources))) + for i, source := range sources { + c.LogInfo(fmt.Sprintf(" [%d] 标题: %s, 来源: %s, URL: %s", i+1, source.Title, source.PlatformName, source.Url)) + } + } + return answer, nil } @@ -411,3 +468,507 @@ func (c *WenxinCollector) SafeElement(selector string) (*rod.Element, error) { } return c.Page.Element(selector) } + +// getShareLink 获取分享链接 +func (c *WenxinCollector) getShareLink() (string, error) { + c.LogInfo("=== 开始获取分享链接 ===") + + // 步骤1: 先找到包含dialogCardBottom的div + c.LogInfo("步骤1: 查找包含'dialogCardBottom'的div元素...") + + var dialogDiv *rod.Element + + allDivs, err := c.Page.Elements("div") + if err != nil { + return "", fmt.Errorf("获取页面div元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("在 %d 个div元素中查找包含'dialogCardBottom'的class", len(allDivs))) + + for _, elem := range allDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "dialogcardbottom") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到dialogCardBottom容器: tag=%s, class=%s", tagName.Str(), *classAttr)) + dialogDiv = elem + break + } + } + + if dialogDiv == nil { + return "", fmt.Errorf("未找到包含'dialogCardBottom' class的div元素") + } + + // 步骤2: 在这个div内部查找包含share的元素 + c.LogInfo("步骤2: 在dialogCardBottom容器内查找包含'share'的元素...") + + var shareDiv *rod.Element + + // 获取该容器内的所有子元素 + childDivs, err := dialogDiv.Elements("div") + if err != nil { + return "", fmt.Errorf("获取子div元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("在 %d 个子div元素中查找包含'share'的class", len(childDivs))) + + for _, elem := range childDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "share") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到目标元素: tag=%s, class=%s", tagName.Str(), *classAttr)) + shareDiv = elem + break + } + } + + if shareDiv == nil { + // 如果没找到div,尝试查找其他类型的元素(如button、svg等) + c.LogInfo("未在子div中找到,尝试查找其他元素类型...") + + // 尝试查找所有子元素 + allChildren, _ := dialogDiv.Elements("*") + for _, elem := range allChildren { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "share") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到目标元素: tag=%s, class=%s", tagName.Str(), *classAttr)) + shareDiv = elem + break + } + } + } + + if shareDiv == nil { + return "", fmt.Errorf("在dialogCardBottom容器内未找到包含'share' class的元素") + } + + // 滚动到元素位置 + c.LogInfo("滚动到分享图标位置...") + if scrollErr := shareDiv.ScrollIntoView(); scrollErr != nil { + c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr)) + } + c.SleepMs(800) + + // 普通点击 + c.LogInfo("执行普通点击...") + if clickErr := shareDiv.Click(proto.InputMouseButtonLeft, 1); clickErr != nil { + return "", fmt.Errorf("点击分享图标失败: %v", clickErr) + } + + c.LogInfo("✓ 点击成功") + c.SleepMs(2000) // 等待弹窗出现 + c.Screenshot("after_share_icon_click") + + // 步骤3: 在弹窗中查找shareContainer的div + c.LogInfo("步骤3: 查找包含'shareContainer'的div元素...") + + var shareContainerDiv *rod.Element + + // 重新获取所有div元素 + allDivs, err = c.Page.Elements("div") + if err != nil { + return "", fmt.Errorf("获取页面div元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("在 %d 个div元素中查找包含'shareContainer'的class", len(allDivs))) + + for _, elem := range allDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sharecontainer") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到shareContainer容器: tag=%s, class=%s", tagName.Str(), *classAttr)) + shareContainerDiv = elem + break + } + } + + if shareContainerDiv == nil { + return "", fmt.Errorf("未找到包含'shareContainer' class的div元素") + } + + // 步骤4: 在shareContainer内查找genLink的button + c.LogInfo("步骤4: 在shareContainer容器内查找包含'genLink'的button...") + + var genLinkBtn *rod.Element + + buttons, err := shareContainerDiv.Elements("button") + if err != nil { + return "", fmt.Errorf("获取button元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("在 %d 个button元素中查找包含'genLink'的class", len(buttons))) + + for _, elem := range buttons { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "genlink") { + tagName, _ := elem.Property("tagName") + text, _ := elem.Text() + c.LogInfo(fmt.Sprintf("✓ 找到genLink按钮: tag=%s, class=%s, text=%s", tagName.Str(), *classAttr, strings.TrimSpace(text))) + genLinkBtn = elem + break + } + } + + if genLinkBtn == nil { + return "", fmt.Errorf("在shareContainer容器内未找到包含'genLink' class的button") + } + + // 滚动到按钮位置 + c.LogInfo("滚动到genLink按钮位置...") + if scrollErr := genLinkBtn.ScrollIntoView(); scrollErr != nil { + c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr)) + } + c.SleepMs(500) + + // 点击genLink按钮 + c.LogInfo("点击genLink按钮...") + if clickErr := genLinkBtn.Click(proto.InputMouseButtonLeft, 1); clickErr != nil { + return "", fmt.Errorf("点击genLink按钮失败: %v", clickErr) + } + + c.LogInfo("✓ genLink按钮点击成功") + c.SleepMs(1500) // 等待复制链接完成 + + // 步骤5: 从剪贴板读取分享链接 + c.LogInfo("步骤5: 从系统剪贴板读取分享链接...") + + clipboardText, err := clipboard.ReadAll() + if err != nil { + return "", fmt.Errorf("读取剪贴板失败: %v", err) + } + + if clipboardText == "" { + return "", fmt.Errorf("剪贴板内容为空") + } + + c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", clipboardText)) + + // 使用正则表达式提取URL + // 匹配 http:// 或 https:// 开头的URL + re := regexp.MustCompile(`https?://[^\s]+`) + matches := re.FindStringSubmatch(clipboardText) + + if len(matches) == 0 { + return "", fmt.Errorf("未能从剪贴板内容中提取URL") + } + + url := matches[0] + c.LogInfo(fmt.Sprintf("✓✓✓ 成功获取分享链接: %s", url)) + return url, nil +} + +// GetSources 获取文章引用来源(前5个) +func (c *WenxinCollector) GetSources() ([]Source, error) { + c.LogInfo("=== 开始获取文章引用来源 ===") + + var sources []Source + + // 步骤1: 多层查找titleText的div + c.LogInfo("步骤1: 查找roleSystem容器...") + + var roleSystemDiv *rod.Element + + allDivs, err := c.Page.Elements("div") + if err != nil { + return nil, fmt.Errorf("获取页面div元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("在 %d 个div元素中查找包含'roleSystem'的class", len(allDivs))) + + for _, elem := range allDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "rolesystem") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到roleSystem容器: tag=%s, class=%s", tagName.Str(), *classAttr)) + roleSystemDiv = elem + break + } + } + + if roleSystemDiv == nil { + c.LogInfo("未找到roleSystem容器,结束获取") + return sources, nil // 没有找到就返回空列表 + } + + // 步骤2: 在roleSystem下查找container + c.LogInfo("步骤2: 在roleSystem内查找包含'container'的div...") + + var containerDiv *rod.Element + + containerDivs, err := roleSystemDiv.Elements("div") + if err != nil { + return nil, fmt.Errorf("获取roleSystem子div元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("在 %d 个子div中查找包含'container'的class", len(containerDivs))) + + for _, elem := range containerDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "container") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到container容器: tag=%s, class=%s", tagName.Str(), *classAttr)) + containerDiv = elem + break + } + } + + if containerDiv == nil { + c.LogInfo("未找到container容器,结束获取") + return sources, nil + } + + // 步骤3: 查找第二个container(在整个页面中查找所有container,取第二个) + c.LogInfo("步骤3: 在页面中查找所有包含'container'的div,找到第二个...") + + var secondContainerDiv *rod.Element + + allDivs, err = c.Page.Elements("div") + if err != nil { + return nil, fmt.Errorf("获取页面div元素失败: %v", err) + } + + containerCount := 0 + for _, elem := range allDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "container") { + containerCount++ + if containerCount == 2 { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到第二个container容器: tag=%s, class=%s", tagName.Str(), *classAttr)) + secondContainerDiv = elem + break + } + } + } + + if secondContainerDiv == nil { + c.LogInfo(fmt.Sprintf("未找到第二个container容器(共找到 %d 个),结束获取", containerCount)) + return sources, nil + } + + // 步骤4: 在第二个container内查找titleText + c.LogInfo("步骤4: 在第二个container内查找包含'titleText'的div...") + + var titleTextDiv *rod.Element + + titleTextDivs, err := secondContainerDiv.Elements("div") + if err != nil { + return nil, fmt.Errorf("获取第二个container的子div元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("在 %d 个子div中查找包含'titleText'的class", len(titleTextDivs))) + + for _, elem := range titleTextDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "titletext") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到titleText元素: tag=%s, class=%s", tagName.Str(), *classAttr)) + titleTextDiv = elem + break + } + } + + if titleTextDiv == nil { + c.LogInfo("未找到titleText元素,结束获取") + return sources, nil + } + + // 点击titleText + c.LogInfo("点击titleText元素...") + if scrollErr := titleTextDiv.ScrollIntoView(); scrollErr != nil { + c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr)) + } + c.SleepMs(500) + + if clickErr := titleTextDiv.Click(proto.InputMouseButtonLeft, 1); clickErr != nil { + return nil, fmt.Errorf("点击titleText失败: %v", clickErr) + } + + c.LogInfo("✓ titleText点击成功") + c.SleepMs(2000) // 等待侧边窗出现 + c.Screenshot("after_titletext_click") + + // 步骤2: 查找SourcesViewer侧边窗 + c.LogInfo("步骤2: 查找包含'SourcesViewer'的div元素...") + + var sourcesViewerDiv *rod.Element + + allDivs, err = c.Page.Elements("div") + if err != nil { + return nil, fmt.Errorf("获取页面div元素失败: %v", err) + } + + for _, elem := range allDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sourcesviewer") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到SourcesViewer容器: tag=%s, class=%s", tagName.Str(), *classAttr)) + sourcesViewerDiv = elem + break + } + } + + if sourcesViewerDiv == nil { + return nil, fmt.Errorf("未找到SourcesViewer侧边窗") + } + + // 步骤3: 在SourcesViewer内查找list容器 + c.LogInfo("步骤3: 在SourcesViewer内查找包含'list'的div...") + + var listDiv *rod.Element + + listDivs, err := sourcesViewerDiv.Elements("div") + if err != nil { + return nil, fmt.Errorf("获取子div元素失败: %v", err) + } + + for _, elem := range listDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "list") { + tagName, _ := elem.Property("tagName") + c.LogInfo(fmt.Sprintf("✓ 找到list容器: tag=%s, class=%s", tagName.Str(), *classAttr)) + listDiv = elem + break + } + } + + if listDiv == nil { + return nil, fmt.Errorf("未找到list容器") + } + + // 步骤4: 在list内查找所有item + c.LogInfo("步骤4: 在list内查找包含'item'的div...") + + itemDivs, err := listDiv.Elements("div") + if err != nil { + return nil, fmt.Errorf("获取item元素失败: %v", err) + } + + c.LogInfo(fmt.Sprintf("找到 %d 个item元素", len(itemDivs))) + + // 只处理前5个item + maxItems := 5 + if len(itemDivs) < maxItems { + maxItems = len(itemDivs) + } + + for i := 0; i < maxItems; i++ { + item := itemDivs[i] + + c.LogInfo(fmt.Sprintf("\n--- 处理第 %d 个item ---", i+1)) + + source := Source{} + + // 查找titleInfo (标题) + titleDivs, _ := item.Elements("div") + for _, div := range titleDivs { + classAttr, _ := div.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "title") { + text, _ := div.Text() + source.Title = strings.TrimSpace(text) + c.LogInfo(fmt.Sprintf(" 标题: %s", source.Title)) + break + } + } + + // 查找site_icon (图标URL) + imgs, _ := item.Elements("img") + for _, img := range imgs { + classAttr, _ := img.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "site_icon") { + srcAttr, _ := img.Attribute("src") + if srcAttr != nil { + source.PlatformIcon = *srcAttr + c.LogInfo(fmt.Sprintf(" 图标: %s", source.PlatformIcon)) + } + break + } + } + + // 查找siteText (来源媒体名称) + for _, div := range titleDivs { + classAttr, _ := div.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sitetext") { + text, _ := div.Text() + source.PlatformName = strings.TrimSpace(text) + c.LogInfo(fmt.Sprintf(" 来源: %s", source.PlatformName)) + break + } + } + + // 尝试获取跳转URL + // 方法1: 查找item内的a标签 + links, _ := item.Elements("a") + if len(links) > 0 { + href, _ := links[0].Attribute("href") + if href != nil && *href != "" { + source.Url = *href + c.LogInfo(fmt.Sprintf(" URL (从href获取): %s", source.Url)) + } + } + + // 方法2: 如果没找到href,尝试点击item获取URL + if source.Url == "" { + c.LogInfo(" 未找到href,尝试点击item获取URL...") + + // 记录当前URL + currentURL := c.Page.MustInfo().URL + + // 点击item + if scrollErr := item.ScrollIntoView(); scrollErr != nil { + c.LogInfo(fmt.Sprintf(" 滚动失败: %v", scrollErr)) + } + c.SleepMs(300) + + if clickErr := item.Click(proto.InputMouseButtonLeft, 1); clickErr != nil { + c.LogInfo(fmt.Sprintf(" 点击item失败: %v", clickErr)) + } else { + c.SleepMs(2000) // 等待页面跳转 + + // 获取新URL + newURL := c.Page.MustInfo().URL + if newURL != currentURL { + source.Url = newURL + c.LogInfo(fmt.Sprintf(" URL (从跳转获取): %s", source.Url)) + + // 返回上一页 + c.Page.MustNavigateBack() + c.SleepMs(1500) // 等待返回 + + // 重新查找item元素(因为页面刷新了) + c.LogInfo(" 重新查找item元素...") + allDivs, _ = c.Page.Elements("div") + for _, elem := range allDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sourcesviewer") { + sourcesViewerDiv = elem + break + } + } + if sourcesViewerDiv != nil { + listDivs, _ = sourcesViewerDiv.Elements("div") + for _, elem := range listDivs { + classAttr, _ := elem.Attribute("class") + if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "list") { + listDiv = elem + break + } + } + if listDiv != nil { + itemDivs, _ = listDiv.Elements("div") + } + } + } + } + } + + // 添加到结果列表 + if source.Title != "" || source.Url != "" { + sources = append(sources, source) + } + } + + c.LogInfo(fmt.Sprintf("\n✓✓✓ 成功获取 %d 个引用来源", len(sources))) + return sources, nil +} diff --git a/internal/data/model/collect.gen.go b/internal/data/model/collect.gen.go index ec400e4..b34b631 100644 --- a/internal/data/model/collect.gen.go +++ b/internal/data/model/collect.gen.go @@ -12,16 +12,17 @@ const TableNameCollect = "collect" // Collect mapped from table type Collect struct { - ID int64 `gorm:"column:id;primaryKey;autoIncrement:true" json:"id"` - ProductID int32 `gorm:"column:product_id;not null" json:"product_id"` - Keywords string `gorm:"column:keywords;not null" json:"keywords"` - Platform string `gorm:"column:platform;not null" json:"platform"` - Question string `gorm:"column:question;not null" json:"question"` - Progress int32 `gorm:"column:progress;not null" json:"progress"` - EndFile string `gorm:"column:end_file;not null" json:"end_file"` - CreatedAt time.Time `gorm:"column:created_at;not null;default:CURRENT_TIMESTAMP" json:"created_at"` - UpdatedAt time.Time `gorm:"column:updated_at;not null;default:CURRENT_TIMESTAMP" json:"updated_at"` - Status int32 `gorm:"column:status;not null;comment:1:生成中,2:生成完毕" json:"status"` // 1:生成中,2:生成完毕 + ID int64 `gorm:"column:id;primaryKey;autoIncrement:true" json:"id"` + CollectCode string `gorm:"column:collect_code;not null" json:"collect_code"` + ProductID int32 `gorm:"column:product_id;not null" json:"product_id"` + Keywords string `gorm:"column:keywords;not null" json:"keywords"` + Platform string `gorm:"column:platform;not null" json:"platform"` + Question string `gorm:"column:question;not null" json:"question"` + Progress int32 `gorm:"column:progress;not null" json:"progress"` + EndFile string `gorm:"column:end_file;not null" json:"end_file"` + CreatedAt time.Time `gorm:"column:created_at;not null;default:CURRENT_TIMESTAMP" json:"created_at"` + UpdatedAt time.Time `gorm:"column:updated_at;not null;default:CURRENT_TIMESTAMP" json:"updated_at"` + Status int32 `gorm:"column:status;not null;comment:1:生成中,2:生成完毕" json:"status"` // 1:生成中,2:生成完毕 } // TableName Collect's table name diff --git a/internal/data/model/collect_task.gen.go b/internal/data/model/collect_task.gen.go index 6353355..914c641 100644 --- a/internal/data/model/collect_task.gen.go +++ b/internal/data/model/collect_task.gen.go @@ -14,10 +14,11 @@ const TableNameCollectTask = "collect_task" type CollectTask struct { ID int64 `gorm:"column:id;primaryKey;autoIncrement:true" json:"id"` RequestID string `gorm:"column:request_id;not null" json:"request_id"` - CollectID int32 `gorm:"column:collect_id;not null" json:"collect_id"` - ScriptTime string `gorm:"column:script_time;not null" json:"script_time"` + CollectCode string `gorm:"column:collect_code;not null" json:"collect_code"` + Question string `gorm:"column:question;not null" json:"question"` + ScriptTime int32 `gorm:"column:script_time;not null" json:"script_time"` Platform int32 `gorm:"column:platform;not null" json:"platform"` - CollectData int32 `gorm:"column:collect_data;not null" json:"collect_data"` + CollectData string `gorm:"column:collect_data;not null" json:"collect_data"` ShareURL string `gorm:"column:share_url;not null;default:0" json:"share_url"` ImgURL string `gorm:"column:img_url;not null" json:"img_url"` PointKeyword string `gorm:"column:point_keyword;not null" json:"point_keyword"` diff --git a/internal/entitys/request.go b/internal/entitys/request.go index 00887d4..97b0006 100644 --- a/internal/entitys/request.go +++ b/internal/entitys/request.go @@ -198,7 +198,7 @@ type ( ProductCollectRequest struct { AccessToken string `json:"access_token" validate:"required" zh:"access_token"` Keywords []string `json:"keywords" validate:"required" zh:"关键词"` - Platform []int64 `json:"platform" validate:"required" zh:"平台"` + Platform []int `json:"platform" validate:"required" zh:"平台"` Question string `json:"question" validate:"required" zh:"问题"` ProductId int32 `json:"product_id" validate:"required" zh:"项目Id"` } diff --git a/internal/server/router/app.go b/internal/server/router/app.go index ea53612..7f44479 100644 --- a/internal/server/router/app.go +++ b/internal/server/router/app.go @@ -59,6 +59,7 @@ func (m *AppModule) Register(router fiber.Router) { router.Post("/product/detail", vali(m.productService.Detail, &entitys.ProductDetailRequest{})) router.Post("/product/update", vali(m.productService.Update, &entitys.ProductUpdateRequest{})) router.Post("/product/del", vali(m.productService.Del, &entitys.ProductDelRequest{})) + router.Post("/product/collect", vali(m.productService.Collect, &entitys.ProductCollectRequest{})) router.Post("/img/upload", m.productService.ImgUpload) router.Post("/plat/list", vali(m.appService.PlatList, &entitys.PlatListRequest{})) diff --git a/internal/service/product.go b/internal/service/product.go index e852781..88085ac 100644 --- a/internal/service/product.go +++ b/internal/service/product.go @@ -2,6 +2,7 @@ package service import ( "context" + "fmt" "geo/internal/ai_tool" "geo/internal/biz" "geo/internal/config" @@ -12,9 +13,13 @@ import ( "geo/tmpl/dataTemp" "geo/tmpl/errcode" "io" + "log" "os" "path/filepath" + "runtime/debug" + "strconv" "strings" + "sync" "time" "github.com/go-viper/mapstructure/v2" @@ -28,6 +33,8 @@ type ProductService struct { authBiz *biz.AuthBiz productBiz *biz.ProductBiz aiBiz *biz.AiBiz + collect *impl.CollectImpl + collectTask *impl.CollectTaskImpl } func NewProductService( @@ -45,6 +52,8 @@ func NewProductService( authBiz: authBiz, productBiz: productBiz, aiBiz: aiBiz, + collect: collect, + collectTask: collectTask, } } @@ -236,35 +245,341 @@ func (p *ProductService) CreateProductInfoByDocx(c *fiber.Ctx) error { } func (p *ProductService) Collect(c *fiber.Ctx, req *entitys.ProductCollectRequest) error { + log.Printf("[DEBUG] ========== 请求开始 ==========") + log.Printf("[DEBUG] 请求时间: %s", time.Now().Format("2006-01-02 15:04:05.000")) + log.Printf("[Collect] 开始处理收集请求, ProductID: %d, Platforms: %v, Keywords: %v", + req.ProductId, req.Platform, req.Keywords) + _, err := p.authBiz.ValidateAccessToken(c.UserContext(), req.AccessToken) if err != nil { + log.Printf("[Collect] 验证token失败, ProductID: %d, Error: %v", req.ProductId, err) return err } - productInfo, err := p.productBiz.GetProduct(c.UserContext(), req.ProductId) + + _, err = p.productBiz.GetProduct(c.UserContext(), req.ProductId) if err != nil { + log.Printf("[Collect] 获取产品信息失败, ProductID: %d, Error: %v", req.ProductId, err) return err } - collect := ai_tool.NewCollect(p.cfg.Collect.ApiKey) - ctx, cancel := context.WithTimeout(context.Background(), time.Second*120) - defer cancel() - for _, v := range req.Platform { + + platformStr := make([]string, len(req.Platform)) + for i, s := range req.Platform { + platformStr[i] = strconv.Itoa(s) + } + + collectCode := fmt.Sprintf("C%d_%d", req.ProductId, time.Now().UnixNano()) + collectData := &model.Collect{ + CollectCode: collectCode, + ProductID: req.ProductId, + Keywords: strings.Join(req.Keywords, ","), + Platform: strings.Join(platformStr, ","), + Question: req.Question, + CreatedAt: time.Now(), + } + + log.Printf("[Collect] 创建收集记录, CollectCode: %s, ProductID: %d", collectCode, req.ProductId) + + err = p.collect.Add(c.UserContext(), collectData) + if err != nil { + log.Printf("[Collect] 保存收集记录失败, CollectCode: %s, Error: %v", collectCode, err) + return err + } + + log.Printf("[Collect] ✅ 启动异步收集任务, CollectCode: %s, Platforms: %v", collectCode, req.Platform) + + go func() { + // 记录 goroutine 启动时间 + startTime := time.Now() + log.Printf("[Goroutine] 异步任务启动, CollectCode: %s, 启动时间: %s", collectCode, startTime.Format("15:04:05.000")) + + // 使用独立 context,避免请求结束后任务被取消 + ctx, cancel := context.WithTimeout(context.Background(), time.Second*240) + + // 监控 context 取消 go func() { - defer func() { - if r := recover(); r != nil { - } - }() - request := ai_tool.CreateReq{ - Keywords: strings.Join(req.Keywords, ","), - Question: req.Question, - Platform: v, - ThirdID: "11", + <-ctx.Done() + log.Printf("[Goroutine] ❌ Context被取消! CollectCode: %s, 原因: %v, 耗时: %v", + collectCode, ctx.Err(), time.Since(startTime)) + }() + + defer func() { + if r := recover(); r != nil { + log.Printf("[Goroutine] ❌ PANIC: %v\nStack: %s", r, debug.Stack()) } - _, _err := collect.Create(ctx, &request) - if _err != nil { - return + log.Printf("[Goroutine] 异步任务结束, CollectCode: %s, 总耗时: %v", collectCode, time.Since(startTime)) + cancel() + log.Printf("[Goroutine] 已调用 cancel(), CollectCode: %s", collectCode) + }() + + log.Printf("[Goroutine] 准备调用 doCollect, CollectCode: %s", collectCode) + p.doCollect(ctx, collectData, req.Platform) + log.Printf("[Goroutine] doCollect 已返回, CollectCode: %s", collectCode) + }() + + log.Printf("[DEBUG] ========== 请求返回 ==========") + return pkg.HandleResponse(c, "收录生成中") +} + +func (p *ProductService) doCollect(ctx context.Context, collectData *model.Collect, platforms []int) { + collectCode := collectData.CollectCode + startTime := time.Now() + + log.Printf("[doCollect] ========== 开始执行 ==========") + log.Printf("[doCollect] CollectCode: %s, Platforms: %v", collectCode, platforms) + log.Printf("[doCollect] Context状态: %v, 超时时间: %v", ctx.Err(), time.Second*240) + + // 监控 context + go func() { + <-ctx.Done() + log.Printf("[doCollect] ⚠️ 检测到Context取消! CollectCode: %s, 原因: %v, 已执行时间: %v", + collectCode, ctx.Err(), time.Since(startTime)) + }() + + collectClient := ai_tool.NewCollect(p.cfg.Collect.ApiKey) + log.Printf("[doCollect] 已创建 collectClient") + + var wg sync.WaitGroup + resCh := make(chan *model.CollectTask, len(platforms)) + log.Printf("[doCollect] 创建 channel, 容量: %d", len(platforms)) + + // 启动监控 goroutine + monitorStart := time.Now() + + // 启动所有平台的任务 + log.Printf("[doCollect] 启动 %d 个平台任务", len(platforms)) + for i, plat := range platforms { + log.Printf("[doCollect] 启动任务 #%d, Platform: %d", i+1, plat) + wg.Add(1) + go p.processPlatform(ctx, &wg, collectClient, collectData, plat, resCh, i+1) + } + + go func() { + log.Printf("[Monitor] 监控goroutine启动, CollectCode: %s", collectCode) + wg.Wait() + log.Printf("[Monitor] ✅ 所有任务完成, 准备关闭channel, 等待时间: %v", time.Since(monitorStart)) + close(resCh) + log.Printf("[Monitor] Channel已关闭") + }() + + // 收集结果 - 添加超时保护 + log.Printf("[doCollect] 开始等待结果...") + var datas []*model.CollectTask + taskCount := 0 + + // 设置一个最大等待时间 + waitTimeout := time.After(250 * time.Second) + + for { + select { + case task, ok := <-resCh: + if !ok { + log.Printf("[doCollect] Channel已关闭, 收集到 %d 条结果", len(datas)) + goto SAVE + } + datas = append(datas, task) + taskCount++ + log.Printf("[doCollect] ✅ 收到结果 #%d, Platform: %d, RequestID: %s, ScriptTime: %d", + taskCount, task.Platform, task.RequestID, task.ScriptTime) + + case <-waitTimeout: + log.Printf("[doCollect] ⚠️ 等待超时 250秒, 强制退出, 已收集: %d/%d", taskCount, len(platforms)) + goto SAVE + + case <-ctx.Done(): + log.Printf("[doCollect] ❌ Context取消, 强制退出, 已收集: %d/%d, 原因: %v", + taskCount, len(platforms), ctx.Err()) + goto SAVE + } + } + +SAVE: + log.Printf("[doCollect] 收集完成, 共 %d 条结果", len(datas)) + + // 保存结果 + if len(datas) > 0 { + log.Printf("[doCollect] 开始保存到数据库, 数量: %d", len(datas)) + saveStart := time.Now() + if err := p.collectTask.Add(ctx, datas); err != nil { + log.Printf("[doCollect] ❌ 保存失败: %v", err) + } else { + log.Printf("[doCollect] ✅ 保存成功, 耗时: %v", time.Since(saveStart)) + } + } else { + log.Printf("[doCollect] ⚠️ 没有结果需要保存") + } + + elapsed := time.Since(startTime) + log.Printf("[doCollect] ========== 结束执行, 总耗时: %v ==========", elapsed) +} + +func (p *ProductService) processPlatform(ctx context.Context, wg *sync.WaitGroup, + collectClient *ai_tool.Collect, collectData *model.Collect, plat int, + resCh chan<- *model.CollectTask, taskNum int) { + + collectCode := collectData.CollectCode + startTime := time.Now() + + log.Printf("[Platform #%d] ========== 开始 ==========", taskNum) + log.Printf("[Platform #%d] CollectCode: %s, Platform: %d", taskNum, collectCode, plat) + + // 确保 wg.Done() 一定会被调用 + defer func() { + log.Printf("[Platform #%d] 准备调用 wg.Done(), 已执行时间: %v", taskNum, time.Since(startTime)) + wg.Done() + log.Printf("[Platform #%d] 已调用 wg.Done()", taskNum) + }() + + defer func() { + if r := recover(); r != nil { + log.Printf("[Platform #%d] ❌ PANIC: %v\nStack: %s", taskNum, r, debug.Stack()) + } + log.Printf("[Platform #%d] ========== 结束, 耗时: %v ==========", taskNum, time.Since(startTime)) + }() + + // 检查 context 是否已取消 + select { + case <-ctx.Done(): + log.Printf("[Platform #%d] ❌ Context已取消, 退出执行, 原因: %v", taskNum, ctx.Err()) + return + default: + log.Printf("[Platform #%d] Context正常", taskNum) + } + + // 创建任务 + request := ai_tool.CreateReq{ + Keywords: collectData.Keywords, + Question: collectData.Question, + Platform: plat, + ThirdID: fmt.Sprintf("%s_%d", collectData.CollectCode, plat), + } + + log.Printf("[Platform #%d] 调用 Create API, Request: %+v", taskNum, request) + + createStart := time.Now() + res, err := collectClient.Create(ctx, &request) + createElapsed := time.Since(createStart) + + if err != nil { + log.Printf("[Platform #%d] ❌ Create失败, 耗时: %v, Error: %v", taskNum, createElapsed, err) + return + } + if res.Code != 1 { + log.Printf("[Platform #%d] ❌ Create返回错误码, 耗时: %v, Code: %d, Message: %s", + taskNum, createElapsed, res.Code, res.Msg) + return + } + + log.Printf("[Platform #%d] ✅ Create成功, 耗时: %v, RequestID: %s", + taskNum, createElapsed, res.Data.RequestId) + + // 轮询任务状态 + log.Printf("[Platform #%d] 开始轮询, RequestID: %s", taskNum, res.Data.RequestId) + + pollStart := time.Now() + task := p.pollTaskStatus(ctx, collectClient, res.Data.RequestId, collectData, plat, taskNum) + pollElapsed := time.Since(pollStart) + + if task != nil { + log.Printf("[Platform #%d] ✅ 轮询成功, 耗时: %v, ScriptTime: %d", + taskNum, pollElapsed, task.ScriptTime) + + // 发送结果到 channel + log.Printf("[Platform #%d] 准备发送结果到channel", taskNum) + select { + case resCh <- task: + log.Printf("[Platform #%d] ✅ 结果已发送到channel", taskNum) + case <-ctx.Done(): + log.Printf("[Platform #%d] ⚠️ Context取消, 放弃发送结果", taskNum) + return + } + } else { + log.Printf("[Platform #%d] ❌ 轮询失败, 耗时: %v, 未获取到结果", taskNum, pollElapsed) + } +} + +func (p *ProductService) pollTaskStatus(ctx context.Context, collectClient *ai_tool.Collect, + requestID string, collectData *model.Collect, plat int, taskNum int) *model.CollectTask { + + collectCode := collectData.CollectCode + startTime := time.Now() + + log.Printf("[Poll #%d] ========== 开始轮询 ==========", taskNum) + log.Printf("[Poll #%d] CollectCode: %s, Platform: %d, RequestID: %s", + taskNum, collectCode, plat, requestID) + + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + errCount := 0 + const maxErrors = 5 + pollCount := 0 + + for { + select { + case <-ctx.Done(): + log.Printf("[Poll #%d] ❌ Context取消, 停止轮询, 已轮询%d次, 耗时: %v, 原因: %v", + taskNum, pollCount, time.Since(startTime), ctx.Err()) + return nil + + case <-ticker.C: + pollCount++ + log.Printf("[Poll #%d] 第 %d 次轮询, 已耗时: %v", taskNum, pollCount, time.Since(startTime)) + + checkStart := time.Now() + checkRes, err := collectClient.CheckTask(ctx, requestID) + checkElapsed := time.Since(checkStart) + + if err != nil { + errCount++ + log.Printf("[Poll #%d] ❌ 轮询失败(第%d次错误), 耗时: %v, Error: %v, 累计错误: %d/%d", + taskNum, pollCount, checkElapsed, err, errCount, maxErrors) + if errCount >= maxErrors { + log.Printf("[Poll #%d] 达到最大错误次数, 停止轮询", taskNum) + return nil + } + continue } - }() + log.Printf("[Poll #%d] ✅ 轮询成功, 耗时: %v, Code: %d, Status: %d, ScriptTime: %d, ShouluDate: %s", + taskNum, checkElapsed, checkRes.Code, checkRes.Data.Status, + checkRes.Data.ScriptTime, checkRes.Data.ShouluDate) + + if checkRes.Code != 1 { + log.Printf("[Poll #%d] ❌ 返回错误码: %d", taskNum, checkRes.Code) + return nil + } + // 判断任务是否完成 + // 根据你的业务逻辑调整判断条件 + isCompleted := false + completeReason := "" + + if checkRes.Data.Status != 0 { // 假设 2 表示完成 + isCompleted = true + completeReason = fmt.Sprintf("chekcStatus=%d", checkRes.Data.Status) + } + + if isCompleted { + log.Printf("[Poll #%d] 🎉 任务完成! 原因: %s, 总轮询次数: %d, 总耗时: %v", + taskNum, completeReason, pollCount, time.Since(startTime)) + + return &model.CollectTask{ + RequestID: checkRes.Data.RequestId, + CollectCode: collectData.CollectCode, + ScriptTime: int32(checkRes.Data.ScriptTime), + Platform: int32(checkRes.Data.Platform), + CollectData: checkRes.Data.ShouluDate, + ShareURL: checkRes.Data.ShareUrl, + ImgURL: checkRes.Data.ImgUrl, + PointKeyword: checkRes.Data.HitWord, + Question: checkRes.Data.Question, + Res: pkg.JsonStringIgonErr(checkRes), + CreatedAt: time.Now(), + Status: int32(checkRes.Data.Status), + } + } + + log.Printf("[Poll #%d] 任务未完成, 继续轮询, Status=%d, ScriptTime=%d, ShouluDate=%s", + taskNum, checkRes.Data.Status, checkRes.Data.ScriptTime, checkRes.Data.ShouluDate) + } } - return pkg.HandleResponse(c, productInfo) }