This commit is contained in:
renzhiyuan 2026-04-25 17:23:04 +08:00
parent 82c94a7038
commit a7d5f4ff0b
14 changed files with 1037 additions and 460 deletions

4
.gitignore vendored
View File

@ -3,4 +3,6 @@ chrome/*
chrome_data/* chrome_data/*
cookies/* cookies/*
docs/* docs/*
logs/* logs/*
md/*
cmd/server/server.exe

BIN
cmd/server/server.exe Normal file

Binary file not shown.

View File

@ -2,15 +2,11 @@ package collect
import ( import (
"context" "context"
"fmt"
"geo/internal/collect" "geo/internal/collect"
"geo/internal/config" "geo/internal/config"
"log" "log"
"os" "os"
"strings"
"testing" "testing"
"github.com/go-rod/rod/lib/proto"
) )
var ( var (
@ -81,97 +77,6 @@ func TestWenxinCollector_WaitLogin(t *testing.T) {
t.Log("Cookie已保存后续测试可以使用已登录状态") t.Log("Cookie已保存后续测试可以使用已登录状态")
} }
// TestWenxinCollector_SimpleAsk is a manual smoke test for the Wenxin chat
// page. It starts a visible browser, loads saved cookies, types a short
// question into the first contenteditable element and clicks the first
// <button> on the page, then waits so the result can be observed by a human.
// The test is skipped in -short mode and fails fast when the session is not
// logged in.
func TestWenxinCollector_SimpleAsk(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Headless:  false, // keep the browser visible so the run can be watched
		UserIndex: "test_user",
		PlatIndex: "wenxin",
		RequestID: "test_wenxin_simple_001",
		Platform:  "wenxin",
	}

	t.Log("=== 简单测试文心一言提问 ===")

	// Obtain the collector.
	collector, err := manager.GetCollector("wenxin", params)
	if err != nil {
		t.Fatalf("获取收集器失败: %v", err)
	}
	// A checked assertion reports an unexpected collector type as a test
	// failure instead of panicking.
	wenxinCollector, ok := collector.(*collect.WenxinCollector)
	if !ok {
		t.Fatalf("收集器类型不是*collect.WenxinCollector: %T", collector)
	}

	// Start the browser.
	if err := wenxinCollector.SetupDriver(); err != nil {
		t.Fatalf("启动浏览器失败: %v", err)
	}
	defer wenxinCollector.Close()

	// Cookies are optional here; the login check below decides whether the
	// test can continue.
	if err := wenxinCollector.LoadCookies(); err != nil {
		t.Logf("未找到Cookie文件: %v", err)
	}

	// Open the chat page.
	wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL)
	wenxinCollector.Sleep(5)

	// Verify the login state.
	isLoggedIn := wenxinCollector.CheckLoginStatus()
	t.Logf("登录状态: %v", isLoggedIn)
	if !isLoggedIn {
		t.Fatal("未登录请先调用WaitLogin登录")
	}

	question := "你好"
	t.Logf("准备输入问题: %s", question)

	// Locate the input box.
	inputBox, err := wenxinCollector.WaitForElementVisible("[contenteditable='true']", 10)
	if err != nil {
		t.Fatalf("未找到输入框: %v", err)
	}
	t.Log("✓ 找到输入框")

	// Focus the input box; a failed click means nothing below can work.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		t.Fatalf("点击输入框失败: %v", err)
	}
	wenxinCollector.SleepMs(500)

	// Clear any previous content.
	wenxinCollector.ClearInput(inputBox)
	wenxinCollector.SleepMs(300)

	// Type the question.
	t.Log("正在输入问题...")
	if err := inputBox.Input(question); err != nil {
		t.Fatalf("输入问题失败: %v", err)
	}
	wenxinCollector.SleepMs(1000)
	t.Log("✓ 问题已输入")

	// Find and click the send button.
	// NOTE(review): this selects the first <button> on the page, which is
	// only presumably the send button — confirm against the live DOM.
	sendBtn, err := wenxinCollector.Page.Element("button")
	if err != nil {
		t.Fatalf("未找到发送按钮: %v", err)
	}
	t.Log("✓ 找到发送按钮")

	t.Log("正在点击发送按钮...")
	if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
		t.Fatalf("点击发送按钮失败: %v", err)
	}
	wenxinCollector.SleepMs(3000)
	t.Log("✓ 已点击发送按钮")

	t.Log("\n请观察浏览器窗口查看是否成功发送问题并收到回答")
	t.Log("测试将在10秒后结束...")
	wenxinCollector.Sleep(10)

	t.Log("=== 测试完成 ===")
}
// TestWenxinCollector_AskQuestion 测试文心一言提问功能 // TestWenxinCollector_AskQuestion 测试文心一言提问功能
// 注意:此测试需要有效的登录状态 // 注意:此测试需要有效的登录状态
func TestWenxinCollector_AskQuestion(t *testing.T) { func TestWenxinCollector_AskQuestion(t *testing.T) {
@ -189,7 +94,7 @@ func TestWenxinCollector_AskQuestion(t *testing.T) {
} }
// 定义提问内容 // 定义提问内容
question := "请用一句话介绍Go语言" question := "四川房地产软件排名"
t.Logf("向文心一言提问: %s", question) t.Logf("向文心一言提问: %s", question)
// 调用管理器提问并获取答案 // 调用管理器提问并获取答案
@ -206,330 +111,3 @@ func TestWenxinCollector_AskQuestion(t *testing.T) {
t.Error("答案为空") t.Error("答案为空")
} }
} }
// TestMultiplePlatforms_Compare asks the same question on several platforms
// and logs the answers next to each other. A platform that fails does not
// abort the run; its error text is recorded in place of the answer. The test
// is skipped in -short mode.
func TestMultiplePlatforms_Compare(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	question := "什么是人工智能?"
	platforms := []string{"wenxin", "deepseek"}

	results := make(map[string]string, len(platforms))

	for _, platform := range platforms {
		params := &collect.CollectParams{
			Headless:  true,
			UserIndex: "test_user",
			PlatIndex: platform,
			RequestID: fmt.Sprintf("test_%s", platform),
			Platform:  platform,
		}

		t.Logf("正在向%s提问...", platform)
		answer, err := manager.AskQuestion(platform, params, question)
		if err != nil {
			t.Logf("%s提问失败: %v", platform, err)
			results[platform] = fmt.Sprintf("错误: %v", err)
			continue
		}

		results[platform] = answer
		t.Logf("%s回答完成长度: %d", platform, len(answer))
	}

	// Print the comparison in the order the platforms were asked. Ranging
	// over the map directly would shuffle the output between runs.
	t.Log("\n===== 多平台回答对比 =====")
	for _, platform := range platforms {
		t.Logf("\n[%s]:\n%s\n", platform, results[platform])
	}
}
// TestWenxinCollector_DebugPageStructure is a manual debugging aid. It opens
// the Wenxin chat page in a visible browser and logs the first few elements
// matched by a list of candidate selectors for the input box and for the send
// button. It is skipped in -short mode.
//
// The function ends with `select {}` and therefore never returns on its own:
// the browser stays open until the test process is interrupted or hits the
// test timeout, and the deferred Close does not run before that.
func TestWenxinCollector_DebugPageStructure(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Headless:  false,
		UserIndex: "test_user",
		PlatIndex: "wenxin",
		RequestID: "test_wenxin_debug_001",
		Platform:  "wenxin",
	}

	t.Log("=== 调试文心一言页面结构 ===")

	// Obtain the collector.
	collector, err := manager.GetCollector("wenxin", params)
	if err != nil {
		t.Fatalf("获取收集器失败: %v", err)
	}
	// Checked assertion: a wrong type is a test failure, not a panic.
	wenxinCollector, ok := collector.(*collect.WenxinCollector)
	if !ok {
		t.Fatalf("收集器类型不是*collect.WenxinCollector: %T", collector)
	}

	if err := wenxinCollector.SetupDriver(); err != nil {
		t.Fatalf("启动浏览器失败: %v", err)
	}
	defer wenxinCollector.Close()

	// Cookies are optional; the login check below is authoritative.
	if err := wenxinCollector.LoadCookies(); err != nil {
		t.Logf("未找到Cookie文件: %v", err)
	}

	// Open the chat page.
	wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL)
	wenxinCollector.Sleep(5)

	// Verify the login state.
	isLoggedIn := wenxinCollector.CheckLoginStatus()
	t.Logf("登录状态: %v", isLoggedIn)
	if !isLoggedIn {
		t.Fatal("未登录请先调用WaitLogin登录")
	}

	// deref turns an optional attribute value into a plain string.
	deref := func(p *string) string {
		if p == nil {
			return ""
		}
		return *p
	}
	// clip keeps at most n runes of s. Cutting by runes rather than bytes
	// never splits a multi-byte character, so the log stays valid UTF-8.
	clip := func(s string, n int) string {
		runes := []rune(s)
		if len(runes) <= n {
			return s
		}
		return string(runes[:n])
	}

	// Candidate selectors for the input box.
	t.Log("\n=== 查找输入框 ===")
	inputSelectors := []string{
		"textarea",
		"[contenteditable='true']",
		"input[type='text']",
		".input-box",
		"#chat-input",
		"[placeholder]",
	}

	for _, selector := range inputSelectors {
		elements, err := wenxinCollector.Page.Elements(selector)
		if err != nil || len(elements) == 0 {
			t.Logf("✗ 未找到元素: %s", selector)
			continue
		}
		t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector)
		for i, elem := range elements {
			if i >= 3 {
				break // only show the first three matches
			}
			// Lookup errors are ignored on purpose: this is best-effort
			// diagnostic output and missing values print as empty.
			text, _ := elem.Text()
			tagName, _ := elem.Property("tagName")
			class, _ := elem.Attribute("class")
			id, _ := elem.Attribute("id")
			placeholder, _ := elem.Attribute("placeholder")

			t.Logf(" [%d] tag=%s, id=%s, class=%s, placeholder=%s, text=%s",
				i, tagName.Str(), deref(id), deref(class), deref(placeholder), clip(text, 50))
		}
	}

	// Candidate selectors for the send button.
	t.Log("\n=== 查找发送按钮 ===")
	buttonSelectors := []string{
		"button",
		"svg",
		"[aria-label]",
	}

	for _, selector := range buttonSelectors {
		elements, err := wenxinCollector.Page.Elements(selector)
		if err != nil || len(elements) == 0 {
			continue
		}
		t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector)
		for i, elem := range elements {
			if i >= 5 {
				break
			}
			text, _ := elem.Text()
			tagName, _ := elem.Property("tagName")
			class, _ := elem.Attribute("class")
			ariaLabel, _ := elem.Attribute("aria-label")

			ariaLabelText := deref(ariaLabel)
			trimmedText := strings.TrimSpace(text)
			// Elements with neither text nor an aria-label carry no
			// useful hint, so they are not logged.
			if trimmedText != "" || ariaLabelText != "" {
				t.Logf(" [%d] tag=%s, class=%s, aria-label=%s, text=%s",
					i, tagName.Str(), deref(class), ariaLabelText, clip(trimmedText, 30))
			}
		}
	}

	t.Log("\n=== 调试完成 ===")
	t.Log("请保持浏览器窗口打开,手动检查页面结构")

	// Block forever so the browser window stays open for inspection.
	select {}
}
// TestWenxinCollector_DebugAnswer is a manual debugging aid for locating the
// answer area of the Wenxin chat page. After the operator has asked a question
// by hand and pressed Enter on stdin, it logs every element whose class
// contains "answer", "response" or "message", followed by every <div> with
// more than 50 bytes of trimmed text. It is skipped in -short mode.
//
// The function ends with `select {}` and therefore never returns on its own;
// the deferred Close does not run until the process is stopped.
func TestWenxinCollector_DebugAnswer(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Headless:  false,
		UserIndex: "test_user",
		PlatIndex: "wenxin",
		RequestID: "test_wenxin_debug_answer",
		Platform:  "wenxin",
	}

	t.Log("=== 调试文心一言答案区域 ===")

	collector, err := manager.GetCollector("wenxin", params)
	if err != nil {
		t.Fatalf("获取收集器失败: %v", err)
	}
	// Checked assertion: a wrong type is a test failure, not a panic.
	wenxinCollector, ok := collector.(*collect.WenxinCollector)
	if !ok {
		t.Fatalf("收集器类型不是*collect.WenxinCollector: %T", collector)
	}

	if err := wenxinCollector.SetupDriver(); err != nil {
		t.Fatalf("启动浏览器失败: %v", err)
	}
	defer wenxinCollector.Close()

	if err := wenxinCollector.LoadCookies(); err != nil {
		t.Logf("未找到Cookie文件: %v", err)
	}

	wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL)
	wenxinCollector.Sleep(5)

	if !wenxinCollector.CheckLoginStatus() {
		t.Fatal("未登录")
	}

	// The operator asks the question by hand; Enter on stdin resumes the test.
	t.Log("请在浏览器中手动输入问题并等待AI回答完成")
	t.Log("然后按回车键继续...")
	fmt.Scanln()

	// deref turns an optional attribute value into a plain string.
	deref := func(p *string) string {
		if p == nil {
			return ""
		}
		return *p
	}
	// clip keeps at most n runes of s. Cutting by runes rather than bytes
	// never splits a multi-byte character, so the log stays valid UTF-8.
	clip := func(s string, n int) string {
		runes := []rune(s)
		if len(runes) <= n {
			return s
		}
		return string(runes[:n])
	}

	t.Log("\n=== 查找答案容器 ===")

	// Approach 1: elements whose class mentions answer/response/message.
	// Lookup errors are ignored on purpose: this is best-effort diagnostics.
	containers, _ := wenxinCollector.Page.Elements("[class*='answer'], [class*='response'], [class*='message']")
	t.Logf("找到 %d 个容器元素", len(containers))

	for i, container := range containers {
		text, _ := container.Text()
		classAttr, _ := container.Attribute("class")
		tagName, _ := container.Property("tagName")

		if len(strings.TrimSpace(text)) > 20 {
			t.Logf("[%d] tag=%s, class=%s, text长度=%d, 前100字符=%s",
				i, tagName.Str(), deref(classAttr), len(text), clip(text, 100))
		}
	}

	// Approach 2: every <div> that carries a longer piece of text.
	t.Log("\n=== 查找长文本div ===")
	allDivs, _ := wenxinCollector.Page.Elements("div")

	type longTextDiv struct {
		index int
		text  string
		class string
	}
	var longTextDivs []longTextDiv

	for i, div := range allDivs {
		text, _ := div.Text()
		if len(strings.TrimSpace(text)) <= 50 {
			continue
		}
		classAttr, _ := div.Attribute("class")
		longTextDivs = append(longTextDivs, longTextDiv{index: i, text: text, class: deref(classAttr)})
	}

	t.Logf("找到 %d 个长文本div", len(longTextDivs))
	for _, item := range longTextDivs {
		t.Logf("[%d] class=%s, 长度=%d, 前150字符=%s",
			item.index, item.class, len(item.text), clip(item.text, 150))
	}

	t.Log("\n=== 调试完成,请保持浏览器打开以便观察 ===")
	// Block forever so the browser window stays open for inspection.
	select {}
}
// BenchmarkWenxinCollector is a placeholder benchmark (for reference only);
// it records a skip message and stops immediately.
func BenchmarkWenxinCollector(b *testing.B) {
	b.Log("跳过性能测试")
	b.SkipNow()
}
// ExampleCollectManager shows typical use of the collect manager: print the
// supported platforms, then ask Wenxin a single question and print either the
// answer or the error.
func ExampleCollectManager() {
	// Supported platforms.
	fmt.Printf("支持的平台: %v\n", manager.ListPlatforms())

	// Ask Wenxin one question with a headless browser.
	answer, err := manager.AskQuestion(
		"wenxin",
		&collect.CollectParams{
			Headless:  true,
			UserIndex: "user_001",
			PlatIndex: "wenxin",
			RequestID: "req_001",
			Platform:  "wenxin",
		},
		"什么是人工智能?",
	)
	if err == nil {
		fmt.Printf("答案: %s\n", answer)
		return
	}

	fmt.Printf("错误: %v\n", err)
}
// ExampleWenxinCollector_WaitLogin shows the interactive Wenxin login flow:
// the browser is shown, the manager waits for the login, and the outcome is
// printed.
func ExampleWenxinCollector_WaitLogin() {
	loginParams := &collect.CollectParams{
		Headless:  false, // the browser must be visible to log in
		UserIndex: "user_001",
		PlatIndex: "wenxin",
		RequestID: "example_login_001",
		Platform:  "wenxin",
	}

	fmt.Println("正在打开文心一言登录页面...")

	ok, message := manager.WaitLogin("wenxin", loginParams)
	if !ok {
		fmt.Printf("登录失败: %s\n", message)
		return
	}

	fmt.Printf("登录成功: %s\n", message)
	fmt.Println("Cookie已保存下次可以自动登录")
}

1
go.mod
View File

@ -22,6 +22,7 @@ require (
require ( require (
filippo.io/edwards25519 v1.1.0 // indirect filippo.io/edwards25519 v1.1.0 // indirect
github.com/andybalholm/brotli v1.1.0 // indirect github.com/andybalholm/brotli v1.1.0 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/gabriel-vasile/mimetype v1.4.13 // indirect github.com/gabriel-vasile/mimetype v1.4.13 // indirect

2
go.sum
View File

@ -10,6 +10,8 @@ github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible h1:8psS8a+wKfiLt1iVDX79F
github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8= github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=

View File

@ -64,7 +64,7 @@ type CreateReq struct {
// 品牌词,多个用英文逗号隔开 // 品牌词,多个用英文逗号隔开
Keywords string `json:"keywords"` Keywords string `json:"keywords"`
// 平台1-deepseek2-豆包3-元宝4-千问5-文心一言6-纳米7-kimi8-智普 // 平台1-deepseek2-豆包3-元宝4-千问5-文心一言6-纳米7-kimi8-智普
Platform int64 `json:"platform"` Platform int `json:"platform"`
// 问题 // 问题
Question string `json:"question"` Question string `json:"question"`
// 建议填第三方的用户id。方便查单 // 建议填第三方的用户id。方便查单
@ -110,7 +110,7 @@ type CheckTaskRes struct {
} `json:"data"` } `json:"data"`
} }
func (s *Collect) CheckTask(requestId string) (*CheckTaskRes, error) { func (s *Collect) CheckTask(ctx context.Context, requestId string) (*CheckTaskRes, error) {
url := "http://8.138.187.158:8082/api/geo/check_task" url := "http://8.138.187.158:8082/api/geo/check_task"
request := map[string]interface{}{ request := map[string]interface{}{
"request_id": requestId, "request_id": requestId,

View File

@ -110,16 +110,25 @@ func (b *BaseCollector) SetupDriver() error {
l.UserDataDir(userDataDir) l.UserDataDir(userDataDir)
l.Set("window-size", "1920,1080") l.Set("window-size", "1920,1080")
// 设置中文语言环境
l.Set("lang", "zh-CN") l.Set("lang", "zh-CN")
l.Set("accept-lang", "zh-CN,zh;q=0.9,en;q=0.8")
l.Set("force-device-scale-factor", "1") l.Set("force-device-scale-factor", "1")
// 设置时区为中国
l.Set("timezone", "Asia/Shanghai")
url, err := l.Launch() url, err := l.Launch()
if err != nil { if err != nil {
return fmt.Errorf("启动浏览器失败: %v", err) return fmt.Errorf("启动浏览器失败: %v", err)
} }
b.Browser = rod.New().Context(b.ctx).ControlURL(url).MustConnect() b.Browser = rod.New().Context(b.ctx).ControlURL(url).MustConnect()
// 创建新页面
b.Page = b.Browser.MustPage() b.Page = b.Browser.MustPage()
return nil return nil
} }
@ -302,6 +311,11 @@ func (b *BaseCollector) InitPage() error {
b.Page.MustNavigate(b.ChatURL) b.Page.MustNavigate(b.ChatURL)
b.WaitForPageReady(5) b.WaitForPageReady(5)
b.Sleep(2) b.Sleep(2)
} else {
// 首次访问,先导航到页面
b.Page.MustNavigate(b.ChatURL)
b.WaitForPageReady(5)
b.Sleep(2)
} }
b.SaveCookies() b.SaveCookies()

View File

@ -1,6 +1,7 @@
package collect package collect
import ( import (
"fmt"
"regexp" "regexp"
"strings" "strings"
) )
@ -26,10 +27,110 @@ func CleanHTMLTags(html string) string {
// 去除多余的空格和换行 // 去除多余的空格和换行
cleaned = strings.TrimSpace(cleaned) cleaned = strings.TrimSpace(cleaned)
// 将多个连续空格替换为单个空格 // 将多个连续空格替换为单个空格
multipleSpaces := regexp.MustCompile(`\s+`) multipleSpaces := regexp.MustCompile(`\s+`)
cleaned = multipleSpaces.ReplaceAllString(cleaned, " ") cleaned = multipleSpaces.ReplaceAllString(cleaned, " ")
return cleaned return cleaned
} }
var (
	// cleanDivTagPattern matches an opening, closing or self-closing <div>
	// tag in any letter case. The tag name must be followed by whitespace,
	// "/" or ">", so tags that merely start with "div" (for example
	// <divider>) are left alone.
	cleanDivTagPattern = regexp.MustCompile(`(?i)</?div(?:\s[^>]*)?/?>`)

	// cleanDivSpacePattern matches a run of whitespace, newlines included.
	cleanDivSpacePattern = regexp.MustCompile(`\s+`)

	// cleanDivEntityDecoder decodes the common HTML entities in one pass.
	// A single pass never re-reads its own output, so "&amp;quot;" becomes
	// the text "&quot;" rather than being decoded twice into a quote.
	cleanDivEntityDecoder = strings.NewReplacer(
		"&nbsp;", " ",
		"&lt;", "<",
		"&gt;", ">",
		"&amp;", "&",
		"&quot;", "\"",
		"&#39;", "'",
	)
)

// CleanDivTags removes every <div ...> and </div> tag from html while keeping
// the content between them and all other tags. It then decodes the entities
// &nbsp; &lt; &gt; &amp; &quot; and &#39;, trims surrounding whitespace and
// collapses every run of whitespace (including newlines) into one space.
// An empty input yields an empty string.
func CleanDivTags(html string) string {
	if html == "" {
		return ""
	}

	cleaned := cleanDivTagPattern.ReplaceAllString(html, "")
	cleaned = cleanDivEntityDecoder.Replace(cleaned)
	cleaned = strings.TrimSpace(cleaned)

	return cleanDivSpacePattern.ReplaceAllString(cleaned, " ")
}
// HighlightKeywordsInHTML wraps every occurrence of the given keywords in
// htmlContent with a coloured, bold <span>.
//
//   - htmlContent: the HTML to process.
//   - pointKeys: the keywords to highlight. Empty keywords are skipped.
//     Matching is case-insensitive and by substring; the matched text keeps
//     its original letter case.
//
// Each keyword is coloured by its position in pointKeys, cycling through a
// fixed palette. The content is scanned once: existing markup (anything that
// looks like a tag or comment) is copied through untouched, and the <span>
// tags emitted for one keyword are never rescanned for another. This keeps
// keywords such as "span", "color" or "p" from corrupting the HTML. When two
// keywords could match at the same position, the one listed first wins.
//
// The input is returned unchanged when it is empty or no usable keyword is
// given.
func HighlightKeywordsInHTML(htmlContent string, pointKeys []string) string {
	if htmlContent == "" || len(pointKeys) == 0 {
		return htmlContent
	}

	// Predefined palette of CSS colour values.
	colors := []string{
		"#FF6B6B", // red
		"#4ECDC4", // cyan
		"#45B7D1", // blue
		"#FFA07A", // light orange
		"#98D8C8", // mint green
		"#F7DC6F", // yellow
		"#BB8FCE", // purple
		"#85C1E2", // sky blue
		"#F8B739", // orange
		"#52B788", // green
		"#E63946", // deep red
		"#457B9D", // deep blue
		"#2A9D8F", // teal
		"#E9C46A", // golden yellow
		"#F4A261", // tangerine
	}

	// Build one alternation: the first branch swallows markup, then every
	// keyword gets its own capture group so the match tells us which colour
	// to use.
	var pattern strings.Builder
	pattern.WriteString(`(?i)<(?:/?[a-z]|!)[^>]*>`)
	groupColors := make([]string, 0, len(pointKeys))
	for index, keyword := range pointKeys {
		if keyword == "" {
			continue
		}
		pattern.WriteString("|(")
		pattern.WriteString(regexp.QuoteMeta(keyword))
		pattern.WriteString(")")
		// The colour follows the keyword's index in pointKeys, so skipped
		// empty entries still advance the palette.
		groupColors = append(groupColors, colors[index%len(colors)])
	}
	if len(groupColors) == 0 {
		return htmlContent
	}

	re, err := regexp.Compile(pattern.String())
	if err != nil {
		// Not expected because every keyword is quoted; fall back to the
		// unmodified content rather than panicking.
		return htmlContent
	}

	var out strings.Builder
	out.Grow(len(htmlContent))
	last := 0
	for _, loc := range re.FindAllStringSubmatchIndex(htmlContent, -1) {
		out.WriteString(htmlContent[last:loc[0]])
		last = loc[1]
		matched := htmlContent[loc[0]:loc[1]]

		color := ""
		for g, candidate := range groupColors {
			if loc[2*(g+1)] >= 0 {
				color = candidate
				break
			}
		}
		if color == "" {
			// The markup branch matched: copy the tag through verbatim.
			out.WriteString(matched)
			continue
		}

		out.WriteString(`<span style="color:`)
		out.WriteString(color)
		out.WriteString(`;font-weight:bold;">`)
		out.WriteString(matched)
		out.WriteString(`</span>`)
	}
	out.WriteString(htmlContent[last:])

	return out.String()
}
// HighlightKeywordsInText 在纯文本中高亮显示指定的关键词先转换为HTML
// textContent: 纯文本内容
// pointKeys: 需要高亮的关键词列表
// 返回带有高亮标记的HTML内容
func HighlightKeywordsInText(textContent string, pointKeys []string) string {
if textContent == "" || len(pointKeys) == 0 {
return textContent
}
// 将纯文本转换为HTML段落格式
htmlContent := fmt.Sprintf("<p>%s</p>", strings.ReplaceAll(textContent, "\n", "</p><p>"))
// 使用HTML高亮方法
return HighlightKeywordsInHTML(htmlContent, pointKeys)
}

View File

@ -8,10 +8,20 @@ import (
"strings" "strings"
"time" "time"
"github.com/atotto/clipboard"
"github.com/go-rod/rod" "github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto" "github.com/go-rod/rod/lib/proto"
"regexp"
) )
// Source describes one reference (citation) attached to an answer.
// Note that the JSON keys differ from the Go field names.
type Source struct {
	Title        string `json:"name"`          // title of the cited article; serialized as "name"
	Url          string `json:"url"`           // link to the cited article
	PlatformName string `json:"platform"`      // name of the site/media the article comes from; serialized as "platform"
	PlatformIcon string `json:"Platform_icon"` // icon URL of that site; the key is capitalized as "Platform_icon"
}
// WenxinCollector 文心一言收集器 // WenxinCollector 文心一言收集器
type WenxinCollector struct { type WenxinCollector struct {
*BaseCollector *BaseCollector
@ -30,6 +40,35 @@ func NewWenxinCollector(ctx context.Context, params *CollectParams, cfg *config.
return collector return collector
} }
// SetupDriver overrides BaseCollector.SetupDriver. After the base driver is
// up, it registers a script that makes navigator.language and
// navigator.languages report Chinese (zh-CN).
//
// The script is registered with EvalOnNewDocument so it is injected into every
// document the page loads. A one-off Eval would only patch the document that
// is open right after start-up, and the override would be gone after the first
// navigation.
//
// Failing to register the script is not fatal: it is logged and nil is
// returned. An error is returned only when the base SetupDriver fails.
func (c *WenxinCollector) SetupDriver() error {
	if err := c.BaseCollector.SetupDriver(); err != nil {
		return err
	}

	// Override navigator.language / navigator.languages with Chinese values.
	const jsCode = `
(function() {
	Object.defineProperty(navigator, 'language', {
		get: function() { return 'zh-CN'; },
		configurable: true
	});
	Object.defineProperty(navigator, 'languages', {
		get: function() { return ['zh-CN', 'zh', 'en']; },
		configurable: true
	});
})();
`

	// The returned remove function is dropped on purpose: the override should
	// stay active for the whole lifetime of the page.
	if _, err := c.Page.EvalOnNewDocument(jsCode); err != nil {
		c.LogInfo(fmt.Sprintf("设置语言失败: %v", err))
		return nil
	}

	c.LogInfo("已设置浏览器语言为中文 (zh-CN)")
	return nil
}
// CheckLoginStatus 检查登录状态 // CheckLoginStatus 检查登录状态
func (c *WenxinCollector) CheckLoginStatus() bool { func (c *WenxinCollector) CheckLoginStatus() bool {
currentURL := c.GetCurrentURL() currentURL := c.GetCurrentURL()
@ -150,6 +189,24 @@ func (c *WenxinCollector) AskQuestion(question string) (string, error) {
} }
c.LogInfo(fmt.Sprintf("成功获取答案,长度: %d 字符", len(answer))) c.LogInfo(fmt.Sprintf("成功获取答案,长度: %d 字符", len(answer)))
// 获取分享链接
_, shareErr := c.getShareLink()
if shareErr != nil {
c.LogInfo(fmt.Sprintf("分享链接获取状态: %v", shareErr))
}
// 获取引用来源
sources, sourcesErr := c.GetSources()
if sourcesErr != nil {
c.LogInfo(fmt.Sprintf("引用来源获取失败: %v", sourcesErr))
} else if len(sources) > 0 {
c.LogInfo(fmt.Sprintf("成功获取 %d 个引用来源", len(sources)))
for i, source := range sources {
c.LogInfo(fmt.Sprintf(" [%d] 标题: %s, 来源: %s, URL: %s", i+1, source.Title, source.PlatformName, source.Url))
}
}
return answer, nil return answer, nil
} }
@ -411,3 +468,507 @@ func (c *WenxinCollector) SafeElement(selector string) (*rod.Element, error) {
} }
return c.Page.Element(selector) return c.Page.Element(selector)
} }
// getShareLink 获取分享链接
func (c *WenxinCollector) getShareLink() (string, error) {
c.LogInfo("=== 开始获取分享链接 ===")
// 步骤1: 先找到包含dialogCardBottom的div
c.LogInfo("步骤1: 查找包含'dialogCardBottom'的div元素...")
var dialogDiv *rod.Element
allDivs, err := c.Page.Elements("div")
if err != nil {
return "", fmt.Errorf("获取页面div元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("在 %d 个div元素中查找包含'dialogCardBottom'的class", len(allDivs)))
for _, elem := range allDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "dialogcardbottom") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到dialogCardBottom容器: tag=%s, class=%s", tagName.Str(), *classAttr))
dialogDiv = elem
break
}
}
if dialogDiv == nil {
return "", fmt.Errorf("未找到包含'dialogCardBottom' class的div元素")
}
// 步骤2: 在这个div内部查找包含share的元素
c.LogInfo("步骤2: 在dialogCardBottom容器内查找包含'share'的元素...")
var shareDiv *rod.Element
// 获取该容器内的所有子元素
childDivs, err := dialogDiv.Elements("div")
if err != nil {
return "", fmt.Errorf("获取子div元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("在 %d 个子div元素中查找包含'share'的class", len(childDivs)))
for _, elem := range childDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "share") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到目标元素: tag=%s, class=%s", tagName.Str(), *classAttr))
shareDiv = elem
break
}
}
if shareDiv == nil {
// 如果没找到div尝试查找其他类型的元素如button、svg等
c.LogInfo("未在子div中找到尝试查找其他元素类型...")
// 尝试查找所有子元素
allChildren, _ := dialogDiv.Elements("*")
for _, elem := range allChildren {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "share") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到目标元素: tag=%s, class=%s", tagName.Str(), *classAttr))
shareDiv = elem
break
}
}
}
if shareDiv == nil {
return "", fmt.Errorf("在dialogCardBottom容器内未找到包含'share' class的元素")
}
// 滚动到元素位置
c.LogInfo("滚动到分享图标位置...")
if scrollErr := shareDiv.ScrollIntoView(); scrollErr != nil {
c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
}
c.SleepMs(800)
// 普通点击
c.LogInfo("执行普通点击...")
if clickErr := shareDiv.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
return "", fmt.Errorf("点击分享图标失败: %v", clickErr)
}
c.LogInfo("✓ 点击成功")
c.SleepMs(2000) // 等待弹窗出现
c.Screenshot("after_share_icon_click")
// 步骤3: 在弹窗中查找shareContainer的div
c.LogInfo("步骤3: 查找包含'shareContainer'的div元素...")
var shareContainerDiv *rod.Element
// 重新获取所有div元素
allDivs, err = c.Page.Elements("div")
if err != nil {
return "", fmt.Errorf("获取页面div元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("在 %d 个div元素中查找包含'shareContainer'的class", len(allDivs)))
for _, elem := range allDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sharecontainer") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到shareContainer容器: tag=%s, class=%s", tagName.Str(), *classAttr))
shareContainerDiv = elem
break
}
}
if shareContainerDiv == nil {
return "", fmt.Errorf("未找到包含'shareContainer' class的div元素")
}
// 步骤4: 在shareContainer内查找genLink的button
c.LogInfo("步骤4: 在shareContainer容器内查找包含'genLink'的button...")
var genLinkBtn *rod.Element
buttons, err := shareContainerDiv.Elements("button")
if err != nil {
return "", fmt.Errorf("获取button元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("在 %d 个button元素中查找包含'genLink'的class", len(buttons)))
for _, elem := range buttons {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "genlink") {
tagName, _ := elem.Property("tagName")
text, _ := elem.Text()
c.LogInfo(fmt.Sprintf("✓ 找到genLink按钮: tag=%s, class=%s, text=%s", tagName.Str(), *classAttr, strings.TrimSpace(text)))
genLinkBtn = elem
break
}
}
if genLinkBtn == nil {
return "", fmt.Errorf("在shareContainer容器内未找到包含'genLink' class的button")
}
// 滚动到按钮位置
c.LogInfo("滚动到genLink按钮位置...")
if scrollErr := genLinkBtn.ScrollIntoView(); scrollErr != nil {
c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
}
c.SleepMs(500)
// 点击genLink按钮
c.LogInfo("点击genLink按钮...")
if clickErr := genLinkBtn.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
return "", fmt.Errorf("点击genLink按钮失败: %v", clickErr)
}
c.LogInfo("✓ genLink按钮点击成功")
c.SleepMs(1500) // 等待复制链接完成
// 步骤5: 从剪贴板读取分享链接
c.LogInfo("步骤5: 从系统剪贴板读取分享链接...")
clipboardText, err := clipboard.ReadAll()
if err != nil {
return "", fmt.Errorf("读取剪贴板失败: %v", err)
}
if clipboardText == "" {
return "", fmt.Errorf("剪贴板内容为空")
}
c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", clipboardText))
// 使用正则表达式提取URL
// 匹配 http:// 或 https:// 开头的URL
re := regexp.MustCompile(`https?://[^\s]+`)
matches := re.FindStringSubmatch(clipboardText)
if len(matches) == 0 {
return "", fmt.Errorf("未能从剪贴板内容中提取URL")
}
url := matches[0]
c.LogInfo(fmt.Sprintf("✓✓✓ 成功获取分享链接: %s", url))
return url, nil
}
// GetSources 获取文章引用来源前5个
func (c *WenxinCollector) GetSources() ([]Source, error) {
c.LogInfo("=== 开始获取文章引用来源 ===")
var sources []Source
// 步骤1: 多层查找titleText的div
c.LogInfo("步骤1: 查找roleSystem容器...")
var roleSystemDiv *rod.Element
allDivs, err := c.Page.Elements("div")
if err != nil {
return nil, fmt.Errorf("获取页面div元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("在 %d 个div元素中查找包含'roleSystem'的class", len(allDivs)))
for _, elem := range allDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "rolesystem") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到roleSystem容器: tag=%s, class=%s", tagName.Str(), *classAttr))
roleSystemDiv = elem
break
}
}
if roleSystemDiv == nil {
c.LogInfo("未找到roleSystem容器结束获取")
return sources, nil // 没有找到就返回空列表
}
// 步骤2: 在roleSystem下查找container
c.LogInfo("步骤2: 在roleSystem内查找包含'container'的div...")
var containerDiv *rod.Element
containerDivs, err := roleSystemDiv.Elements("div")
if err != nil {
return nil, fmt.Errorf("获取roleSystem子div元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("在 %d 个子div中查找包含'container'的class", len(containerDivs)))
for _, elem := range containerDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "container") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到container容器: tag=%s, class=%s", tagName.Str(), *classAttr))
containerDiv = elem
break
}
}
if containerDiv == nil {
c.LogInfo("未找到container容器结束获取")
return sources, nil
}
// 步骤3: 查找第二个container在整个页面中查找所有container取第二个
c.LogInfo("步骤3: 在页面中查找所有包含'container'的div找到第二个...")
var secondContainerDiv *rod.Element
allDivs, err = c.Page.Elements("div")
if err != nil {
return nil, fmt.Errorf("获取页面div元素失败: %v", err)
}
containerCount := 0
for _, elem := range allDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "container") {
containerCount++
if containerCount == 2 {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到第二个container容器: tag=%s, class=%s", tagName.Str(), *classAttr))
secondContainerDiv = elem
break
}
}
}
if secondContainerDiv == nil {
c.LogInfo(fmt.Sprintf("未找到第二个container容器共找到 %d 个),结束获取", containerCount))
return sources, nil
}
// 步骤4: 在第二个container内查找titleText
c.LogInfo("步骤4: 在第二个container内查找包含'titleText'的div...")
var titleTextDiv *rod.Element
titleTextDivs, err := secondContainerDiv.Elements("div")
if err != nil {
return nil, fmt.Errorf("获取第二个container的子div元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("在 %d 个子div中查找包含'titleText'的class", len(titleTextDivs)))
for _, elem := range titleTextDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "titletext") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到titleText元素: tag=%s, class=%s", tagName.Str(), *classAttr))
titleTextDiv = elem
break
}
}
if titleTextDiv == nil {
c.LogInfo("未找到titleText元素结束获取")
return sources, nil
}
// 点击titleText
c.LogInfo("点击titleText元素...")
if scrollErr := titleTextDiv.ScrollIntoView(); scrollErr != nil {
c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
}
c.SleepMs(500)
if clickErr := titleTextDiv.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
return nil, fmt.Errorf("点击titleText失败: %v", clickErr)
}
c.LogInfo("✓ titleText点击成功")
c.SleepMs(2000) // 等待侧边窗出现
c.Screenshot("after_titletext_click")
// 步骤2: 查找SourcesViewer侧边窗
c.LogInfo("步骤2: 查找包含'SourcesViewer'的div元素...")
var sourcesViewerDiv *rod.Element
allDivs, err = c.Page.Elements("div")
if err != nil {
return nil, fmt.Errorf("获取页面div元素失败: %v", err)
}
for _, elem := range allDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sourcesviewer") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到SourcesViewer容器: tag=%s, class=%s", tagName.Str(), *classAttr))
sourcesViewerDiv = elem
break
}
}
if sourcesViewerDiv == nil {
return nil, fmt.Errorf("未找到SourcesViewer侧边窗")
}
// 步骤3: 在SourcesViewer内查找list容器
c.LogInfo("步骤3: 在SourcesViewer内查找包含'list'的div...")
var listDiv *rod.Element
listDivs, err := sourcesViewerDiv.Elements("div")
if err != nil {
return nil, fmt.Errorf("获取子div元素失败: %v", err)
}
for _, elem := range listDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "list") {
tagName, _ := elem.Property("tagName")
c.LogInfo(fmt.Sprintf("✓ 找到list容器: tag=%s, class=%s", tagName.Str(), *classAttr))
listDiv = elem
break
}
}
if listDiv == nil {
return nil, fmt.Errorf("未找到list容器")
}
// 步骤4: 在list内查找所有item
c.LogInfo("步骤4: 在list内查找包含'item'的div...")
itemDivs, err := listDiv.Elements("div")
if err != nil {
return nil, fmt.Errorf("获取item元素失败: %v", err)
}
c.LogInfo(fmt.Sprintf("找到 %d 个item元素", len(itemDivs)))
// 只处理前5个item
maxItems := 5
if len(itemDivs) < maxItems {
maxItems = len(itemDivs)
}
for i := 0; i < maxItems; i++ {
item := itemDivs[i]
c.LogInfo(fmt.Sprintf("\n--- 处理第 %d 个item ---", i+1))
source := Source{}
// 查找titleInfo (标题)
titleDivs, _ := item.Elements("div")
for _, div := range titleDivs {
classAttr, _ := div.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "title") {
text, _ := div.Text()
source.Title = strings.TrimSpace(text)
c.LogInfo(fmt.Sprintf(" 标题: %s", source.Title))
break
}
}
// 查找site_icon (图标URL)
imgs, _ := item.Elements("img")
for _, img := range imgs {
classAttr, _ := img.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "site_icon") {
srcAttr, _ := img.Attribute("src")
if srcAttr != nil {
source.PlatformIcon = *srcAttr
c.LogInfo(fmt.Sprintf(" 图标: %s", source.PlatformIcon))
}
break
}
}
// 查找siteText (来源媒体名称)
for _, div := range titleDivs {
classAttr, _ := div.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sitetext") {
text, _ := div.Text()
source.PlatformName = strings.TrimSpace(text)
c.LogInfo(fmt.Sprintf(" 来源: %s", source.PlatformName))
break
}
}
// 尝试获取跳转URL
// 方法1: 查找item内的a标签
links, _ := item.Elements("a")
if len(links) > 0 {
href, _ := links[0].Attribute("href")
if href != nil && *href != "" {
source.Url = *href
c.LogInfo(fmt.Sprintf(" URL (从href获取): %s", source.Url))
}
}
// 方法2: 如果没找到href尝试点击item获取URL
if source.Url == "" {
c.LogInfo(" 未找到href尝试点击item获取URL...")
// 记录当前URL
currentURL := c.Page.MustInfo().URL
// 点击item
if scrollErr := item.ScrollIntoView(); scrollErr != nil {
c.LogInfo(fmt.Sprintf(" 滚动失败: %v", scrollErr))
}
c.SleepMs(300)
if clickErr := item.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
c.LogInfo(fmt.Sprintf(" 点击item失败: %v", clickErr))
} else {
c.SleepMs(2000) // 等待页面跳转
// 获取新URL
newURL := c.Page.MustInfo().URL
if newURL != currentURL {
source.Url = newURL
c.LogInfo(fmt.Sprintf(" URL (从跳转获取): %s", source.Url))
// 返回上一页
c.Page.MustNavigateBack()
c.SleepMs(1500) // 等待返回
// 重新查找item元素因为页面刷新了
c.LogInfo(" 重新查找item元素...")
allDivs, _ = c.Page.Elements("div")
for _, elem := range allDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "sourcesviewer") {
sourcesViewerDiv = elem
break
}
}
if sourcesViewerDiv != nil {
listDivs, _ = sourcesViewerDiv.Elements("div")
for _, elem := range listDivs {
classAttr, _ := elem.Attribute("class")
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "list") {
listDiv = elem
break
}
}
if listDiv != nil {
itemDivs, _ = listDiv.Elements("div")
}
}
}
}
}
// 添加到结果列表
if source.Title != "" || source.Url != "" {
sources = append(sources, source)
}
}
c.LogInfo(fmt.Sprintf("\n✓✓✓ 成功获取 %d 个引用来源", len(sources)))
return sources, nil
}

View File

@ -12,16 +12,17 @@ const TableNameCollect = "collect"
// Collect mapped from table <collect> // Collect mapped from table <collect>
type Collect struct { type Collect struct {
ID int64 `gorm:"column:id;primaryKey;autoIncrement:true" json:"id"` ID int64 `gorm:"column:id;primaryKey;autoIncrement:true" json:"id"`
ProductID int32 `gorm:"column:product_id;not null" json:"product_id"` CollectCode string `gorm:"column:collect_code;not null" json:"collect_code"`
Keywords string `gorm:"column:keywords;not null" json:"keywords"` ProductID int32 `gorm:"column:product_id;not null" json:"product_id"`
Platform string `gorm:"column:platform;not null" json:"platform"` Keywords string `gorm:"column:keywords;not null" json:"keywords"`
Question string `gorm:"column:question;not null" json:"question"` Platform string `gorm:"column:platform;not null" json:"platform"`
Progress int32 `gorm:"column:progress;not null" json:"progress"` Question string `gorm:"column:question;not null" json:"question"`
EndFile string `gorm:"column:end_file;not null" json:"end_file"` Progress int32 `gorm:"column:progress;not null" json:"progress"`
CreatedAt time.Time `gorm:"column:created_at;not null;default:CURRENT_TIMESTAMP" json:"created_at"` EndFile string `gorm:"column:end_file;not null" json:"end_file"`
UpdatedAt time.Time `gorm:"column:updated_at;not null;default:CURRENT_TIMESTAMP" json:"updated_at"` CreatedAt time.Time `gorm:"column:created_at;not null;default:CURRENT_TIMESTAMP" json:"created_at"`
Status int32 `gorm:"column:status;not null;comment:1:生成中2生成完毕" json:"status"` // 1:生成中2生成完毕 UpdatedAt time.Time `gorm:"column:updated_at;not null;default:CURRENT_TIMESTAMP" json:"updated_at"`
Status int32 `gorm:"column:status;not null;comment:1:生成中2生成完毕" json:"status"` // 1:生成中2生成完毕
} }
// TableName Collect's table name // TableName Collect's table name

View File

@ -14,10 +14,11 @@ const TableNameCollectTask = "collect_task"
type CollectTask struct { type CollectTask struct {
ID int64 `gorm:"column:id;primaryKey;autoIncrement:true" json:"id"` ID int64 `gorm:"column:id;primaryKey;autoIncrement:true" json:"id"`
RequestID string `gorm:"column:request_id;not null" json:"request_id"` RequestID string `gorm:"column:request_id;not null" json:"request_id"`
CollectID int32 `gorm:"column:collect_id;not null" json:"collect_id"` CollectCode string `gorm:"column:collect_code;not null" json:"collect_code"`
ScriptTime string `gorm:"column:script_time;not null" json:"script_time"` Question string `gorm:"column:question;not null" json:"question"`
ScriptTime int32 `gorm:"column:script_time;not null" json:"script_time"`
Platform int32 `gorm:"column:platform;not null" json:"platform"` Platform int32 `gorm:"column:platform;not null" json:"platform"`
CollectData int32 `gorm:"column:collect_data;not null" json:"collect_data"` CollectData string `gorm:"column:collect_data;not null" json:"collect_data"`
ShareURL string `gorm:"column:share_url;not null;default:0" json:"share_url"` ShareURL string `gorm:"column:share_url;not null;default:0" json:"share_url"`
ImgURL string `gorm:"column:img_url;not null" json:"img_url"` ImgURL string `gorm:"column:img_url;not null" json:"img_url"`
PointKeyword string `gorm:"column:point_keyword;not null" json:"point_keyword"` PointKeyword string `gorm:"column:point_keyword;not null" json:"point_keyword"`

View File

@ -198,7 +198,7 @@ type (
ProductCollectRequest struct { ProductCollectRequest struct {
AccessToken string `json:"access_token" validate:"required" zh:"access_token"` AccessToken string `json:"access_token" validate:"required" zh:"access_token"`
Keywords []string `json:"keywords" validate:"required" zh:"关键词"` Keywords []string `json:"keywords" validate:"required" zh:"关键词"`
Platform []int64 `json:"platform" validate:"required" zh:"平台"` Platform []int `json:"platform" validate:"required" zh:"平台"`
Question string `json:"question" validate:"required" zh:"问题"` Question string `json:"question" validate:"required" zh:"问题"`
ProductId int32 `json:"product_id" validate:"required" zh:"项目Id"` ProductId int32 `json:"product_id" validate:"required" zh:"项目Id"`
} }

View File

@ -59,6 +59,7 @@ func (m *AppModule) Register(router fiber.Router) {
router.Post("/product/detail", vali(m.productService.Detail, &entitys.ProductDetailRequest{})) router.Post("/product/detail", vali(m.productService.Detail, &entitys.ProductDetailRequest{}))
router.Post("/product/update", vali(m.productService.Update, &entitys.ProductUpdateRequest{})) router.Post("/product/update", vali(m.productService.Update, &entitys.ProductUpdateRequest{}))
router.Post("/product/del", vali(m.productService.Del, &entitys.ProductDelRequest{})) router.Post("/product/del", vali(m.productService.Del, &entitys.ProductDelRequest{}))
router.Post("/product/collect", vali(m.productService.Collect, &entitys.ProductCollectRequest{}))
router.Post("/img/upload", m.productService.ImgUpload) router.Post("/img/upload", m.productService.ImgUpload)
router.Post("/plat/list", vali(m.appService.PlatList, &entitys.PlatListRequest{})) router.Post("/plat/list", vali(m.appService.PlatList, &entitys.PlatListRequest{}))

View File

@ -2,6 +2,7 @@ package service
import ( import (
"context" "context"
"fmt"
"geo/internal/ai_tool" "geo/internal/ai_tool"
"geo/internal/biz" "geo/internal/biz"
"geo/internal/config" "geo/internal/config"
@ -12,9 +13,13 @@ import (
"geo/tmpl/dataTemp" "geo/tmpl/dataTemp"
"geo/tmpl/errcode" "geo/tmpl/errcode"
"io" "io"
"log"
"os" "os"
"path/filepath" "path/filepath"
"runtime/debug"
"strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/go-viper/mapstructure/v2" "github.com/go-viper/mapstructure/v2"
@ -28,6 +33,8 @@ type ProductService struct {
authBiz *biz.AuthBiz authBiz *biz.AuthBiz
productBiz *biz.ProductBiz productBiz *biz.ProductBiz
aiBiz *biz.AiBiz aiBiz *biz.AiBiz
collect *impl.CollectImpl
collectTask *impl.CollectTaskImpl
} }
func NewProductService( func NewProductService(
@ -45,6 +52,8 @@ func NewProductService(
authBiz: authBiz, authBiz: authBiz,
productBiz: productBiz, productBiz: productBiz,
aiBiz: aiBiz, aiBiz: aiBiz,
collect: collect,
collectTask: collectTask,
} }
} }
@ -236,35 +245,341 @@ func (p *ProductService) CreateProductInfoByDocx(c *fiber.Ctx) error {
} }
func (p *ProductService) Collect(c *fiber.Ctx, req *entitys.ProductCollectRequest) error { func (p *ProductService) Collect(c *fiber.Ctx, req *entitys.ProductCollectRequest) error {
log.Printf("[DEBUG] ========== 请求开始 ==========")
log.Printf("[DEBUG] 请求时间: %s", time.Now().Format("2006-01-02 15:04:05.000"))
log.Printf("[Collect] 开始处理收集请求, ProductID: %d, Platforms: %v, Keywords: %v",
req.ProductId, req.Platform, req.Keywords)
_, err := p.authBiz.ValidateAccessToken(c.UserContext(), req.AccessToken) _, err := p.authBiz.ValidateAccessToken(c.UserContext(), req.AccessToken)
if err != nil { if err != nil {
log.Printf("[Collect] 验证token失败, ProductID: %d, Error: %v", req.ProductId, err)
return err return err
} }
productInfo, err := p.productBiz.GetProduct(c.UserContext(), req.ProductId)
_, err = p.productBiz.GetProduct(c.UserContext(), req.ProductId)
if err != nil { if err != nil {
log.Printf("[Collect] 获取产品信息失败, ProductID: %d, Error: %v", req.ProductId, err)
return err return err
} }
collect := ai_tool.NewCollect(p.cfg.Collect.ApiKey)
ctx, cancel := context.WithTimeout(context.Background(), time.Second*120) platformStr := make([]string, len(req.Platform))
defer cancel() for i, s := range req.Platform {
for _, v := range req.Platform { platformStr[i] = strconv.Itoa(s)
}
collectCode := fmt.Sprintf("C%d_%d", req.ProductId, time.Now().UnixNano())
collectData := &model.Collect{
CollectCode: collectCode,
ProductID: req.ProductId,
Keywords: strings.Join(req.Keywords, ","),
Platform: strings.Join(platformStr, ","),
Question: req.Question,
CreatedAt: time.Now(),
}
log.Printf("[Collect] 创建收集记录, CollectCode: %s, ProductID: %d", collectCode, req.ProductId)
err = p.collect.Add(c.UserContext(), collectData)
if err != nil {
log.Printf("[Collect] 保存收集记录失败, CollectCode: %s, Error: %v", collectCode, err)
return err
}
log.Printf("[Collect] ✅ 启动异步收集任务, CollectCode: %s, Platforms: %v", collectCode, req.Platform)
go func() {
// 记录 goroutine 启动时间
startTime := time.Now()
log.Printf("[Goroutine] 异步任务启动, CollectCode: %s, 启动时间: %s", collectCode, startTime.Format("15:04:05.000"))
// 使用独立 context避免请求结束后任务被取消
ctx, cancel := context.WithTimeout(context.Background(), time.Second*240)
// 监控 context 取消
go func() { go func() {
defer func() { <-ctx.Done()
if r := recover(); r != nil { log.Printf("[Goroutine] ❌ Context被取消! CollectCode: %s, 原因: %v, 耗时: %v",
} collectCode, ctx.Err(), time.Since(startTime))
}() }()
request := ai_tool.CreateReq{
Keywords: strings.Join(req.Keywords, ","), defer func() {
Question: req.Question, if r := recover(); r != nil {
Platform: v, log.Printf("[Goroutine] ❌ PANIC: %v\nStack: %s", r, debug.Stack())
ThirdID: "11",
} }
_, _err := collect.Create(ctx, &request) log.Printf("[Goroutine] 异步任务结束, CollectCode: %s, 总耗时: %v", collectCode, time.Since(startTime))
if _err != nil { cancel()
return log.Printf("[Goroutine] 已调用 cancel(), CollectCode: %s", collectCode)
}()
log.Printf("[Goroutine] 准备调用 doCollect, CollectCode: %s", collectCode)
p.doCollect(ctx, collectData, req.Platform)
log.Printf("[Goroutine] doCollect 已返回, CollectCode: %s", collectCode)
}()
log.Printf("[DEBUG] ========== 请求返回 ==========")
return pkg.HandleResponse(c, "收录生成中")
}
// doCollect fans the collection request out to every requested platform,
// gathers the finished tasks and persists them in a single batch.
//
// One processPlatform goroutine is started per entry in platforms; each of
// them delivers at most one *model.CollectTask on a channel buffered to
// len(platforms). Gathering stops when the channel is closed (all workers
// returned), when ctx is done, or when the 250-second safety timer fires.
//
// Whatever was gathered is then written with a fresh, bounded context: ctx
// may already be cancelled or past its deadline at that point, and reusing it
// would make the write of partial results fail.
//
// Parameters:
//   - ctx: governs the lifetime of the per-platform workers.
//   - collectData: the parent collect record; its CollectCode tags every task.
//   - platforms: platform identifiers to query, one worker each.
func (p *ProductService) doCollect(ctx context.Context, collectData *model.Collect, platforms []int) {
	// Upper bound for the final database write, independent of ctx.
	const saveTimeout = 30 * time.Second

	collectCode := collectData.CollectCode
	startTime := time.Now()

	log.Printf("[doCollect] ========== 开始执行 ==========")
	log.Printf("[doCollect] CollectCode: %s, Platforms: %v", collectCode, platforms)
	// NOTE(review): the 240s printed here is a literal, not read from ctx;
	// it presumably mirrors the timeout chosen by the caller.
	log.Printf("[doCollect] Context状态: %v, 超时时间: %v", ctx.Err(), time.Second*240)

	// Report a cancellation that happens while doCollect is running. The
	// watcher stops as soon as doCollect returns, so the caller's routine
	// cancel() after a successful run is not logged as a failure.
	finished := make(chan struct{})
	defer close(finished)
	go func() {
		select {
		case <-ctx.Done():
			log.Printf("[doCollect] ⚠️ 检测到Context取消! CollectCode: %s, 原因: %v, 已执行时间: %v",
				collectCode, ctx.Err(), time.Since(startTime))
		case <-finished:
		}
	}()

	collectClient := ai_tool.NewCollect(p.cfg.Collect.ApiKey)
	log.Printf("[doCollect] 已创建 collectClient")

	var wg sync.WaitGroup
	// Buffered to the number of workers so a send never blocks on the reader.
	resCh := make(chan *model.CollectTask, len(platforms))
	log.Printf("[doCollect] 创建 channel, 容量: %d", len(platforms))

	monitorStart := time.Now()

	log.Printf("[doCollect] 启动 %d 个平台任务", len(platforms))
	for i, plat := range platforms {
		log.Printf("[doCollect] 启动任务 #%d, Platform: %d", i+1, plat)
		wg.Add(1)
		go p.processPlatform(ctx, &wg, collectClient, collectData, plat, resCh, i+1)
	}

	// Close the channel once every worker has returned; the close is what
	// ends the gathering loop on the normal path.
	go func() {
		log.Printf("[Monitor] 监控goroutine启动, CollectCode: %s", collectCode)
		wg.Wait()
		log.Printf("[Monitor] ✅ 所有任务完成, 准备关闭channel, 等待时间: %v", time.Since(monitorStart))
		close(resCh)
		log.Printf("[Monitor] Channel已关闭")
	}()

	log.Printf("[doCollect] 开始等待结果...")
	datas := make([]*model.CollectTask, 0, len(platforms))
	record := func(task *model.CollectTask) {
		datas = append(datas, task)
		log.Printf("[doCollect] ✅ 收到结果 #%d, Platform: %d, RequestID: %s, ScriptTime: %d",
			len(datas), task.Platform, task.RequestID, task.ScriptTime)
	}

	// Safety net in case ctx carries no deadline; stopped on return so the
	// timer does not linger.
	waitTimer := time.NewTimer(250 * time.Second)
	defer waitTimer.Stop()

gather:
	for {
		select {
		case task, ok := <-resCh:
			if !ok {
				log.Printf("[doCollect] Channel已关闭, 收集到 %d 条结果", len(datas))
				break gather
			}
			record(task)
		case <-waitTimer.C:
			log.Printf("[doCollect] ⚠️ 等待超时 250秒, 强制退出, 已收集: %d/%d", len(datas), len(platforms))
			break gather
		case <-ctx.Done():
			log.Printf("[doCollect] ❌ Context取消, 强制退出, 已收集: %d/%d, 原因: %v",
				len(datas), len(platforms), ctx.Err())
			break gather
		}
	}

	// select picks at random among ready cases, so results that were already
	// buffered when ctx fired may still be sitting in the channel. Pick them
	// up without blocking instead of discarding finished work.
drain:
	for {
		select {
		case task, ok := <-resCh:
			if !ok {
				break drain
			}
			record(task)
		default:
			break drain
		}
	}

	log.Printf("[doCollect] 收集完成, 共 %d 条结果", len(datas))

	if len(datas) == 0 {
		log.Printf("[doCollect] ⚠️ 没有结果需要保存")
	} else {
		log.Printf("[doCollect] 开始保存到数据库, 数量: %d", len(datas))
		saveStart := time.Now()
		// ctx may be dead by now; persist with an independent deadline.
		saveCtx, cancelSave := context.WithTimeout(context.Background(), saveTimeout)
		defer cancelSave()
		if err := p.collectTask.Add(saveCtx, datas); err != nil {
			log.Printf("[doCollect] ❌ 保存失败: %v", err)
		} else {
			log.Printf("[doCollect] ✅ 保存成功, 耗时: %v", time.Since(saveStart))
		}
	}

	log.Printf("[doCollect] ========== 结束执行, 总耗时: %v ==========", time.Since(startTime))
}
// processPlatform runs the full collection cycle for a single platform:
// it creates the remote task, polls it via pollTaskStatus, and hands the
// finished task to resCh.
//
// wg.Done is always called on exit, and a panic anywhere in the cycle is
// recovered and logged rather than propagated. Nothing is sent on resCh when
// ctx is already done, when Create fails or answers with a code other than 1,
// or when polling yields no task.
//
// taskNum is only used to label log lines.
func (p *ProductService) processPlatform(ctx context.Context, wg *sync.WaitGroup,
	collectClient *ai_tool.Collect, collectData *model.Collect, plat int,
	resCh chan<- *model.CollectTask, taskNum int) {
	code := collectData.CollectCode
	began := time.Now()

	log.Printf("[Platform #%d] ========== 开始 ==========", taskNum)
	log.Printf("[Platform #%d] CollectCode: %s, Platform: %d", taskNum, code, plat)

	// Registered first, therefore executed last: the WaitGroup is released
	// only after the recover handler below has finished.
	defer func() {
		log.Printf("[Platform #%d] 准备调用 wg.Done(), 已执行时间: %v", taskNum, time.Since(began))
		wg.Done()
		log.Printf("[Platform #%d] 已调用 wg.Done()", taskNum)
	}()

	// Turn a panic into a log entry so one platform cannot take the others down.
	defer func() {
		if recovered := recover(); recovered != nil {
			log.Printf("[Platform #%d] ❌ PANIC: %v\nStack: %s", taskNum, recovered, debug.Stack())
		}
		log.Printf("[Platform #%d] ========== 结束, 耗时: %v ==========", taskNum, time.Since(began))
	}()

	// Do not start any remote work if the context is already finished.
	if ctxErr := ctx.Err(); ctxErr != nil {
		log.Printf("[Platform #%d] ❌ Context已取消, 退出执行, 原因: %v", taskNum, ctxErr)
		return
	}
	log.Printf("[Platform #%d] Context正常", taskNum)

	// Step 1: create the remote task.
	createReq := ai_tool.CreateReq{
		Keywords: collectData.Keywords,
		Question: collectData.Question,
		Platform: plat,
		ThirdID:  fmt.Sprintf("%s_%d", code, plat),
	}
	log.Printf("[Platform #%d] 调用 Create API, Request: %+v", taskNum, createReq)

	createBegan := time.Now()
	created, createErr := collectClient.Create(ctx, &createReq)
	createTook := time.Since(createBegan)

	switch {
	case createErr != nil:
		log.Printf("[Platform #%d] ❌ Create失败, 耗时: %v, Error: %v", taskNum, createTook, createErr)
		return
	case created.Code != 1:
		log.Printf("[Platform #%d] ❌ Create返回错误码, 耗时: %v, Code: %d, Message: %s",
			taskNum, createTook, created.Code, created.Msg)
		return
	}

	requestID := created.Data.RequestId
	log.Printf("[Platform #%d] ✅ Create成功, 耗时: %v, RequestID: %s",
		taskNum, createTook, requestID)

	// Step 2: poll until the task finishes or polling gives up.
	log.Printf("[Platform #%d] 开始轮询, RequestID: %s", taskNum, requestID)
	pollBegan := time.Now()
	finished := p.pollTaskStatus(ctx, collectClient, requestID, collectData, plat, taskNum)
	pollTook := time.Since(pollBegan)

	if finished == nil {
		log.Printf("[Platform #%d] ❌ 轮询失败, 耗时: %v, 未获取到结果", taskNum, pollTook)
		return
	}
	log.Printf("[Platform #%d] ✅ 轮询成功, 耗时: %v, ScriptTime: %d",
		taskNum, pollTook, finished.ScriptTime)

	// Step 3: deliver the result, giving up if the context ends first.
	log.Printf("[Platform #%d] 准备发送结果到channel", taskNum)
	select {
	case <-ctx.Done():
		log.Printf("[Platform #%d] ⚠️ Context取消, 放弃发送结果", taskNum)
	case resCh <- finished:
		log.Printf("[Platform #%d] ✅ 结果已发送到channel", taskNum)
	}
}
func (p *ProductService) pollTaskStatus(ctx context.Context, collectClient *ai_tool.Collect,
requestID string, collectData *model.Collect, plat int, taskNum int) *model.CollectTask {
collectCode := collectData.CollectCode
startTime := time.Now()
log.Printf("[Poll #%d] ========== 开始轮询 ==========", taskNum)
log.Printf("[Poll #%d] CollectCode: %s, Platform: %d, RequestID: %s",
taskNum, collectCode, plat, requestID)
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
errCount := 0
const maxErrors = 5
pollCount := 0
for {
select {
case <-ctx.Done():
log.Printf("[Poll #%d] ❌ Context取消, 停止轮询, 已轮询%d次, 耗时: %v, 原因: %v",
taskNum, pollCount, time.Since(startTime), ctx.Err())
return nil
case <-ticker.C:
pollCount++
log.Printf("[Poll #%d] 第 %d 次轮询, 已耗时: %v", taskNum, pollCount, time.Since(startTime))
checkStart := time.Now()
checkRes, err := collectClient.CheckTask(ctx, requestID)
checkElapsed := time.Since(checkStart)
if err != nil {
errCount++
log.Printf("[Poll #%d] ❌ 轮询失败(第%d次错误), 耗时: %v, Error: %v, 累计错误: %d/%d",
taskNum, pollCount, checkElapsed, err, errCount, maxErrors)
if errCount >= maxErrors {
log.Printf("[Poll #%d] 达到最大错误次数, 停止轮询", taskNum)
return nil
}
continue
} }
}() log.Printf("[Poll #%d] ✅ 轮询成功, 耗时: %v, Code: %d, Status: %d, ScriptTime: %d, ShouluDate: %s",
taskNum, checkElapsed, checkRes.Code, checkRes.Data.Status,
checkRes.Data.ScriptTime, checkRes.Data.ShouluDate)
if checkRes.Code != 1 {
log.Printf("[Poll #%d] ❌ 返回错误码: %d", taskNum, checkRes.Code)
return nil
}
// 判断任务是否完成
// 根据你的业务逻辑调整判断条件
isCompleted := false
completeReason := ""
if checkRes.Data.Status != 0 { // 假设 2 表示完成
isCompleted = true
completeReason = fmt.Sprintf("chekcStatus=%d", checkRes.Data.Status)
}
if isCompleted {
log.Printf("[Poll #%d] 🎉 任务完成! 原因: %s, 总轮询次数: %d, 总耗时: %v",
taskNum, completeReason, pollCount, time.Since(startTime))
return &model.CollectTask{
RequestID: checkRes.Data.RequestId,
CollectCode: collectData.CollectCode,
ScriptTime: int32(checkRes.Data.ScriptTime),
Platform: int32(checkRes.Data.Platform),
CollectData: checkRes.Data.ShouluDate,
ShareURL: checkRes.Data.ShareUrl,
ImgURL: checkRes.Data.ImgUrl,
PointKeyword: checkRes.Data.HitWord,
Question: checkRes.Data.Question,
Res: pkg.JsonStringIgonErr(checkRes),
CreatedAt: time.Now(),
Status: int32(checkRes.Data.Status),
}
}
log.Printf("[Poll #%d] 任务未完成, 继续轮询, Status=%d, ScriptTime=%d, ShouluDate=%s",
taskNum, checkRes.Data.Status, checkRes.Data.ScriptTime, checkRes.Data.ShouluDate)
}
} }
return pkg.HandleResponse(c, productInfo)
} }