diff --git a/.gitignore b/.gitignore index e473800..bc8bdcd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ chrome/* chrome_data/* cookies/* docs/* -logs/* \ No newline at end of file +logs/* +md/* +cmd/server/server.exe \ No newline at end of file diff --git a/cmd/server/server.exe b/cmd/server/server.exe new file mode 100644 index 0000000..97ce45e Binary files /dev/null and b/cmd/server/server.exe differ diff --git a/example_test.go b/example_test.go index 0cdf980..4a95e1d 100644 --- a/example_test.go +++ b/example_test.go @@ -2,15 +2,11 @@ package collect import ( "context" - "fmt" "geo/internal/collect" "geo/internal/config" "log" "os" - "strings" "testing" - - "github.com/go-rod/rod/lib/proto" ) var ( @@ -81,97 +77,6 @@ func TestWenxinCollector_WaitLogin(t *testing.T) { t.Log("Cookie已保存,后续测试可以使用已登录状态") } -// TestWenxinCollector_SimpleAsk 简单测试文心一言提问 -func TestWenxinCollector_SimpleAsk(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") - } - - params := &collect.CollectParams{ - Headless: false, // 显示浏览器以便观察 - UserIndex: "test_user", - PlatIndex: "wenxin", - RequestID: "test_wenxin_simple_001", - Platform: "wenxin", - } - - t.Log("=== 简单测试文心一言提问 ===") - - // 获取收集器 - collector, err := manager.GetCollector("wenxin", params) - if err != nil { - t.Fatalf("获取收集器失败: %v", err) - } - - wenxinCollector := collector.(*collect.WenxinCollector) - - // 初始化浏览器 - if err := wenxinCollector.SetupDriver(); err != nil { - t.Fatalf("启动浏览器失败: %v", err) - } - defer wenxinCollector.Close() - - // 加载Cookie - if err := wenxinCollector.LoadCookies(); err != nil { - t.Logf("未找到Cookie文件: %v", err) - } - - // 导航到聊天页面 - wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL) - wenxinCollector.Sleep(5) - - // 检查登录状态 - isLoggedIn := wenxinCollector.CheckLoginStatus() - t.Logf("登录状态: %v", isLoggedIn) - - if !isLoggedIn { - t.Fatal("未登录,请先调用WaitLogin登录") - } - - // 手动输入问题 - question := "你好" - t.Logf("准备输入问题: %s", question) - - // 查找输入框 - inputBox, err := 
wenxinCollector.WaitForElementVisible("[contenteditable='true']", 10) - if err != nil { - t.Fatalf("未找到输入框: %v", err) - } - t.Log("✓ 找到输入框") - - // 点击输入框 - inputBox.Click(proto.InputMouseButtonLeft, 1) - wenxinCollector.SleepMs(500) - - // 清空输入框 - wenxinCollector.ClearInput(inputBox) - wenxinCollector.SleepMs(300) - - // 使用键盘输入 - t.Log("正在输入问题...") - inputBox.Input(question) - wenxinCollector.SleepMs(1000) - t.Log("✓ 问题已输入") - - // 查找并点击发送按钮 - sendBtn, err := wenxinCollector.Page.Element("button") - if err != nil { - t.Fatalf("未找到发送按钮: %v", err) - } - t.Log("✓ 找到发送按钮") - - t.Log("正在点击发送按钮...") - sendBtn.Click(proto.InputMouseButtonLeft, 1) - wenxinCollector.SleepMs(3000) - t.Log("✓ 已点击发送按钮") - - t.Log("\n请观察浏览器窗口,查看是否成功发送问题并收到回答") - t.Log("测试将在10秒后结束...") - wenxinCollector.Sleep(10) - - t.Log("=== 测试完成 ===") -} - // TestWenxinCollector_AskQuestion 测试文心一言提问功能 // 注意:此测试需要有效的登录状态 func TestWenxinCollector_AskQuestion(t *testing.T) { @@ -189,7 +94,7 @@ func TestWenxinCollector_AskQuestion(t *testing.T) { } // 定义提问内容 - question := "请用一句话介绍Go语言" + question := "四川房地产软件排名" t.Logf("向文心一言提问: %s", question) // 调用管理器提问并获取答案 @@ -206,330 +111,3 @@ func TestWenxinCollector_AskQuestion(t *testing.T) { t.Error("答案为空") } } - -// TestMultiplePlatforms_Compare 测试多平台对比 -func TestMultiplePlatforms_Compare(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") - } - - question := "什么是人工智能?" 
- platforms := []string{"wenxin", "deepseek"} - - results := make(map[string]string) - - for _, platform := range platforms { - params := &collect.CollectParams{ - Headless: true, - UserIndex: "test_user", - PlatIndex: platform, - RequestID: fmt.Sprintf("test_%s", platform), - Platform: platform, - } - - t.Logf("正在向%s提问...", platform) - answer, err := manager.AskQuestion(platform, params, question) - if err != nil { - t.Logf("%s提问失败: %v", platform, err) - results[platform] = fmt.Sprintf("错误: %v", err) - continue - } - - results[platform] = answer - t.Logf("%s回答完成,长度: %d", platform, len(answer)) - } - - // 输出对比结果 - t.Log("\n===== 多平台回答对比 =====") - for platform, answer := range results { - t.Logf("\n[%s]:\n%s\n", platform, answer) - } -} - -// TestWenxinCollector_DebugPageStructure 调试页面结构 -func TestWenxinCollector_DebugPageStructure(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") - } - - params := &collect.CollectParams{ - Headless: false, - UserIndex: "test_user", - PlatIndex: "wenxin", - RequestID: "test_wenxin_debug_001", - Platform: "wenxin", - } - - t.Log("=== 调试文心一言页面结构 ===") - - // 获取收集器 - collector, err := manager.GetCollector("wenxin", params) - if err != nil { - t.Fatalf("获取收集器失败: %v", err) - } - - wenxinCollector := collector.(*collect.WenxinCollector) - if err := wenxinCollector.SetupDriver(); err != nil { - t.Fatalf("启动浏览器失败: %v", err) - } - defer wenxinCollector.Close() - - // 加载Cookie - if err := wenxinCollector.LoadCookies(); err != nil { - t.Logf("未找到Cookie文件: %v", err) - } - - // 导航到聊天页面 - wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL) - wenxinCollector.Sleep(5) - - // 检查登录状态 - isLoggedIn := wenxinCollector.CheckLoginStatus() - t.Logf("登录状态: %v", isLoggedIn) - - if !isLoggedIn { - t.Fatal("未登录,请先调用WaitLogin登录") - } - - // 查找所有可能的输入框 - t.Log("\n=== 查找输入框 ===") - inputSelectors := []string{ - "textarea", - "[contenteditable='true']", - "input[type='text']", - ".input-box", - "#chat-input", - "[placeholder]", - } - - for _, 
selector := range inputSelectors { - elements, err := wenxinCollector.Page.Elements(selector) - if err == nil && len(elements) > 0 { - t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector) - for i, elem := range elements { - if i >= 3 { - break // 只显示前3个 - } - text, _ := elem.Text() - tagName, _ := elem.Property("tagName") - class, _ := elem.Attribute("class") - id, _ := elem.Attribute("id") - placeholder, _ := elem.Attribute("placeholder") - - idStr := "" - if id != nil { - idStr = *id - } - classStr := "" - if class != nil { - classStr = *class - } - placeholderStr := "" - if placeholder != nil { - placeholderStr = *placeholder - } - - t.Logf(" [%d] tag=%s, id=%s, class=%s, placeholder=%s, text=%s", - i, tagName.Str(), idStr, classStr, placeholderStr, text[:min(50, len(text))]) - } - } else { - t.Logf("✗ 未找到元素: %s", selector) - } - } - - // 查找所有按钮 - t.Log("\n=== 查找发送按钮 ===") - buttonSelectors := []string{ - "button", - "svg", - "[aria-label]", - } - - for _, selector := range buttonSelectors { - elements, err := wenxinCollector.Page.Elements(selector) - if err == nil && len(elements) > 0 { - t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector) - for i, elem := range elements { - if i >= 5 { - break - } - text, _ := elem.Text() - tagName, _ := elem.Property("tagName") - class, _ := elem.Attribute("class") - ariaLabel, _ := elem.Attribute("aria-label") - ariaLabelText := "" - if ariaLabel != nil { - ariaLabelText = *ariaLabel - } - - classStr := "" - if class != nil { - classStr = *class - } - - trimmedText := strings.TrimSpace(text) - if trimmedText != "" || ariaLabelText != "" { - t.Logf(" [%d] tag=%s, class=%s, aria-label=%s, text=%s", - i, tagName.Str(), classStr, ariaLabelText, trimmedText[:min(30, len(trimmedText))]) - } - } - } - } - - t.Log("\n=== 调试完成 ===") - t.Log("请保持浏览器窗口打开,手动检查页面结构") - - // 等待用户观察 - select {} -} - -// TestWenxinCollector_DebugAnswer 调试答案区域 -func TestWenxinCollector_DebugAnswer(t *testing.T) { - if testing.Short() { - t.Skip("跳过需要浏览器交互的测试") 
- } - - params := &collect.CollectParams{ - Headless: false, - UserIndex: "test_user", - PlatIndex: "wenxin", - RequestID: "test_wenxin_debug_answer", - Platform: "wenxin", - } - - t.Log("=== 调试文心一言答案区域 ===") - - collector, err := manager.GetCollector("wenxin", params) - if err != nil { - t.Fatalf("获取收集器失败: %v", err) - } - - wenxinCollector := collector.(*collect.WenxinCollector) - if err := wenxinCollector.SetupDriver(); err != nil { - t.Fatalf("启动浏览器失败: %v", err) - } - defer wenxinCollector.Close() - - if err := wenxinCollector.LoadCookies(); err != nil { - t.Logf("未找到Cookie文件: %v", err) - } - - wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL) - wenxinCollector.Sleep(5) - - if !wenxinCollector.CheckLoginStatus() { - t.Fatal("未登录") - } - - // 手动输入问题并发送 - t.Log("请在浏览器中手动输入问题并等待AI回答完成") - t.Log("然后按回车键继续...") - fmt.Scanln() - - // 查找所有可能的答案容器 - t.Log("\n=== 查找答案容器 ===") - - // 方式1: 查找包含answer/response/message的元素 - containers, _ := wenxinCollector.Page.Elements("[class*='answer'], [class*='response'], [class*='message']") - t.Logf("找到 %d 个容器元素", len(containers)) - for i, container := range containers { - text, _ := container.Text() - classAttr, _ := container.Attribute("class") - tagName, _ := container.Property("tagName") - - classStr := "" - if classAttr != nil { - classStr = *classAttr - } - - if len(strings.TrimSpace(text)) > 20 { - t.Logf("[%d] tag=%s, class=%s, text长度=%d, 前100字符=%s", - i, tagName.Str(), classStr, len(text), text[:min(100, len(text))]) - } - } - - // 方式2: 查找所有div,显示较长的文本 - t.Log("\n=== 查找长文本div ===") - allDivs, _ := wenxinCollector.Page.Elements("div") - var longTextDivs []struct{ - index int - text string - class string - } - - for i, div := range allDivs { - text, _ := div.Text() - if len(strings.TrimSpace(text)) > 50 { - classAttr, _ := div.Attribute("class") - classStr := "" - if classAttr != nil { - classStr = *classAttr - } - longTextDivs = append(longTextDivs, struct{ - index int - text string - class string - }{i, text, 
classStr}) - } - } - - t.Logf("找到 %d 个长文本div", len(longTextDivs)) - for _, item := range longTextDivs { - t.Logf("[%d] class=%s, 长度=%d, 前150字符=%s", - item.index, item.class, len(item.text), item.text[:min(150, len(item.text))]) - } - - t.Log("\n=== 调试完成,请保持浏览器打开以便观察 ===") - select {} -} - -// BenchmarkWenxinCollector 性能测试(仅供参考) -func BenchmarkWenxinCollector(b *testing.B) { - b.Skip("跳过性能测试") -} - -// ExampleCollectManager 使用示例 -func ExampleCollectManager() { - - // 列出支持的平台 - platforms := manager.ListPlatforms() - fmt.Printf("支持的平台: %v\n", platforms) - - // 设置参数 - params := &collect.CollectParams{ - Headless: true, - UserIndex: "user_001", - PlatIndex: "wenxin", - RequestID: "req_001", - Platform: "wenxin", - } - - // 向文心一言提问 - answer, err := manager.AskQuestion("wenxin", params, "什么是人工智能?") - if err != nil { - fmt.Printf("错误: %v\n", err) - return - } - - fmt.Printf("答案: %s\n", answer) -} - -// ExampleWenxinCollector_WaitLogin 文心一言登录示例 -func ExampleWenxinCollector_WaitLogin() { - params := &collect.CollectParams{ - Headless: false, // 登录时需要显示浏览器 - UserIndex: "user_001", - PlatIndex: "wenxin", - RequestID: "example_login_001", - Platform: "wenxin", - } - - fmt.Println("正在打开文心一言登录页面...") - success, msg := manager.WaitLogin("wenxin", params) - if success { - fmt.Printf("登录成功: %s\n", msg) - fmt.Println("Cookie已保存,下次可以自动登录") - } else { - fmt.Printf("登录失败: %s\n", msg) - } -} diff --git a/go.mod b/go.mod index 3b9a971..04625e5 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( require ( filippo.io/edwards25519 v1.1.0 // indirect github.com/andybalholm/brotli v1.1.0 // indirect + github.com/atotto/clipboard v0.1.4 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/gabriel-vasile/mimetype v1.4.13 // indirect diff --git a/go.sum b/go.sum index 1600f3b..c2ba6f7 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible 
h1:8psS8a+wKfiLt1iVDX79F github.com/aliyun/aliyun-oss-go-sdk v3.0.2+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= +github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= diff --git a/internal/ai_tool/collect.go b/internal/ai_tool/collect.go index e363cf7..41953ee 100644 --- a/internal/ai_tool/collect.go +++ b/internal/ai_tool/collect.go @@ -64,7 +64,7 @@ type CreateReq struct { // 品牌词,多个用英文逗号隔开 Keywords string `json:"keywords"` // 平台,1-deepseek,2-豆包,3-元宝,4-千问,5-文心一言,6-纳米,7-kimi,8-智普 - Platform int64 `json:"platform"` + Platform int `json:"platform"` // 问题 Question string `json:"question"` // 建议填第三方的用户id。方便查单 @@ -110,7 +110,7 @@ type CheckTaskRes struct { } `json:"data"` } -func (s *Collect) CheckTask(requestId string) (*CheckTaskRes, error) { +func (s *Collect) CheckTask(ctx context.Context, requestId string) (*CheckTaskRes, error) { url := "http://8.138.187.158:8082/api/geo/check_task" request := map[string]interface{}{ "request_id": requestId, diff --git a/internal/collect/base.go b/internal/collect/base.go index a8bf8ff..8782080 100644 --- a/internal/collect/base.go +++ b/internal/collect/base.go @@ -110,16 +110,25 @@ func (b *BaseCollector) SetupDriver() error { l.UserDataDir(userDataDir) l.Set("window-size", "1920,1080") + + // 设置中文语言环境 l.Set("lang", "zh-CN") + l.Set("accept-lang", "zh-CN,zh;q=0.9,en;q=0.8") l.Set("force-device-scale-factor", "1") + // 设置时区为中国 + l.Set("timezone", 
"Asia/Shanghai") + url, err := l.Launch() if err != nil { return fmt.Errorf("启动浏览器失败: %v", err) } b.Browser = rod.New().Context(b.ctx).ControlURL(url).MustConnect() + + // 创建新页面 b.Page = b.Browser.MustPage() + return nil } @@ -302,6 +311,11 @@ func (b *BaseCollector) InitPage() error { b.Page.MustNavigate(b.ChatURL) b.WaitForPageReady(5) b.Sleep(2) + } else { + // 首次访问,先导航到页面 + b.Page.MustNavigate(b.ChatURL) + b.WaitForPageReady(5) + b.Sleep(2) } b.SaveCookies() diff --git a/internal/collect/utils.go b/internal/collect/utils.go index ceda5ba..58fbd02 100644 --- a/internal/collect/utils.go +++ b/internal/collect/utils.go @@ -1,6 +1,7 @@ package collect import ( + "fmt" "regexp" "strings" ) @@ -26,10 +27,110 @@ func CleanHTMLTags(html string) string { // 去除多余的空格和换行 cleaned = strings.TrimSpace(cleaned) - + // 将多个连续空格替换为单个空格 multipleSpaces := regexp.MustCompile(`\s+`) cleaned = multipleSpaces.ReplaceAllString(cleaned, " ") return cleaned } + +// CleanDivTags 只清理div标签,保留其他HTML标签和纯文本内容 +// 这个函数会移除所有
%s
", strings.ReplaceAll(textContent, "\n", ""))
+
+ // 使用HTML高亮方法
+ return HighlightKeywordsInHTML(htmlContent, pointKeys)
+}
diff --git a/internal/collect/wenxin.go b/internal/collect/wenxin.go
index 703efae..65e4e84 100644
--- a/internal/collect/wenxin.go
+++ b/internal/collect/wenxin.go
@@ -8,10 +8,20 @@ import (
"strings"
"time"
+ "github.com/atotto/clipboard"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
+ "regexp"
)
+// Source describes one cited reference collected from the answer's sources panel.
+type Source struct {
+ Title string `json:"name"` // reference title; serialized under the JSON key "name"
+ Url string `json:"url"` // link target, taken from an anchor href or from the page URL after clicking the item
+ PlatformName string `json:"platform"` // text of the item's "siteText" element
+ PlatformIcon string `json:"Platform_icon"` // src of the item's "site_icon" image; NOTE(review): tag casing differs from the other keys — confirm consumers expect "Platform_icon"
+}
+
// WenxinCollector 文心一言收集器
type WenxinCollector struct {
*BaseCollector
@@ -30,6 +40,35 @@ func NewWenxinCollector(ctx context.Context, params *CollectParams, cfg *config.
return collector
}
+// SetupDriver starts the browser via BaseCollector.SetupDriver and then makes
+// every document opened in the page report zh-CN as its browser language.
+//
+// It returns the error from BaseCollector.SetupDriver unchanged. A failure to
+// register the language override is only logged, because the collector can
+// still work without it.
+func (c *WenxinCollector) SetupDriver() error {
+	if err := c.BaseCollector.SetupDriver(); err != nil {
+		return err
+	}
+
+	// Override navigator.language / navigator.languages with Chinese values.
+	jsCode := `
+	(function() {
+		Object.defineProperty(navigator, 'language', {
+			get: function() { return 'zh-CN'; },
+			configurable: true
+		});
+		Object.defineProperty(navigator, 'languages', {
+			get: function() { return ['zh-CN', 'zh', 'en']; },
+			configurable: true
+		});
+	})();
+	`
+
+	// Register the script for every new document. A one-off Eval would only patch
+	// the blank page that exists right now; the navigator object is recreated on
+	// navigation, so the override would be gone once the chat URL is loaded.
+	if _, err := c.Page.EvalOnNewDocument(jsCode); err != nil {
+		c.LogInfo(fmt.Sprintf("设置语言失败: %v", err))
+	} else {
+		c.LogInfo("已设置浏览器语言为中文 (zh-CN)")
+	}
+
+	return nil
+}
+
// CheckLoginStatus 检查登录状态
func (c *WenxinCollector) CheckLoginStatus() bool {
currentURL := c.GetCurrentURL()
@@ -150,6 +189,24 @@ func (c *WenxinCollector) AskQuestion(question string) (string, error) {
}
c.LogInfo(fmt.Sprintf("成功获取答案,长度: %d 字符", len(answer)))
+
+ // 获取分享链接
+ _, shareErr := c.getShareLink()
+ if shareErr != nil {
+ c.LogInfo(fmt.Sprintf("分享链接获取状态: %v", shareErr))
+ }
+
+ // 获取引用来源
+ sources, sourcesErr := c.GetSources()
+ if sourcesErr != nil {
+ c.LogInfo(fmt.Sprintf("引用来源获取失败: %v", sourcesErr))
+ } else if len(sources) > 0 {
+ c.LogInfo(fmt.Sprintf("成功获取 %d 个引用来源", len(sources)))
+ for i, source := range sources {
+ c.LogInfo(fmt.Sprintf(" [%d] 标题: %s, 来源: %s, URL: %s", i+1, source.Title, source.PlatformName, source.Url))
+ }
+ }
+
return answer, nil
}
@@ -411,3 +468,507 @@ func (c *WenxinCollector) SafeElement(selector string) (*rod.Element, error) {
}
return c.Page.Element(selector)
}
+
+// shareURLPattern matches the first http(s) URL inside a piece of text.
+var shareURLPattern = regexp.MustCompile(`https?://[^\s]+`)
+
+// elementQuerier is the lookup method shared by *rod.Page and *rod.Element.
+type elementQuerier interface {
+	Elements(selector string) (rod.Elements, error)
+}
+
+// classSelector builds a CSS selector for tag elements whose class attribute
+// contains needle, compared case-insensitively.
+func classSelector(tag, needle string) string {
+	return fmt.Sprintf("%s[class*=%q i]", tag, needle)
+}
+
+// firstWithClass returns the first tag element below root whose class attribute
+// contains needle, or nil when nothing matches. The match is done by the
+// browser in a single query instead of one round-trip per element.
+func firstWithClass(root elementQuerier, tag, needle string) (*rod.Element, error) {
+	found, err := root.Elements(classSelector(tag, needle))
+	if err != nil {
+		return nil, err
+	}
+	return found.First(), nil
+}
+
+// getShareLink drives the share dialog of the latest answer and returns the
+// generated share URL.
+//
+// Flow: click the share icon inside the "dialogCardBottom" container, click
+// the "genLink" button inside the "shareContainer" popup (which copies the
+// link), then read the link back from the system clipboard. An error is
+// returned when any element is missing, a click fails, or the clipboard holds
+// no URL.
+func (c *WenxinCollector) getShareLink() (string, error) {
+	c.LogInfo("=== 开始获取分享链接 ===")
+
+	// Step 1: locate the action bar below the answer.
+	c.LogInfo("步骤1: 查找包含'dialogCardBottom'的div元素...")
+	dialogDiv, err := firstWithClass(c.Page, "div", "dialogCardBottom")
+	if err != nil {
+		return "", fmt.Errorf("获取页面div元素失败: %w", err)
+	}
+	if dialogDiv == nil {
+		return "", fmt.Errorf("未找到包含'dialogCardBottom' class的div元素")
+	}
+
+	// Step 2: locate the share icon, preferring a div and falling back to any tag.
+	c.LogInfo("步骤2: 在dialogCardBottom容器内查找包含'share'的元素...")
+	shareEl, err := firstWithClass(dialogDiv, "div", "share")
+	if err != nil {
+		return "", fmt.Errorf("获取子div元素失败: %w", err)
+	}
+	if shareEl == nil {
+		c.LogInfo("未在子div中找到,尝试查找其他元素类型...")
+		if shareEl, err = firstWithClass(dialogDiv, "*", "share"); err != nil {
+			return "", fmt.Errorf("获取子元素失败: %w", err)
+		}
+	}
+	if shareEl == nil {
+		return "", fmt.Errorf("在dialogCardBottom容器内未找到包含'share' class的元素")
+	}
+
+	c.LogInfo("滚动到分享图标位置...")
+	if scrollErr := shareEl.ScrollIntoView(); scrollErr != nil {
+		// Scrolling is best effort; the click below may still succeed.
+		c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
+	}
+	c.SleepMs(800)
+
+	c.LogInfo("执行普通点击...")
+	if clickErr := shareEl.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
+		return "", fmt.Errorf("点击分享图标失败: %w", clickErr)
+	}
+	c.LogInfo("✓ 点击成功")
+	c.SleepMs(2000) // give the popup time to appear
+	c.Screenshot("after_share_icon_click")
+
+	// Step 3: locate the share popup.
+	c.LogInfo("步骤3: 查找包含'shareContainer'的div元素...")
+	shareContainerDiv, err := firstWithClass(c.Page, "div", "shareContainer")
+	if err != nil {
+		return "", fmt.Errorf("获取页面div元素失败: %w", err)
+	}
+	if shareContainerDiv == nil {
+		return "", fmt.Errorf("未找到包含'shareContainer' class的div元素")
+	}
+
+	// Step 4: locate and click the button that generates and copies the link.
+	c.LogInfo("步骤4: 在shareContainer容器内查找包含'genLink'的button...")
+	genLinkBtn, err := firstWithClass(shareContainerDiv, "button", "genLink")
+	if err != nil {
+		return "", fmt.Errorf("获取button元素失败: %w", err)
+	}
+	if genLinkBtn == nil {
+		return "", fmt.Errorf("在shareContainer容器内未找到包含'genLink' class的button")
+	}
+
+	c.LogInfo("滚动到genLink按钮位置...")
+	if scrollErr := genLinkBtn.ScrollIntoView(); scrollErr != nil {
+		c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
+	}
+	c.SleepMs(500)
+
+	// Empty the clipboard first so a URL left over from an earlier copy can never
+	// be mistaken for the link generated by this click.
+	if clearErr := clipboard.WriteAll(""); clearErr != nil {
+		c.LogInfo(fmt.Sprintf("清空剪贴板失败: %v", clearErr))
+	}
+
+	c.LogInfo("点击genLink按钮...")
+	if clickErr := genLinkBtn.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
+		return "", fmt.Errorf("点击genLink按钮失败: %w", clickErr)
+	}
+	c.LogInfo("✓ genLink按钮点击成功")
+	c.SleepMs(1500) // wait for the page to finish copying the link
+
+	// Step 5: read the link back from the system clipboard.
+	c.LogInfo("步骤5: 从系统剪贴板读取分享链接...")
+	clipboardText, err := clipboard.ReadAll()
+	if err != nil {
+		return "", fmt.Errorf("读取剪贴板失败: %w", err)
+	}
+	if clipboardText == "" {
+		return "", fmt.Errorf("剪贴板内容为空")
+	}
+	c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", clipboardText))
+
+	// The copied text may carry extra wording around the link; keep only the URL.
+	link := shareURLPattern.FindString(clipboardText)
+	if link == "" {
+		return "", fmt.Errorf("未能从剪贴板内容中提取URL")
+	}
+
+	c.LogInfo(fmt.Sprintf("✓✓✓ 成功获取分享链接: %s", link))
+	return link, nil
+}
+
+// sourceItemDivs returns the div elements inside the "list" container of the
+// "SourcesViewer" side panel, in document order.
+//
+// NOTE(review): every descendant div of the list is returned, not only those
+// whose class contains "item"; nested divs are therefore treated as items by
+// the caller. Confirm against the live DOM before narrowing the selector.
+func (c *WenxinCollector) sourceItemDivs() (rod.Elements, error) {
+	c.LogInfo("步骤5: 查找包含'SourcesViewer'的div元素...")
+	viewer, err := firstWithClass(c.Page, "div", "SourcesViewer")
+	if err != nil {
+		return nil, fmt.Errorf("获取页面div元素失败: %w", err)
+	}
+	if viewer == nil {
+		return nil, fmt.Errorf("未找到SourcesViewer侧边窗")
+	}
+
+	c.LogInfo("步骤6: 在SourcesViewer内查找包含'list'的div...")
+	list, err := firstWithClass(viewer, "div", "list")
+	if err != nil {
+		return nil, fmt.Errorf("获取子div元素失败: %w", err)
+	}
+	if list == nil {
+		return nil, fmt.Errorf("未找到list容器")
+	}
+
+	c.LogInfo("步骤7: 在list内查找包含'item'的div...")
+	items, err := list.Elements("div")
+	if err != nil {
+		return nil, fmt.Errorf("获取item元素失败: %w", err)
+	}
+	return items, nil
+}
+
+// readSourceFields extracts title, icon, site name and href from one item.
+// Lookup and read errors are deliberately ignored: a field that cannot be read
+// simply stays empty.
+func (c *WenxinCollector) readSourceFields(item *rod.Element) Source {
+	var source Source
+
+	if titleEl, _ := firstWithClass(item, "div", "title"); titleEl != nil {
+		text, _ := titleEl.Text()
+		source.Title = strings.TrimSpace(text)
+		c.LogInfo(fmt.Sprintf(" 标题: %s", source.Title))
+	}
+
+	if iconEl, _ := firstWithClass(item, "img", "site_icon"); iconEl != nil {
+		if src, _ := iconEl.Attribute("src"); src != nil {
+			source.PlatformIcon = *src
+			c.LogInfo(fmt.Sprintf(" 图标: %s", source.PlatformIcon))
+		}
+	}
+
+	if siteEl, _ := firstWithClass(item, "div", "sitetext"); siteEl != nil {
+		text, _ := siteEl.Text()
+		source.PlatformName = strings.TrimSpace(text)
+		c.LogInfo(fmt.Sprintf(" 来源: %s", source.PlatformName))
+	}
+
+	links, _ := item.Elements("a")
+	if anchor := links.First(); anchor != nil {
+		if href, _ := anchor.Attribute("href"); href != nil && *href != "" {
+			source.Url = *href
+			c.LogInfo(fmt.Sprintf(" URL (从href获取): %s", source.Url))
+		}
+	}
+
+	return source
+}
+
+// sourceURLByClick clicks item and, when the page URL changes, returns the new
+// URL after navigating back. navigated reports whether the page left the
+// original document, in which case previously found elements are stale.
+func (c *WenxinCollector) sourceURLByClick(item *rod.Element) (link string, navigated bool) {
+	before, err := c.Page.Info()
+	if err != nil {
+		c.LogInfo(fmt.Sprintf(" 获取当前URL失败: %v", err))
+		return "", false
+	}
+
+	if scrollErr := item.ScrollIntoView(); scrollErr != nil {
+		c.LogInfo(fmt.Sprintf(" 滚动失败: %v", scrollErr))
+	}
+	c.SleepMs(300)
+
+	if clickErr := item.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
+		c.LogInfo(fmt.Sprintf(" 点击item失败: %v", clickErr))
+		return "", false
+	}
+	c.SleepMs(2000) // wait for a possible navigation
+
+	after, err := c.Page.Info()
+	if err != nil {
+		c.LogInfo(fmt.Sprintf(" 获取跳转后URL失败: %v", err))
+		return "", false
+	}
+	if after.URL == before.URL {
+		return "", false
+	}
+	c.LogInfo(fmt.Sprintf(" URL (从跳转获取): %s", after.URL))
+
+	if backErr := c.Page.NavigateBack(); backErr != nil {
+		c.LogInfo(fmt.Sprintf(" 返回上一页失败: %v", backErr))
+	}
+	c.SleepMs(1500) // wait for the original page to be restored
+	return after.URL, true
+}
+
+// GetSources collects up to five cited references of the current answer.
+//
+// It opens the sources side panel by clicking the "titleText" element and reads
+// title, icon, site name and URL from each listed item. When the entry point
+// to the panel is not on the page it returns an empty result with a nil error;
+// an error is returned when a page query fails, the click fails, or the panel
+// does not show up after the click.
+func (c *WenxinCollector) GetSources() ([]Source, error) {
+	c.LogInfo("=== 开始获取文章引用来源 ===")
+
+	var sources []Source
+
+	// Step 1: the answer must contain a roleSystem block.
+	c.LogInfo("步骤1: 查找roleSystem容器...")
+	roleSystemDiv, err := firstWithClass(c.Page, "div", "roleSystem")
+	if err != nil {
+		return nil, fmt.Errorf("获取页面div元素失败: %w", err)
+	}
+	if roleSystemDiv == nil {
+		c.LogInfo("未找到roleSystem容器,结束获取")
+		return sources, nil
+	}
+
+	// Step 2: roleSystem must hold a container div (presence check only).
+	c.LogInfo("步骤2: 在roleSystem内查找包含'container'的div...")
+	containerDiv, err := firstWithClass(roleSystemDiv, "div", "container")
+	if err != nil {
+		return nil, fmt.Errorf("获取roleSystem子div元素失败: %w", err)
+	}
+	if containerDiv == nil {
+		c.LogInfo("未找到container容器,结束获取")
+		return sources, nil
+	}
+
+	// Step 3: the panel entry lives in the second container div of the whole page.
+	c.LogInfo("步骤3: 在页面中查找所有包含'container'的div,找到第二个...")
+	containers, err := c.Page.Elements(classSelector("div", "container"))
+	if err != nil {
+		return nil, fmt.Errorf("获取页面div元素失败: %w", err)
+	}
+	if len(containers) < 2 {
+		c.LogInfo(fmt.Sprintf("未找到第二个container容器(共找到 %d 个),结束获取", len(containers)))
+		return sources, nil
+	}
+
+	// Step 4: find and click the titleText element to open the side panel.
+	c.LogInfo("步骤4: 在第二个container内查找包含'titleText'的div...")
+	titleTextDiv, err := firstWithClass(containers[1], "div", "titleText")
+	if err != nil {
+		return nil, fmt.Errorf("获取第二个container的子div元素失败: %w", err)
+	}
+	if titleTextDiv == nil {
+		c.LogInfo("未找到titleText元素,结束获取")
+		return sources, nil
+	}
+
+	c.LogInfo("点击titleText元素...")
+	if scrollErr := titleTextDiv.ScrollIntoView(); scrollErr != nil {
+		c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
+	}
+	c.SleepMs(500)
+	if clickErr := titleTextDiv.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
+		return nil, fmt.Errorf("点击titleText失败: %w", clickErr)
+	}
+	c.LogInfo("✓ titleText点击成功")
+	c.SleepMs(2000) // give the side panel time to appear
+	c.Screenshot("after_titletext_click")
+
+	itemDivs, err := c.sourceItemDivs()
+	if err != nil {
+		return nil, err
+	}
+	c.LogInfo(fmt.Sprintf("找到 %d 个item元素", len(itemDivs)))
+
+	const maxItems = 5
+	// len(itemDivs) is re-checked on every pass because the slice is replaced
+	// after a navigation and may have become shorter.
+	for i := 0; i < maxItems && i < len(itemDivs); i++ {
+		item := itemDivs[i]
+		c.LogInfo(fmt.Sprintf("\n--- 处理第 %d 个item ---", i+1))
+
+		source := c.readSourceFields(item)
+
+		stop := false
+		if source.Url == "" {
+			// No anchor in the item: click it and use the URL the page navigates to.
+			c.LogInfo(" 未找到href,尝试点击item获取URL...")
+			link, navigated := c.sourceURLByClick(item)
+			source.Url = link
+			if navigated {
+				// The page was reloaded, so the old element handles are stale.
+				c.LogInfo(" 重新查找item元素...")
+				refreshed, refreshErr := c.sourceItemDivs()
+				if refreshErr != nil {
+					c.LogInfo(fmt.Sprintf(" 重新查找item元素失败,停止处理剩余来源: %v", refreshErr))
+					stop = true
+				} else {
+					itemDivs = refreshed
+				}
+			}
+		}
+
+		if source.Title != "" || source.Url != "" {
+			sources = append(sources, source)
+		}
+		if stop {
+			break
+		}
+	}
+
+	c.LogInfo(fmt.Sprintf("\n✓✓✓ 成功获取 %d 个引用来源", len(sources)))
+	return sources, nil
+}
diff --git a/internal/data/model/collect.gen.go b/internal/data/model/collect.gen.go
index ec400e4..b34b631 100644
--- a/internal/data/model/collect.gen.go
+++ b/internal/data/model/collect.gen.go
@@ -12,16 +12,17 @@ const TableNameCollect = "collect"
// Collect mapped from table