package collect

import (
	"context"
	"fmt"
	"regexp"
	"strings"
	"time"

	"geo/internal/config"

	"github.com/atotto/clipboard"
	"github.com/go-rod/rod"
	"github.com/go-rod/rod/lib/proto"
	"github.com/gofiber/fiber/v2/log"
)

// deepseekShareURLPattern matches an http(s) URL up to the next whitespace.
// It is compiled once at package scope instead of on every getShareLink call.
var deepseekShareURLPattern = regexp.MustCompile(`https?://[^\s]+`)

// DeepseekCollector drives the DeepSeek chat web UI through a rod browser
// page. Shared browser/page plumbing comes from the embedded BaseCollector.
type DeepseekCollector struct {
	*BaseCollector
}

// NewDeepseekCollector builds a DeepSeek collector on top of a BaseCollector
// created from the given arguments and points its login and chat URLs at
// chat.deepseek.com.
func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger, browser *rod.Browser, page *rod.Page) CollectorInterface {
	collector := &DeepseekCollector{
		BaseCollector: NewBaseCollector(ctx, params, cfg, logger, browser, page),
	}

	// Login and chat share the same entry URL.
	collector.LoginURL = "https://chat.deepseek.com/"
	collector.ChatURL = "https://chat.deepseek.com/"

	return collector
}

// CheckLoginStatus logs the current page URL and reports true when it differs
// from LoginURL.
//
// NOTE(review): LoginURL and ChatURL are set to the same value in
// NewDeepseekCollector, so a page sitting exactly on the chat URL is reported
// as "not logged in". Confirm against the site's real redirect behaviour that
// this check is not inverted.
func (c *DeepseekCollector) CheckLoginStatus() bool {
	currentURL := c.GetCurrentURL()
	c.LogInfo(fmt.Sprintf("当前URL: %s", currentURL))

	return currentURL != c.LoginURL
}

// WaitLogin starts the browser, opens the chat URL and waits for the user to
// log in, polling CheckLoginStatus once per second for up to 300 attempts.
//
// It returns (true, "already_logged_in") or (true, "login_success") after
// saving cookies, and (false, reason) when the browser cannot be started, the
// navigation fails, or the wait times out.
func (c *DeepseekCollector) WaitLogin() (bool, string) {
	const maxWaitSeconds = 300

	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Navigate (not MustNavigate) so a network failure is reported through the
	// return values instead of panicking.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		return false, fmt.Sprintf("页面导航失败: %v", err)
	}
	c.Sleep(3)

	if c.CheckLoginStatus() {
		c.SaveCookies()
		return true, "already_logged_in"
	}

	c.LogInfo("未检测到登录状态,等待用户登录...")

	for i := 0; i < maxWaitSeconds; i++ {
		if c.CheckLoginStatus() {
			// Give the page a moment to settle before persisting cookies.
			c.Sleep(2)
			c.SaveCookies()
			return true, "login_success"
		}
		time.Sleep(1 * time.Second)
	}

	return false, "登录超时"
}

// AskQuestion types the question into the chat page, sends it, waits for the
// streamed answer to settle and returns the keyword-highlighted answer
// together with a share link.
//
// The browser lifecycle (SetupDriver / Close) is owned by the caller and is
// not touched here. Fetching the share link is best-effort: a failure is
// logged and ShareLink is left empty.
func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) {
	if err := c.InitPage(); err != nil {
		return nil, fmt.Errorf("页面初始化失败: %w", err)
	}
	c.Sleep(3)

	if err := c.inputQuestion(question); err != nil {
		return nil, fmt.Errorf("输入问题失败: %w", err)
	}

	if err := c.clickSendButton(); err != nil {
		return nil, fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return nil, fmt.Errorf("获取答案失败: %w", err)
	}

	// Highlight configured keywords in the answer HTML.
	answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)

	// The share link is optional; record why it is missing instead of
	// dropping the error silently.
	shareLink, err := c.getShareLink()
	if err != nil {
		c.LogInfo(fmt.Sprintf("获取分享链接失败: %v", err))
		shareLink = ""
	}

	return &CollectResult{
		Answer:     answerStr,
		ShareLink:  shareLink,
		IsExposure: isExposure,
	}, nil
}

// inputQuestion locates the chat input box using a list of candidate
// selectors, clicks it to take focus and types the question into it.
// It returns an error when no input box is found, the click fails, or the
// text cannot be entered.
func (c *DeepseekCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题...")

	// Candidate selectors for the DeepSeek input box, tried in order.
	inputSelectors := []string{
		"textarea[placeholder*='输入']",
		"textarea[placeholder*='问']",
		"textarea",
		"[contenteditable='true']",
		".chat-input textarea",
		"#message-input",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		// Only accept an element that came back without an error, so a failed
		// lookup can never leak a stale or partial element.
		elem, err := c.WaitForElementVisible(selector, 10)
		if err == nil && elem != nil {
			inputBox = elem
			c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
			break
		}
	}

	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to take focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击输入框失败: %w", err)
	}
	c.SleepMs(500)

	// An explicit focus failure is not fatal on its own; the Input call below
	// is what decides whether the question was entered.
	if err := inputBox.Focus(); err != nil {
		c.LogInfo(fmt.Sprintf("聚焦输入框失败: %v", err))
	}
	c.SleepMs(200)

	// Without this check a failed input would still be logged as success and
	// an empty message would be sent.
	if err := inputBox.Input(question); err != nil {
		return fmt.Errorf("输入文本失败: %w", err)
	}

	c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
	c.SleepMs(1000)

	return nil
}

// findByClassKeyword scans every element on the page and returns the first
// one whose class attribute contains any of keywords (compared in lower case)
// and whose tag name, upper-cased, is one of tags. It also returns the
// element's raw class attribute and its upper-cased tag name.
//
// The tag name is upper-cased before comparison because elements outside the
// HTML namespace (such as <svg>) do not report an upper-case tagName.
//
// A nil element with a nil error means nothing matched; a non-nil error means
// the page elements could not be listed.
func (c *DeepseekCollector) findByClassKeyword(keywords, tags []string) (*rod.Element, string, string, error) {
	allElements, err := c.Page.Elements("*")
	if err != nil {
		return nil, "", "", err
	}

	for _, elem := range allElements {
		classAttr, _ := elem.Attribute("class")
		if classAttr == nil {
			continue
		}

		classLower := strings.ToLower(*classAttr)
		keywordHit := false
		for _, kw := range keywords {
			if strings.Contains(classLower, kw) {
				keywordHit = true
				break
			}
		}
		if !keywordHit {
			continue
		}

		tagProp, err := elem.Property("tagName")
		if err != nil {
			continue
		}
		tag := strings.ToUpper(tagProp.Str())
		for _, want := range tags {
			if tag == want {
				return elem, *classAttr, tag, nil
			}
		}
	}

	return nil, "", "", nil
}

// clickSendButton finds the send button and clicks it.
//
// Lookup order: first element whose class contains "send" or "submit" and is
// a BUTTON, DIV or SVG; otherwise the last <button> on the page; otherwise the
// first <button> that is the direct parent of an <svg>.
func (c *DeepseekCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	sendBtn, class, tag, err := c.findByClassKeyword(
		[]string{"send", "submit"},
		[]string{"BUTTON", "DIV", "SVG"},
	)
	if err != nil {
		return fmt.Errorf("获取页面元素失败: %w", err)
	}
	if sendBtn != nil {
		c.LogInfo(fmt.Sprintf("通过正则找到发送按钮: class=%s, tag=%s", class, tag))
	}

	if sendBtn == nil {
		// Fallback: assume the last button on the page is the send button.
		// A lookup error simply leaves sendBtn nil for the next fallback.
		buttons, _ := c.Page.Elements("button")
		if len(buttons) > 0 {
			sendBtn = buttons[len(buttons)-1]
			c.LogInfo("使用最后一个button作为发送按钮")
		}
	}

	if sendBtn == nil {
		// Fallback: a button that directly wraps an SVG icon.
		svgs, _ := c.Page.Elements("svg")
		for _, svg := range svgs {
			parent, _ := svg.Parent()
			if parent == nil {
				continue
			}
			tagProp, _ := parent.Property("tagName")
			if tagProp.Str() == "BUTTON" {
				sendBtn = parent
				c.LogInfo("使用包含SVG的button作为发送按钮")
				break
			}
		}
	}

	if sendBtn == nil {
		return fmt.Errorf("未找到发送按钮")
	}

	c.SleepMs(500)

	// Scrolling is best-effort; the click below is what matters.
	if err := sendBtn.ScrollIntoView(); err != nil {
		c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
	}
	c.SleepMs(300)

	c.LogInfo("执行点击...")
	if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击发送按钮失败: %w", err)
	}

	c.LogInfo("已点击发送按钮")
	c.SleepMs(2000)

	return nil
}

// readLatestAnswer tries each selector in order and, for the first one whose
// last matching element is visible and carries more than 30 bytes of trimmed
// content, returns that content: the HTML passed through CleanDivTags when
// available, otherwise the trimmed text. The boolean is false when no
// selector produced an answer.
func (c *DeepseekCollector) readLatestAnswer(selectors []string) (string, bool) {
	for _, selector := range selectors {
		answerElements, err := c.Page.Elements(selector)
		if err != nil || len(answerElements) == 0 {
			continue
		}

		// The last match is taken to be the newest reply.
		latest := answerElements[len(answerElements)-1]
		if visible, _ := latest.Visible(); !visible {
			continue
		}

		// Prefer the HTML content.
		htmlContent, err := latest.HTML()
		if err == nil && len(strings.TrimSpace(htmlContent)) > 30 {
			cleaned := CleanDivTags(htmlContent)
			c.LogInfo(fmt.Sprintf("找到答案(%s),清理后文本长度: %d", selector, len(cleaned)))
			return cleaned, true
		}

		// Fall back to the plain text when HTML is unavailable or too short.
		textContent, _ := latest.Text()
		if trimmed := strings.TrimSpace(textContent); len(trimmed) > 30 {
			c.LogInfo(fmt.Sprintf("找到答案(%s),文本长度: %d", selector, len(trimmed)))
			return trimmed, true
		}
	}

	return "", false
}

// waitForAnswer polls the page every 1.5 seconds for the newest answer and
// returns it once its content has been identical on 5 consecutive polls after
// first being seen, which is treated as the end of the streamed output.
// It returns an error if that does not happen within 180 seconds.
func (c *DeepseekCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待AI回答...")

	const (
		timeoutSeconds      = 180 // maximum total wait, in seconds
		requiredStableCount = 5   // unchanged polls needed to call the answer final
		pollIntervalMs      = 1500
		progressLogEvery    = 10 * time.Second
	)

	// Candidate selectors for the answer area, tried in order on every poll.
	answerSelectors := []string{
		".message-content",
		".response-content",
		"[class*='assistant'] [class*='content']",
		"[class*='ai'] [class*='message']",
		".chat-message.ai",
		"[class*='answer']",
		"[class*='response']",
	}

	startTime := time.Now()
	lastProgressLog := startTime
	deadline := time.Duration(timeoutSeconds) * time.Second

	var lastAnswer string
	stableCount := 0
	isAnswering := false

	for time.Since(startTime) < deadline {
		answerText, found := c.readLatestAnswer(answerSelectors)
		if !found {
			c.LogInfo("未找到答案元素,继续等待...")
		}

		if len(answerText) > 30 {
			if !isAnswering {
				c.LogInfo("检测到AI开始回答...")
				isAnswering = true
			}

			if answerText == lastAnswer {
				// Content unchanged since the previous poll.
				stableCount++
				c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerText)))

				if stableCount >= requiredStableCount {
					c.LogInfo(fmt.Sprintf("✓ AI回答完成,最终长度: %d 字符", len(answerText)))
					return answerText, nil
				}
			} else {
				// Still streaming: restart the stability counter.
				stableCount = 0
				lastAnswer = answerText
				c.LogInfo(fmt.Sprintf("检测到流式输出,当前长度: %d 字符", len(answerText)))
			}
		}

		c.SleepMs(pollIntervalMs)

		// Progress heartbeat roughly every 10 seconds. Tracking the last log
		// time avoids missed marks, because the poll interval does not divide
		// evenly into whole seconds.
		if time.Since(lastProgressLog) >= progressLogEvery {
			lastProgressLog = time.Now()
			c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", int(time.Since(startTime).Seconds())))
		}
	}

	return "", fmt.Errorf("等待答案超时(%d秒)", timeoutSeconds)
}

// findShareButton returns the share button, or nil when none is found.
// It tries a list of CSS selectors first and then falls back to scanning all
// elements for a class containing "share".
func (c *DeepseekCollector) findShareButton() *rod.Element {
	// NOTE(review): these selectors are guesses and may need adjusting to the
	// actual DeepSeek page structure.
	shareSelectors := []string{
		"[class*='share']",
		"[aria-label*='分享']",
		"[aria-label*='Share']",
		"button svg[path*='share']",
		".share-button",
		".share-icon",
	}

	for _, selector := range shareSelectors {
		btns, err := c.Page.Elements(selector)
		if err == nil && len(btns) > 0 {
			c.LogInfo(fmt.Sprintf("✓ 找到分享按钮: %s", selector))
			return btns[0]
		}
	}

	// Fallback: any BUTTON/DIV/SVG whose class mentions "share". A listing
	// error is treated the same as "not found".
	btn, class, tag, _ := c.findByClassKeyword(
		[]string{"share"},
		[]string{"BUTTON", "DIV", "SVG"},
	)
	if btn != nil {
		c.LogInfo(fmt.Sprintf("✓ 通过正则找到分享按钮: tag=%s, class=%s", tag, class))
	}
	return btn
}

// locateCopyLinkButton makes a single attempt to find the "copy link" button:
// CSS selectors first, then a scan for a BUTTON or DIV whose class contains
// "copy" or "link". It returns nil when nothing matches.
func (c *DeepseekCollector) locateCopyLinkButton() *rod.Element {
	copySelectors := []string{
		"[class*='copy']",
		"[class*='Copy']",
		"[aria-label*='复制']",
		"[aria-label*='Copy']",
		"button[class*='link']",
	}

	for _, selector := range copySelectors {
		btns, err := c.Page.Elements(selector)
		if err == nil && len(btns) > 0 {
			c.LogInfo(fmt.Sprintf("✓ 找到复制链接按钮: %s", selector))
			return btns[0]
		}
	}

	btn, class, tag, _ := c.findByClassKeyword(
		[]string{"copy", "link"},
		[]string{"BUTTON", "DIV"},
	)
	if btn != nil {
		c.LogInfo(fmt.Sprintf("✓ 通过正则找到复制按钮: tag=%s, class=%s", tag, class))
	}
	return btn
}

// getShareLink clicks the share button, clicks the "copy link" button in the
// dialog that follows, and extracts the first http(s) URL from the system
// clipboard.
//
// NOTE(review): the clipboard is read as-is; if the copy click silently fails,
// a URL left over from an earlier copy would be returned. Consider clearing
// the clipboard before clicking if that matters to callers.
func (c *DeepseekCollector) getShareLink() (string, error) {
	const (
		maxRetries   = 5
		retryDelayMs = 1000
	)

	c.LogInfo("=== 开始获取分享链接 ===")

	// Step 1: find and click the share button.
	c.LogInfo("步骤1: 查找分享按钮...")
	shareBtn := c.findShareButton()
	if shareBtn == nil {
		c.LogInfo("未找到分享按钮,跳过获取分享链接")
		return "", fmt.Errorf("未找到分享按钮")
	}

	c.LogInfo("滚动到分享按钮位置...")
	if scrollErr := shareBtn.ScrollIntoView(); scrollErr != nil {
		c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
	}
	c.SleepMs(800)

	c.LogInfo("执行点击分享按钮...")
	if clickErr := shareBtn.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
		return "", fmt.Errorf("点击分享按钮失败: %w", clickErr)
	}
	c.LogInfo("✓ 点击成功")
	c.SleepMs(3000) // wait for the dialog to appear
	c.Screenshot("after_share_click")

	// Step 2: find the copy-link button in the dialog, retrying while it
	// renders.
	c.LogInfo("步骤2: 查找复制链接按钮...")
	var copyLinkBtn *rod.Element
	for attempt := 1; attempt <= maxRetries; attempt++ {
		c.LogInfo(fmt.Sprintf("第 %d/%d 次尝试查找复制链接按钮...", attempt, maxRetries))

		copyLinkBtn = c.locateCopyLinkButton()
		if copyLinkBtn != nil {
			break
		}

		if attempt < maxRetries {
			c.LogInfo(fmt.Sprintf("未找到复制链接按钮,%d毫秒后重试...", retryDelayMs))
			c.SleepMs(retryDelayMs)
		}
	}

	if copyLinkBtn == nil {
		c.Screenshot("copy_button_not_found")
		return "", fmt.Errorf("经过 %d 次重试仍未找到复制链接按钮", maxRetries)
	}

	c.LogInfo("滚动到复制链接按钮位置...")
	if scrollErr := copyLinkBtn.ScrollIntoView(); scrollErr != nil {
		c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
	}
	c.SleepMs(500)

	c.LogInfo("点击复制链接按钮...")
	if clickErr := copyLinkBtn.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
		return "", fmt.Errorf("点击复制链接按钮失败: %w", clickErr)
	}
	c.LogInfo("✓ 复制链接按钮点击成功")
	c.SleepMs(1500) // give the page time to write to the clipboard

	// Step 3: read the link back from the system clipboard.
	c.LogInfo("步骤3: 从系统剪贴板读取分享链接...")
	clipboardText, err := clipboard.ReadAll()
	if err != nil {
		return "", fmt.Errorf("读取剪贴板失败: %w", err)
	}
	if clipboardText == "" {
		return "", fmt.Errorf("剪贴板内容为空")
	}
	c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", clipboardText))

	// The clipboard may hold surrounding text; keep only the first URL.
	shareURL := deepseekShareURLPattern.FindString(clipboardText)
	if shareURL == "" {
		return "", fmt.Errorf("未能从剪贴板内容中提取URL")
	}

	c.LogInfo(fmt.Sprintf("✓✓✓ 成功获取分享链接: %s", shareURL))
	return shareURL, nil
}

// SafeElement looks up selector on the page without waiting for it to appear.
// It returns (nil, nil) when no element matches, so callers must check the
// element for nil even when the error is nil.
func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) {
	exists, _, err := c.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, nil
	}
	return c.Page.Element(selector)
}