geoGo/internal/collect/deepseek.go

520 lines
14 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package collect
import (
"context"
"fmt"
"geo/internal/config"
"regexp"
"strings"
"time"
"github.com/atotto/clipboard"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
"github.com/gofiber/fiber/v2/log"
)
// DeepseekCollector is the collector for the DeepSeek web chat.
//
// It embeds *BaseCollector, so the base collector's fields and methods
// (for example LoginURL, ChatURL, Page and LogInfo, as used throughout this
// file) are promoted onto this type.
type DeepseekCollector struct {
*BaseCollector
}
// NewDeepseekCollector builds a DeepSeek collector on top of a freshly
// created base collector and points both its login and chat URLs at the
// DeepSeek chat home page.
func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger, browser *rod.Browser, page *rod.Page) CollectorInterface {
	// DeepSeek serves login and chat from the same address.
	const deepseekHome = "https://chat.deepseek.com/"

	base := NewBaseCollector(ctx, params, cfg, logger, browser, page)
	base.LoginURL = deepseekHome
	base.ChatURL = deepseekHome

	return &DeepseekCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the session is considered logged in.
//
// It logs the page's current URL and returns true for any URL that differs
// from LoginURL, false when the page is exactly at LoginURL.
//
// NOTE(review): NewDeepseekCollector sets LoginURL and ChatURL to the same
// address, so a page that stays on the chat URL is reported as "not logged
// in" — confirm this matches how the site redirects.
func (c *DeepseekCollector) CheckLoginStatus() bool {
	pageURL := c.GetCurrentURL()
	c.LogInfo(fmt.Sprintf("当前URL: %s", pageURL))
	return pageURL != c.LoginURL
}
// WaitLogin opens the chat page and waits for the user to complete login.
//
// The browser is started with SetupDriver and is always closed before the
// function returns. Cookies are saved as soon as a logged-in state is
// detected.
//
// Returns (true, "already_logged_in") when the session is logged in right
// after navigation, (true, "login_success") when login is detected while
// polling, and (false, reason) when the browser cannot start, navigation
// fails, or the polling budget is exhausted.
func (c *DeepseekCollector) WaitLogin() (bool, string) {
	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Use Navigate rather than MustNavigate: a navigation failure must be
	// reported through the return values like every other failure here,
	// not raised as a panic.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		return false, fmt.Sprintf("页面导航失败: %v", err)
	}
	c.Sleep(3)

	if c.CheckLoginStatus() {
		c.SaveCookies()
		return true, "already_logged_in"
	}

	c.LogInfo("未检测到登录状态,等待用户登录...")

	// Poll once per second, at most maxLoginPolls times.
	const maxLoginPolls = 300
	for i := 0; i < maxLoginPolls; i++ {
		if c.CheckLoginStatus() {
			// Pause briefly before persisting cookies.
			c.Sleep(2)
			c.SaveCookies()
			return true, "login_success"
		}
		time.Sleep(time.Second)
	}
	return false, "登录超时"
}
// AskQuestion submits question to the chat page and collects the reply.
//
// Steps: initialise the page, type the question, click send, wait for the
// streamed answer to settle, run keyword highlighting over it, and finally
// try to obtain a share link.
//
// Any failure up to and including answer retrieval is returned as a wrapped
// error (usable with errors.Is / errors.As). Obtaining the share link is
// best-effort: on failure the error is logged and ShareLink is left empty.
func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) {
	// SetupDriver and Close are handled by the Manager, so they are
	// intentionally not called here.
	if err := c.InitPage(); err != nil {
		return nil, fmt.Errorf("页面初始化失败: %w", err)
	}
	c.Sleep(3)

	if err := c.inputQuestion(question); err != nil {
		return nil, fmt.Errorf("输入问题失败: %w", err)
	}
	if err := c.clickSendButton(); err != nil {
		return nil, fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return nil, fmt.Errorf("获取答案失败: %w", err)
	}

	// Highlight the configured keywords inside the answer.
	answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)

	// The share link is optional; record why it is missing instead of
	// dropping the error silently.
	shareLink, err := c.getShareLink()
	if err != nil {
		c.LogInfo(fmt.Sprintf("获取分享链接失败: %v", err))
		shareLink = ""
	}

	return &CollectResult{
		Answer:     answerStr,
		ShareLink:  shareLink,
		IsExposure: isExposure,
	}, nil
}
// inputQuestion locates the chat input box and types question into it.
//
// The candidate selectors are tried in order and the first one that yields
// a visible element wins. An error is returned when no input box is found,
// when the box cannot be clicked, or when typing the text fails.
func (c *DeepseekCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题...")

	// Candidate selectors for the DeepSeek input box, most specific first.
	inputSelectors := []string{
		"textarea[placeholder*='输入']",
		"textarea[placeholder*='问']",
		"textarea",
		"[contenteditable='true']",
		".chat-input textarea",
		"#message-input",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		candidate, err := c.WaitForElementVisible(selector, 10)
		if err != nil || candidate == nil {
			// Never keep an element that came back together with an error.
			continue
		}
		inputBox = candidate
		c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
		break
	}
	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to give the box focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击输入框失败: %w", err)
	}
	c.SleepMs(500)

	// The explicit Focus is a best-effort extra on top of the click; the
	// Input call below is the step that has to succeed.
	if err := inputBox.Focus(); err != nil {
		c.LogInfo(fmt.Sprintf("聚焦输入框失败: %v", err))
	}
	c.SleepMs(200)

	// Report typing failures instead of claiming the question was entered.
	if err := inputBox.Input(question); err != nil {
		return fmt.Errorf("输入文本失败: %w", err)
	}
	c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
	c.SleepMs(1000)
	return nil
}
// clickSendButton finds the send button on the page and clicks it.
//
// Lookup strategy, in order:
//  1. the first BUTTON, DIV or SVG element whose class attribute contains
//     "send" or "submit" (case-insensitive substring match);
//  2. the last <button> on the page;
//  3. the first <button> that directly contains an <svg>.
//
// Returns an error when the page elements cannot be listed, when no
// candidate is found, or when the click itself fails. A failure to scroll
// the button into view is only logged.
func (c *DeepseekCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	allElements, err := c.Page.Elements("*")
	if err != nil {
		return fmt.Errorf("获取页面元素失败: %w", err)
	}

	var sendBtn *rod.Element
	for _, elem := range allElements {
		classAttr, err := elem.Attribute("class")
		if err != nil || classAttr == nil {
			continue
		}
		classLower := strings.ToLower(*classAttr)
		if !strings.Contains(classLower, "send") && !strings.Contains(classLower, "submit") {
			continue
		}

		tagProp, err := elem.Property("tagName")
		if err != nil {
			continue
		}
		// The DOM reports HTML tag names in upper case but keeps SVG tag
		// names as written ("svg"), so normalise before comparing;
		// otherwise the SVG case can never match.
		switch strings.ToUpper(tagProp.Str()) {
		case "BUTTON", "DIV", "SVG":
			sendBtn = elem
			c.LogInfo(fmt.Sprintf("通过正则找到发送按钮: class=%s, tag=%s", *classAttr, tagProp.Str()))
		}
		if sendBtn != nil {
			break
		}
	}

	if sendBtn == nil {
		// Fallback: take the last button on the page. A lookup error just
		// means this fallback yields nothing.
		if buttons, err := c.Page.Elements("button"); err == nil && len(buttons) > 0 {
			sendBtn = buttons[len(buttons)-1]
			c.LogInfo("使用最后一个button作为发送按钮")
		}
	}

	if sendBtn == nil {
		// Fallback: a button that wraps an SVG icon.
		svgs, err := c.Page.Elements("svg")
		if err != nil {
			svgs = nil
		}
		for _, svg := range svgs {
			parent, err := svg.Parent()
			if err != nil || parent == nil {
				continue
			}
			tagProp, err := parent.Property("tagName")
			if err != nil {
				continue
			}
			if strings.ToUpper(tagProp.Str()) == "BUTTON" {
				sendBtn = parent
				c.LogInfo("使用包含SVG的button作为发送按钮")
				break
			}
		}
	}

	if sendBtn == nil {
		return fmt.Errorf("未找到发送按钮")
	}
	c.SleepMs(500)

	// Bring the button into the viewport; not being able to scroll is not
	// fatal, the click below decides success.
	if err := sendBtn.ScrollIntoView(); err != nil {
		c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
	}
	c.SleepMs(300)

	c.LogInfo("执行点击...")
	if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击发送按钮失败: %w", err)
	}
	c.LogInfo("已点击发送按钮")
	c.SleepMs(2000)
	return nil
}
// deepseekAnswerSelectors lists the CSS selectors probed, in order, to find
// the element holding the assistant's reply.
var deepseekAnswerSelectors = []string{
	".message-content",
	".response-content",
	"[class*='assistant'] [class*='content']",
	"[class*='ai'] [class*='message']",
	".chat-message.ai",
	"[class*='answer']",
	"[class*='response']",
}

// deepseekMinAnswerLen is the length (in bytes) that extracted content must
// exceed before it is treated as an answer.
const deepseekMinAnswerLen = 30

// waitForAnswer polls the page until the streamed answer stops changing and
// returns it.
//
// The answer is considered complete once the extracted text has been
// identical for requiredStableCount consecutive polls. Polling stops with an
// error after the timeout elapses; no partial answer is returned in that
// case.
func (c *DeepseekCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待AI回答...")

	const (
		timeoutSeconds      = 180  // overall budget, in seconds
		requiredStableCount = 5    // identical consecutive reads needed
		pollIntervalMs      = 1500 // pause between polls
		progressEvery       = 10 * time.Second
	)
	timeout := timeoutSeconds * time.Second

	startTime := time.Now()
	nextProgress := progressEvery
	previousAnswer := ""
	stableCount := 0
	isAnswering := false

	for time.Since(startTime) < timeout {
		answerText, found := c.latestAnswerText()
		if !found {
			c.LogInfo("未找到答案元素,继续等待...")
		}

		if len(answerText) > deepseekMinAnswerLen {
			if !isAnswering {
				c.LogInfo("检测到AI开始回答...")
				isAnswering = true
			}

			if answerText == previousAnswer {
				// Unchanged since the previous poll: one step closer to done.
				stableCount++
				c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerText)))
				if stableCount >= requiredStableCount {
					c.LogInfo(fmt.Sprintf("✓ AI回答完成最终长度: %d 字符", len(answerText)))
					return answerText, nil
				}
			} else {
				// Still streaming: restart the stability count.
				stableCount = 0
				previousAnswer = answerText
				c.LogInfo(fmt.Sprintf("检测到流式输出,当前长度: %d 字符", len(answerText)))
			}
		}

		c.SleepMs(pollIntervalMs)

		// Emit a progress line on a fixed schedule. Testing
		// "elapsed % 10 == 0" on whole seconds only fires when a poll
		// happens to land on such a second, which is mostly luck.
		if elapsed := time.Since(startTime); elapsed >= nextProgress {
			c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", int(elapsed.Seconds())))
			for nextProgress <= elapsed {
				nextProgress += progressEvery
			}
		}
	}
	return "", fmt.Errorf("等待答案超时(%d秒", timeoutSeconds)
}

// latestAnswerText returns the content of the newest visible answer element
// and whether one was found. For each selector it looks at the last matching
// element, preferring its cleaned HTML and falling back to its plain text.
func (c *DeepseekCollector) latestAnswerText() (string, bool) {
	for _, selector := range deepseekAnswerSelectors {
		elements, err := c.Page.Elements(selector)
		if err != nil || len(elements) == 0 {
			continue
		}

		// The last match is the most recent reply.
		newest := elements[len(elements)-1]
		if visible, err := newest.Visible(); err != nil || !visible {
			continue
		}

		if htmlContent, err := newest.HTML(); err == nil && len(strings.TrimSpace(htmlContent)) > deepseekMinAnswerLen {
			cleaned := CleanDivTags(htmlContent)
			c.LogInfo(fmt.Sprintf("找到答案(%s清理后文本长度: %d", selector, len(cleaned)))
			return cleaned, true
		}

		// HTML unavailable or too short: fall back to the element's text.
		textContent, err := newest.Text()
		if err != nil {
			continue
		}
		if trimmed := strings.TrimSpace(textContent); len(trimmed) > deepseekMinAnswerLen {
			c.LogInfo(fmt.Sprintf("找到答案(%s文本长度: %d", selector, len(trimmed)))
			return trimmed, true
		}
	}
	return "", false
}
// deepseekShareURLPattern matches the first http(s) URL in a piece of text.
// It is compiled once instead of on every getShareLink call.
var deepseekShareURLPattern = regexp.MustCompile(`https?://[^\s]+`)

// getShareLink drives the page's share dialog and returns the share URL.
//
// Flow: find and click the share button, find (with retries) and click the
// copy-link button, then read the system clipboard and extract the first
// http(s) URL from it.
//
// An error is returned when either button cannot be found or clicked, when
// the clipboard cannot be read or is empty, or when it holds no URL.
func (c *DeepseekCollector) getShareLink() (string, error) {
	c.LogInfo("=== 开始获取分享链接 ===")

	// Step 1: locate and click the share button. The selectors depend on
	// the actual DeepSeek page structure.
	c.LogInfo("步骤1: 查找分享按钮...")
	shareBtn := c.findShareButton()
	if shareBtn == nil {
		c.LogInfo("未找到分享按钮,跳过获取分享链接")
		return "", fmt.Errorf("未找到分享按钮")
	}

	c.LogInfo("滚动到分享按钮位置...")
	if err := shareBtn.ScrollIntoView(); err != nil {
		c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
	}
	c.SleepMs(800)

	c.LogInfo("执行点击分享按钮...")
	if err := shareBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return "", fmt.Errorf("点击分享按钮失败: %w", err)
	}
	c.LogInfo("✓ 点击成功")
	c.SleepMs(3000) // give the dialog time to appear
	c.Screenshot("after_share_click")

	// Step 2: locate the copy-link button inside the dialog, retrying
	// because the dialog may render late.
	c.LogInfo("步骤2: 查找复制链接按钮...")
	const (
		maxRetries = 5
		retryDelay = 1000 // milliseconds
	)
	var copyLinkBtn *rod.Element
	for attempt := 1; attempt <= maxRetries; attempt++ {
		c.LogInfo(fmt.Sprintf("第 %d/%d 次尝试查找复制链接按钮...", attempt, maxRetries))
		if copyLinkBtn = c.findCopyLinkButton(); copyLinkBtn != nil {
			break
		}
		if attempt < maxRetries {
			c.LogInfo(fmt.Sprintf("未找到复制链接按钮,%d毫秒后重试...", retryDelay))
			c.SleepMs(retryDelay)
		}
	}
	if copyLinkBtn == nil {
		c.Screenshot("copy_button_not_found")
		return "", fmt.Errorf("经过 %d 次重试仍未找到复制链接按钮", maxRetries)
	}

	c.LogInfo("滚动到复制链接按钮位置...")
	if err := copyLinkBtn.ScrollIntoView(); err != nil {
		c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
	}
	c.SleepMs(500)

	// Empty the clipboard first so that a click which copies nothing cannot
	// make an older, unrelated URL look like the share link. Best-effort:
	// a failure here is only logged.
	if err := clipboard.WriteAll(""); err != nil {
		c.LogInfo(fmt.Sprintf("清空剪贴板失败: %v", err))
	}

	c.LogInfo("点击复制链接按钮...")
	if err := copyLinkBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return "", fmt.Errorf("点击复制链接按钮失败: %w", err)
	}
	c.LogInfo("✓ 复制链接按钮点击成功")
	c.SleepMs(1500) // wait for the copy to complete

	// Step 3: read the link back from the system clipboard.
	c.LogInfo("步骤3: 从系统剪贴板读取分享链接...")
	clipboardText, err := clipboard.ReadAll()
	if err != nil {
		return "", fmt.Errorf("读取剪贴板失败: %w", err)
	}
	if clipboardText == "" {
		return "", fmt.Errorf("剪贴板内容为空")
	}
	c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", clipboardText))

	// The pattern has no capture groups, so FindString is sufficient.
	url := deepseekShareURLPattern.FindString(clipboardText)
	if url == "" {
		return "", fmt.Errorf("未能从剪贴板内容中提取URL")
	}
	c.LogInfo(fmt.Sprintf("✓✓✓ 成功获取分享链接: %s", url))
	return url, nil
}

// findShareButton returns the share button, or nil when none is found. It
// tries a list of CSS selectors first and then scans every element for a
// BUTTON/DIV/SVG whose class contains "share".
func (c *DeepseekCollector) findShareButton() *rod.Element {
	shareSelectors := []string{
		"[class*='share']",
		"[aria-label*='分享']",
		"[aria-label*='Share']",
		"button svg[path*='share']",
		".share-button",
		".share-icon",
	}
	for _, selector := range shareSelectors {
		btns, err := c.Page.Elements(selector)
		if err == nil && len(btns) > 0 {
			c.LogInfo(fmt.Sprintf("✓ 找到分享按钮: %s", selector))
			return btns[0]
		}
	}

	elem, tag, class := c.scanElementsByClass([]string{"share"}, []string{"BUTTON", "DIV", "SVG"})
	if elem != nil {
		c.LogInfo(fmt.Sprintf("✓ 通过正则找到分享按钮: tag=%s, class=%s", tag, class))
	}
	return elem
}

// findCopyLinkButton returns the copy-link button, or nil when none is
// found. It tries a list of CSS selectors first and then scans every element
// for a BUTTON/DIV whose class contains "copy" or "link".
func (c *DeepseekCollector) findCopyLinkButton() *rod.Element {
	copySelectors := []string{
		"[class*='copy']",
		"[class*='Copy']",
		"[aria-label*='复制']",
		"[aria-label*='Copy']",
		"button[class*='link']",
	}
	for _, selector := range copySelectors {
		btns, err := c.Page.Elements(selector)
		if err == nil && len(btns) > 0 {
			c.LogInfo(fmt.Sprintf("✓ 找到复制链接按钮: %s", selector))
			return btns[0]
		}
	}

	elem, tag, class := c.scanElementsByClass([]string{"copy", "link"}, []string{"BUTTON", "DIV"})
	if elem != nil {
		c.LogInfo(fmt.Sprintf("✓ 通过正则找到复制按钮: tag=%s, class=%s", tag, class))
	}
	return elem
}

// scanElementsByClass walks every element on the page and returns the first
// one whose lower-cased class attribute contains any of keywords and whose
// tag name equals one of tags, together with its tag name and class value.
// It returns (nil, "", "") when nothing matches or the page cannot be listed.
func (c *DeepseekCollector) scanElementsByClass(keywords, tags []string) (*rod.Element, string, string) {
	allElements, err := c.Page.Elements("*")
	if err != nil {
		return nil, "", ""
	}
	for _, elem := range allElements {
		classAttr, err := elem.Attribute("class")
		if err != nil || classAttr == nil {
			continue
		}
		classLower := strings.ToLower(*classAttr)
		hit := false
		for _, kw := range keywords {
			if strings.Contains(classLower, kw) {
				hit = true
				break
			}
		}
		if !hit {
			continue
		}

		tagProp, err := elem.Property("tagName")
		if err != nil {
			continue
		}
		tag := tagProp.Str()
		for _, want := range tags {
			// Case-insensitive: the DOM keeps SVG tag names lower-case
			// ("svg") while HTML tag names come back upper-case.
			if strings.EqualFold(tag, want) {
				return elem, tag, *classAttr
			}
		}
	}
	return nil, "", ""
}
// SafeElement looks up selector without treating absence as an error.
//
// Returns (element, nil) when a matching element exists, (nil, nil) when
// none does, and (nil, err) when the lookup itself fails. Callers must
// nil-check the element even when err is nil.
func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) {
	// Has already hands back the matched element. Returning it directly
	// avoids a second query that could block or fail if the element
	// disappears between the two lookups.
	exists, elem, err := c.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, nil
	}
	return elem, nil
}