520 lines
14 KiB
Go
520 lines
14 KiB
Go
package collect
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"geo/internal/config"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/atotto/clipboard"
|
||
"github.com/go-rod/rod"
|
||
"github.com/go-rod/rod/lib/proto"
|
||
"github.com/gofiber/fiber/v2/log"
|
||
)
|
||
|
||
// DeepseekCollector DeepSeek收集器
|
||
type DeepseekCollector struct {
|
||
*BaseCollector
|
||
}
|
||
|
||
// NewDeepseekCollector 创建DeepSeek收集器
|
||
func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger, browser *rod.Browser, page *rod.Page) CollectorInterface {
|
||
collector := &DeepseekCollector{
|
||
BaseCollector: NewBaseCollector(ctx, params, cfg, logger, browser, page),
|
||
}
|
||
|
||
// 设置DeepSeek的URL
|
||
collector.LoginURL = "https://chat.deepseek.com/"
|
||
collector.ChatURL = "https://chat.deepseek.com/"
|
||
|
||
return collector
|
||
}
|
||
|
||
// CheckLoginStatus 检查登录状态
|
||
func (c *DeepseekCollector) CheckLoginStatus() bool {
|
||
currentURL := c.GetCurrentURL()
|
||
c.LogInfo(fmt.Sprintf("当前URL: %s", currentURL))
|
||
if currentURL == c.LoginURL {
|
||
return false
|
||
}
|
||
|
||
return true
|
||
}
|
||
|
||
// WaitLogin 等待登录
|
||
func (c *DeepseekCollector) WaitLogin() (bool, string) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return false, fmt.Sprintf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
c.Page.MustNavigate(c.ChatURL)
|
||
c.Sleep(3)
|
||
|
||
if c.CheckLoginStatus() {
|
||
c.SaveCookies()
|
||
return true, "already_logged_in"
|
||
}
|
||
|
||
c.LogInfo("未检测到登录状态,等待用户登录...")
|
||
|
||
// 最多等待300秒
|
||
for i := 0; i < 300; i++ {
|
||
if c.CheckLoginStatus() {
|
||
c.Sleep(2)
|
||
c.SaveCookies()
|
||
return true, "login_success"
|
||
}
|
||
time.Sleep(1 * time.Second)
|
||
}
|
||
|
||
return false, "登录超时"
|
||
}
|
||
|
||
// AskQuestion 提问并获取答案
|
||
func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) {
|
||
// 注意:SetupDriver 和 Close 已由 Manager 管理,这里不再调用
|
||
|
||
if err := c.InitPage(); err != nil {
|
||
return nil, fmt.Errorf("页面初始化失败: %v", err)
|
||
}
|
||
|
||
c.Sleep(3)
|
||
|
||
if err := c.inputQuestion(question); err != nil {
|
||
return nil, fmt.Errorf("输入问题失败: %v", err)
|
||
}
|
||
|
||
if err := c.clickSendButton(); err != nil {
|
||
return nil, fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
answer, err := c.waitForAnswer()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("获取答案失败: %v", err)
|
||
}
|
||
|
||
// 关键词高亮处理
|
||
answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)
|
||
|
||
// 获取分享链接
|
||
shareLink := ""
|
||
link, _ := c.getShareLink()
|
||
if link != "" {
|
||
shareLink = link
|
||
}
|
||
|
||
return &CollectResult{
|
||
Answer: answerStr,
|
||
ShareLink: shareLink,
|
||
IsExposure: isExposure,
|
||
}, nil
|
||
}
|
||
|
||
// inputQuestion 输入问题
|
||
func (c *DeepseekCollector) inputQuestion(question string) error {
|
||
c.LogInfo("输入问题...")
|
||
|
||
// DeepSeek的输入框选择器
|
||
inputSelectors := []string{
|
||
"textarea[placeholder*='输入']",
|
||
"textarea[placeholder*='问']",
|
||
"textarea",
|
||
"[contenteditable='true']",
|
||
".chat-input textarea",
|
||
"#message-input",
|
||
}
|
||
|
||
var inputBox *rod.Element
|
||
var err error
|
||
|
||
for _, selector := range inputSelectors {
|
||
inputBox, err = c.WaitForElementVisible(selector, 10)
|
||
if err == nil && inputBox != nil {
|
||
c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
if inputBox == nil {
|
||
return fmt.Errorf("未找到输入框")
|
||
}
|
||
|
||
// 点击获取焦点
|
||
if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return fmt.Errorf("点击输入框失败: %v", err)
|
||
}
|
||
c.SleepMs(500)
|
||
|
||
// fallback: 使用Focus + Input
|
||
inputBox.Focus()
|
||
c.SleepMs(200)
|
||
inputBox.Input(question)
|
||
|
||
c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
|
||
c.SleepMs(1000)
|
||
|
||
return nil
|
||
}
|
||
|
||
// clickSendButton 点击发送按钮
|
||
func (c *DeepseekCollector) clickSendButton() error {
|
||
c.LogInfo("点击发送按钮...")
|
||
|
||
// 使用正则匹配包含"send"或"提交"的class
|
||
allElements, err := c.Page.Elements("*")
|
||
if err != nil {
|
||
return fmt.Errorf("获取页面元素失败: %v", err)
|
||
}
|
||
|
||
var sendBtn *rod.Element
|
||
for _, elem := range allElements {
|
||
classAttr, _ := elem.Attribute("class")
|
||
if classAttr != nil {
|
||
classLower := strings.ToLower(*classAttr)
|
||
if strings.Contains(classLower, "send") || strings.Contains(classLower, "submit") {
|
||
// 检查是否是可点击的元素(button、div等)
|
||
tagName, _ := elem.Property("tagName")
|
||
if tagName.Str() == "BUTTON" || tagName.Str() == "DIV" || tagName.Str() == "SVG" {
|
||
sendBtn = elem
|
||
c.LogInfo(fmt.Sprintf("通过正则找到发送按钮: class=%s, tag=%s", *classAttr, tagName.Str()))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if sendBtn == nil {
|
||
// fallback: 尝试查找发送图标或最后一个button
|
||
buttons, _ := c.Page.Elements("button")
|
||
if len(buttons) > 0 {
|
||
sendBtn = buttons[len(buttons)-1]
|
||
c.LogInfo("使用最后一个button作为发送按钮")
|
||
}
|
||
}
|
||
|
||
if sendBtn == nil {
|
||
// 尝试查找SVG图标
|
||
svgs, _ := c.Page.Elements("svg")
|
||
for _, svg := range svgs {
|
||
parent, _ := svg.Parent()
|
||
if parent != nil {
|
||
tagName, _ := parent.Property("tagName")
|
||
if tagName.Str() == "BUTTON" {
|
||
sendBtn = parent
|
||
c.LogInfo("使用包含SVG的button作为发送按钮")
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if sendBtn == nil {
|
||
return fmt.Errorf("未找到发送按钮")
|
||
}
|
||
|
||
c.SleepMs(500)
|
||
|
||
// 滚动到可见区域
|
||
if err := sendBtn.ScrollIntoView(); err != nil {
|
||
c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
|
||
}
|
||
c.SleepMs(300)
|
||
|
||
// 点击发送按钮
|
||
c.LogInfo("执行点击...")
|
||
if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
c.LogInfo("已点击发送按钮")
|
||
c.SleepMs(2000)
|
||
|
||
return nil
|
||
}
|
||
|
||
// waitForAnswer 等待并获取答案(处理流式输出)
|
||
func (c *DeepseekCollector) waitForAnswer() (string, error) {
|
||
c.LogInfo("等待AI回答...")
|
||
|
||
timeout := 180 // 最大等待时间(秒)
|
||
startTime := time.Now()
|
||
|
||
var lastAnswer string
|
||
var stableCount int
|
||
const requiredStableCount = 5 // 需要连续5次内容不变才认为完成
|
||
isAnswering := false
|
||
|
||
for time.Since(startTime).Seconds() < float64(timeout) {
|
||
// 查找答案区域 - 尝试多种选择器
|
||
answerSelectors := []string{
|
||
".message-content",
|
||
".response-content",
|
||
"[class*='assistant'] [class*='content']",
|
||
"[class*='ai'] [class*='message']",
|
||
".chat-message.ai",
|
||
"[class*='answer']",
|
||
"[class*='response']",
|
||
}
|
||
|
||
var answerText string
|
||
var foundAnswer bool
|
||
|
||
for _, selector := range answerSelectors {
|
||
answerElements, err := c.Page.Elements(selector)
|
||
if err == nil && len(answerElements) > 0 {
|
||
// 获取最后一个答案元素(最新的回答)
|
||
lastAnswer := answerElements[len(answerElements)-1]
|
||
|
||
visible, _ := lastAnswer.Visible()
|
||
if visible {
|
||
// 尝试获取HTML内容
|
||
htmlContent, err := lastAnswer.HTML()
|
||
if err == nil && len(strings.TrimSpace(htmlContent)) > 30 {
|
||
answerText = CleanDivTags(htmlContent)
|
||
foundAnswer = true
|
||
c.LogInfo(fmt.Sprintf("找到答案(%s),清理后文本长度: %d", selector, len(answerText)))
|
||
break
|
||
}
|
||
|
||
// 如果HTML获取失败,尝试获取文本
|
||
textContent, _ := lastAnswer.Text()
|
||
if len(strings.TrimSpace(textContent)) > 30 {
|
||
answerText = strings.TrimSpace(textContent)
|
||
foundAnswer = true
|
||
c.LogInfo(fmt.Sprintf("找到答案(%s),文本长度: %d", selector, len(answerText)))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if !foundAnswer {
|
||
c.LogInfo("未找到答案元素,继续等待...")
|
||
}
|
||
|
||
// 检查是否获取到答案
|
||
if answerText != "" && len(answerText) > 30 {
|
||
if !isAnswering {
|
||
c.LogInfo("检测到AI开始回答...")
|
||
isAnswering = true
|
||
}
|
||
|
||
// 检查内容是否稳定(流式输出完成)
|
||
if answerText == lastAnswer {
|
||
stableCount++
|
||
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerText)))
|
||
|
||
// 如果内容稳定,说明回答完成
|
||
if stableCount >= requiredStableCount {
|
||
c.LogInfo(fmt.Sprintf("✓ AI回答完成,最终长度: %d 字符", len(answerText)))
|
||
return answerText, nil
|
||
}
|
||
} else {
|
||
// 内容还在变化,重置计数器
|
||
stableCount = 0
|
||
lastAnswer = answerText
|
||
c.LogInfo(fmt.Sprintf("检测到流式输出,当前长度: %d 字符", len(answerText)))
|
||
}
|
||
}
|
||
|
||
c.SleepMs(1500) // 每1.5秒检查一次
|
||
|
||
// 每10秒输出一次等待状态
|
||
elapsed := int(time.Since(startTime).Seconds())
|
||
if elapsed > 0 && elapsed%10 == 0 {
|
||
c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
|
||
}
|
||
}
|
||
|
||
return "", fmt.Errorf("等待答案超时(%d秒)", timeout)
|
||
}
|
||
|
||
// getShareLink 获取分享链接
|
||
func (c *DeepseekCollector) getShareLink() (string, error) {
|
||
c.LogInfo("=== 开始获取分享链接 ===")
|
||
|
||
// 步骤1: 查找分享按钮(需要根据DeepSeek实际页面结构调整)
|
||
c.LogInfo("步骤1: 查找分享按钮...")
|
||
|
||
var shareBtn *rod.Element
|
||
|
||
// 尝试多种方式查找分享按钮
|
||
shareSelectors := []string{
|
||
"[class*='share']",
|
||
"[aria-label*='分享']",
|
||
"[aria-label*='Share']",
|
||
"button svg[path*='share']",
|
||
".share-button",
|
||
".share-icon",
|
||
}
|
||
|
||
for _, selector := range shareSelectors {
|
||
btns, err := c.Page.Elements(selector)
|
||
if err == nil && len(btns) > 0 {
|
||
shareBtn = btns[0]
|
||
c.LogInfo(fmt.Sprintf("✓ 找到分享按钮: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
if shareBtn == nil {
|
||
// fallback: 遍历所有元素查找包含share的class
|
||
allElements, _ := c.Page.Elements("*")
|
||
for _, elem := range allElements {
|
||
classAttr, _ := elem.Attribute("class")
|
||
if classAttr != nil && strings.Contains(strings.ToLower(*classAttr), "share") {
|
||
tagName, _ := elem.Property("tagName")
|
||
if tagName.Str() == "BUTTON" || tagName.Str() == "DIV" || tagName.Str() == "SVG" {
|
||
shareBtn = elem
|
||
c.LogInfo(fmt.Sprintf("✓ 通过正则找到分享按钮: tag=%s, class=%s", tagName.Str(), *classAttr))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if shareBtn == nil {
|
||
c.LogInfo("未找到分享按钮,跳过获取分享链接")
|
||
return "", fmt.Errorf("未找到分享按钮")
|
||
}
|
||
|
||
// 滚动到元素位置
|
||
c.LogInfo("滚动到分享按钮位置...")
|
||
if scrollErr := shareBtn.ScrollIntoView(); scrollErr != nil {
|
||
c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
|
||
}
|
||
c.SleepMs(800)
|
||
|
||
// 点击分享按钮
|
||
c.LogInfo("执行点击分享按钮...")
|
||
if clickErr := shareBtn.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
|
||
return "", fmt.Errorf("点击分享按钮失败: %v", clickErr)
|
||
}
|
||
|
||
c.LogInfo("✓ 点击成功")
|
||
c.SleepMs(3000) // 等待弹窗出现
|
||
c.Screenshot("after_share_click")
|
||
|
||
// 步骤2: 在弹窗中查找复制链接按钮(带重试机制)
|
||
c.LogInfo("步骤2: 查找复制链接按钮...")
|
||
|
||
var copyLinkBtn *rod.Element
|
||
maxRetries := 5
|
||
retryDelay := 1000
|
||
|
||
for attempt := 1; attempt <= maxRetries; attempt++ {
|
||
c.LogInfo(fmt.Sprintf("第 %d/%d 次尝试查找复制链接按钮...", attempt, maxRetries))
|
||
|
||
// 尝试多种方式查找复制按钮
|
||
copySelectors := []string{
|
||
"[class*='copy']",
|
||
"[class*='Copy']",
|
||
"[aria-label*='复制']",
|
||
"[aria-label*='Copy']",
|
||
"button[class*='link']",
|
||
}
|
||
|
||
for _, selector := range copySelectors {
|
||
btns, err := c.Page.Elements(selector)
|
||
if err == nil && len(btns) > 0 {
|
||
copyLinkBtn = btns[0]
|
||
c.LogInfo(fmt.Sprintf("✓ 找到复制链接按钮: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
if copyLinkBtn != nil {
|
||
break
|
||
}
|
||
|
||
// fallback: 遍历所有元素
|
||
allElements, _ := c.Page.Elements("*")
|
||
for _, elem := range allElements {
|
||
classAttr, _ := elem.Attribute("class")
|
||
if classAttr != nil {
|
||
classLower := strings.ToLower(*classAttr)
|
||
if strings.Contains(classLower, "copy") || strings.Contains(classLower, "link") {
|
||
tagName, _ := elem.Property("tagName")
|
||
if tagName.Str() == "BUTTON" || tagName.Str() == "DIV" {
|
||
copyLinkBtn = elem
|
||
c.LogInfo(fmt.Sprintf("✓ 通过正则找到复制按钮: tag=%s, class=%s", tagName.Str(), *classAttr))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if copyLinkBtn != nil {
|
||
break
|
||
}
|
||
|
||
// 没找到,等待后重试
|
||
if attempt < maxRetries {
|
||
c.LogInfo(fmt.Sprintf("未找到复制链接按钮,%d毫秒后重试...", retryDelay))
|
||
c.SleepMs(retryDelay)
|
||
}
|
||
}
|
||
|
||
if copyLinkBtn == nil {
|
||
c.Screenshot("copy_button_not_found")
|
||
return "", fmt.Errorf("经过 %d 次重试仍未找到复制链接按钮", maxRetries)
|
||
}
|
||
|
||
// 滚动到按钮位置
|
||
c.LogInfo("滚动到复制链接按钮位置...")
|
||
if scrollErr := copyLinkBtn.ScrollIntoView(); scrollErr != nil {
|
||
c.LogInfo(fmt.Sprintf("滚动失败: %v", scrollErr))
|
||
}
|
||
c.SleepMs(500)
|
||
|
||
// 点击复制链接按钮
|
||
c.LogInfo("点击复制链接按钮...")
|
||
if clickErr := copyLinkBtn.Click(proto.InputMouseButtonLeft, 1); clickErr != nil {
|
||
return "", fmt.Errorf("点击复制链接按钮失败: %v", clickErr)
|
||
}
|
||
|
||
c.LogInfo("✓ 复制链接按钮点击成功")
|
||
c.SleepMs(1500) // 等待复制链接完成
|
||
|
||
// 步骤3: 从剪贴板读取分享链接
|
||
c.LogInfo("步骤3: 从系统剪贴板读取分享链接...")
|
||
|
||
clipboardText, err := clipboard.ReadAll()
|
||
if err != nil {
|
||
return "", fmt.Errorf("读取剪贴板失败: %v", err)
|
||
}
|
||
|
||
if clipboardText == "" {
|
||
return "", fmt.Errorf("剪贴板内容为空")
|
||
}
|
||
|
||
c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", clipboardText))
|
||
|
||
// 使用正则表达式提取URL
|
||
re := regexp.MustCompile(`https?://[^\s]+`)
|
||
matches := re.FindStringSubmatch(clipboardText)
|
||
|
||
if len(matches) == 0 {
|
||
return "", fmt.Errorf("未能从剪贴板内容中提取URL")
|
||
}
|
||
|
||
url := matches[0]
|
||
c.LogInfo(fmt.Sprintf("✓✓✓ 成功获取分享链接: %s", url))
|
||
return url, nil
|
||
}
|
||
|
||
// SafeElement 安全地获取元素
|
||
func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) {
|
||
exists, _, err := c.Page.Has(selector)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if !exists {
|
||
return nil, nil
|
||
}
|
||
return c.Page.Element(selector)
|
||
}
|