442 lines
12 KiB
Go
442 lines
12 KiB
Go
package collect
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"geo/internal/config"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/atotto/clipboard"
|
||
"github.com/go-rod/rod"
|
||
"github.com/go-rod/rod/lib/proto"
|
||
"github.com/gofiber/fiber/v2/log"
|
||
)
|
||
|
||
// DoubaoCollector 豆包收集器
|
||
type DoubaoCollector struct {
|
||
*BaseCollector
|
||
}
|
||
|
||
// NewDoubaoCollector 创建豆包收集器
|
||
func NewDoubaoCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
|
||
collector := &DoubaoCollector{
|
||
BaseCollector: NewBaseCollector(ctx, params, cfg, logger),
|
||
}
|
||
|
||
// 设置豆包的URL
|
||
collector.LoginURL = "https://www.doubao.com/"
|
||
collector.ChatURL = "https://www.doubao.com/chat/"
|
||
|
||
return collector
|
||
}
|
||
|
||
// CheckLoginStatus 检查登录状态
|
||
func (c *DoubaoCollector) CheckLoginStatus() bool {
|
||
c.LogInfo("检查豆包登录状态...")
|
||
|
||
// 方法3: 检查是否有登录按钮(如果存在说明未登录)
|
||
loginButtons, err := c.Page.Elements("button")
|
||
if err == nil {
|
||
for _, btn := range loginButtons {
|
||
text, _ := btn.Text()
|
||
trimmedText := strings.TrimSpace(text)
|
||
if trimmedText == "登录" || trimmedText == "Login" || strings.Contains(trimmedText, "登录") {
|
||
c.LogInfo(fmt.Sprintf("检测到登录按钮'%s',说明未登录", trimmedText))
|
||
return false
|
||
}
|
||
}
|
||
}
|
||
|
||
c.LogInfo("未检测到登录状态相关元素")
|
||
return true
|
||
}
|
||
|
||
// WaitLogin 等待登录
|
||
func (c *DoubaoCollector) WaitLogin() (bool, string) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return false, fmt.Sprintf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
c.LogInfo("导航到豆包页面...")
|
||
c.Page.MustNavigate(c.ChatURL)
|
||
c.Sleep(3)
|
||
|
||
// 截图查看初始状态
|
||
c.Screenshot("doubao_initial")
|
||
|
||
if c.CheckLoginStatus() {
|
||
c.LogInfo("已登录,保存cookies")
|
||
c.SaveCookies()
|
||
return true, "already_logged_in"
|
||
}
|
||
|
||
c.LogInfo("未登录,等待手动登录...")
|
||
c.Screenshot("doubao_need_login")
|
||
|
||
// 最多等待300秒
|
||
for i := 0; i < 300; i++ {
|
||
if c.CheckLoginStatus() {
|
||
c.Sleep(2)
|
||
c.SaveCookies()
|
||
c.Screenshot("doubao_login_success")
|
||
c.LogInfo("登录成功!")
|
||
return true, "login_success"
|
||
}
|
||
|
||
// 每10秒输出一次提示
|
||
if i%10 == 0 && i > 0 {
|
||
c.LogInfo(fmt.Sprintf("等待登录中... 已等待 %d 秒", i))
|
||
}
|
||
|
||
time.Sleep(1 * time.Second)
|
||
}
|
||
|
||
return false, "登录超时"
|
||
}
|
||
|
||
// AskQuestion 提问并获取答案
|
||
func (c *DoubaoCollector) AskQuestion(question string) (*CollectResult, error) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return nil, fmt.Errorf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
if err := c.InitPage(); err != nil {
|
||
return nil, fmt.Errorf("页面初始化失败: %v", err)
|
||
}
|
||
|
||
// 检查是否登录
|
||
if !c.CheckLoginStatus() {
|
||
return nil, fmt.Errorf("未登录,请先调用WaitLogin进行登录")
|
||
}
|
||
|
||
c.LogInfo(fmt.Sprintf("开始提问: %s", question))
|
||
|
||
if err := c.inputQuestion(question); err != nil {
|
||
return nil, fmt.Errorf("输入问题失败: %v", err)
|
||
}
|
||
|
||
if err := c.clickSendButton(); err != nil {
|
||
return nil, fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
answer, err := c.waitForAnswer()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("获取答案失败: %v", err)
|
||
}
|
||
|
||
// 直接使用原始HTML格式,不进行关键词高亮处理
|
||
answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
|
||
|
||
//// 获取分享链接
|
||
shareLink := ""
|
||
//shareLink := c.getShareLink()
|
||
|
||
c.LogInfo(fmt.Sprintf("✓ 获取答案成功,长度: %d 字符", len(answer)))
|
||
|
||
return &CollectResult{
|
||
Answer: answerStr,
|
||
ShareLink: shareLink,
|
||
IsExposure: isExposure,
|
||
}, nil
|
||
}
|
||
|
||
// inputQuestion 输入问题
|
||
func (c *DoubaoCollector) inputQuestion(question string) error {
|
||
c.LogInfo("输入问题...")
|
||
|
||
// 豆包的输入框选择器 - 使用精确的class匹配
|
||
inputSelectors := []string{
|
||
"textarea[placeholder*='发消息...']",
|
||
}
|
||
|
||
var inputBox *rod.Element
|
||
var err error
|
||
|
||
for _, selector := range inputSelectors {
|
||
inputBox, err = c.WaitForElementVisible(selector, 10)
|
||
if err == nil && inputBox != nil {
|
||
c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
if inputBox == nil {
|
||
return fmt.Errorf("未找到输入框")
|
||
}
|
||
|
||
// 点击获取焦点
|
||
if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return fmt.Errorf("点击输入框失败: %v", err)
|
||
}
|
||
|
||
// 使用原生Input方法输入(更稳定)
|
||
inputBox.Input(question)
|
||
c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
|
||
|
||
return nil
|
||
}
|
||
|
||
// clickSendButton 点击发送按钮
|
||
func (c *DoubaoCollector) clickSendButton() error {
|
||
c.LogInfo("点击发送按钮...")
|
||
|
||
var sendBtn *rod.Element
|
||
|
||
// 如果没找到,尝试遍历所有button元素
|
||
|
||
allButtons, _ := c.Page.Elements("button")
|
||
for _, btn := range allButtons {
|
||
// 检查按钮是否可点击且可见
|
||
visible, _ := btn.Visible()
|
||
if visible {
|
||
classAttr, _ := btn.Attribute("class")
|
||
text, _ := btn.Text()
|
||
|
||
// 检查是否包含send、submit等关键词
|
||
if classAttr != nil && (strings.Contains(strings.ToLower(*classAttr), "send") ||
|
||
strings.Contains(strings.ToLower(*classAttr), "submit")) {
|
||
sendBtn = btn
|
||
|
||
break
|
||
}
|
||
|
||
// 检查文本内容
|
||
trimmedText := strings.TrimSpace(text)
|
||
if trimmedText == "发送" || trimmedText == "Send" {
|
||
sendBtn = btn
|
||
c.LogInfo(fmt.Sprintf("通过文本找到发送按钮: text=%s", trimmedText))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
if sendBtn == nil {
|
||
return fmt.Errorf("未找到发送按钮")
|
||
}
|
||
|
||
// 滚动到可见区域
|
||
if err := sendBtn.ScrollIntoView(); err != nil {
|
||
c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
|
||
}
|
||
|
||
// 点击发送按钮
|
||
c.LogInfo("执行点击...")
|
||
if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
c.LogInfo("已点击发送按钮")
|
||
|
||
return nil
|
||
}
|
||
|
||
// waitForAnswer 等待并获取答案(处理流式输出)
|
||
func (c *DoubaoCollector) waitForAnswer() (string, error) {
|
||
c.LogInfo("等待AI回答...")
|
||
|
||
timeout := 180 // 最大等待时间(秒)
|
||
startTime := time.Now()
|
||
|
||
var lastAnswer string
|
||
var stableCount int // 稳定计数器
|
||
const requiredStableCount = 3 // 需要连续3次内容不变才认为完成(减少到3次以更快响应)
|
||
isAnswering := false // 标记是否正在回答中
|
||
|
||
for time.Since(startTime).Seconds() < float64(timeout) {
|
||
// 直接查找包含 data-message-id 的元素,这是豆包答案的标准标识
|
||
answerElements, err := c.Page.Elements("div[data-message-id]")
|
||
if err == nil && len(answerElements) > 0 {
|
||
// 取最后一个元素(最新的回答)
|
||
lastAnswerElem := answerElements[len(answerElements)-1]
|
||
|
||
visible, _ := lastAnswerElem.Visible()
|
||
if visible {
|
||
// 直接获取原始HTML内容,不做任何处理
|
||
htmlContent, err := lastAnswerElem.HTML()
|
||
if err == nil && htmlContent != "" {
|
||
answerHTML := strings.TrimSpace(htmlContent)
|
||
|
||
if !isAnswering && answerHTML != "" {
|
||
c.LogInfo("检测到AI开始回答...")
|
||
isAnswering = true
|
||
}
|
||
|
||
// 检查内容是否稳定(流式输出完成)
|
||
if answerHTML == lastAnswer && answerHTML != "" {
|
||
stableCount++
|
||
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))
|
||
|
||
// 如果内容稳定足够次数,说明回答完成
|
||
if stableCount >= requiredStableCount {
|
||
c.LogInfo(fmt.Sprintf("✓ AI回答完成,最终HTML长度: %d 字符", len(answerHTML)))
|
||
return answerHTML, nil
|
||
}
|
||
} else {
|
||
// 内容还在变化,重置计数器
|
||
stableCount = 0
|
||
lastAnswer = answerHTML
|
||
if answerHTML != "" {
|
||
c.LogInfo(fmt.Sprintf("检测到流式输出,当前HTML长度: %d 字符", len(answerHTML)))
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
c.SleepMs(1000) // 每1秒检查一次
|
||
|
||
// 每10秒输出一次等待状态
|
||
elapsed := int(time.Since(startTime).Seconds())
|
||
if elapsed > 0 && elapsed%10 == 0 {
|
||
c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
|
||
}
|
||
}
|
||
|
||
return "", fmt.Errorf("等待答案超时(%d秒)", timeout)
|
||
}
|
||
|
||
// getShareLink 尝试获取当前对话的分享链接
|
||
func (c *DoubaoCollector) getShareLink() string {
|
||
c.LogInfo("尝试获取分享链接...")
|
||
|
||
// 步骤1: 找到class包含message-action-button-main的div
|
||
actionDiv, err := c.Page.Element("div[data-foundation-type*='receive-message-action-bar']")
|
||
if err != nil || actionDiv == nil {
|
||
c.LogInfo("未找到message-action-button-main元素")
|
||
return ""
|
||
}
|
||
|
||
c.LogInfo("找到message-action-button-main元素")
|
||
|
||
// 步骤2: 在该div中找到所有button,取倒数第二个作为分享按钮
|
||
buttons, err := actionDiv.Elements("button")
|
||
if err != nil || len(buttons) == 0 {
|
||
c.LogInfo("未找到button元素")
|
||
return ""
|
||
}
|
||
|
||
if len(buttons) < 2 {
|
||
c.LogInfo(fmt.Sprintf("button数量不足(%d),无法获取倒数第二个", len(buttons)))
|
||
return ""
|
||
}
|
||
|
||
shareBtn := buttons[len(buttons)-3]
|
||
c.LogInfo(fmt.Sprintf("找到分享按钮(共%d个button)", len(buttons)))
|
||
|
||
// 检查是否可点击,如果pointer-events为none,使用JavaScript点击
|
||
visible, _ := shareBtn.Visible()
|
||
if !visible {
|
||
c.LogInfo("分享按钮不可见,尝试使用JavaScript点击")
|
||
// 使用立即执行函数,但返回一个空函数避免.apply错误
|
||
_, err := c.Page.Eval(`(function(){Array.from(document.querySelectorAll('div[class*="message-action-button-main"] button')).slice(-2)[0].click();return function(){};})`)
|
||
if err != nil {
|
||
c.LogInfo(fmt.Sprintf("JavaScript点击失败: %v", err))
|
||
return ""
|
||
}
|
||
} else {
|
||
// 正常点击
|
||
if err := shareBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
c.LogInfo(fmt.Sprintf("点击分享按钮失败: %v,尝试JavaScript点击", err))
|
||
// Fallback: 使用JavaScript点击
|
||
_, err := c.Page.Eval(`(function(){Array.from(document.querySelectorAll('div[class*="message-action-button-main"] button')).slice(-2)[0].click();return function(){};})`)
|
||
if err != nil {
|
||
c.LogInfo(fmt.Sprintf("JavaScript点击也失败: %v", err))
|
||
return ""
|
||
}
|
||
}
|
||
}
|
||
|
||
c.SleepMs(500)
|
||
|
||
// 步骤3: 找到内容为"复制链接"的span并点击
|
||
copySpan, err := c.Page.ElementX("//span[contains(text(), '复制链接')]")
|
||
if err != nil || copySpan == nil {
|
||
c.LogInfo("未找到'复制链接'span元素")
|
||
return ""
|
||
}
|
||
|
||
c.LogInfo("找到'复制链接'span元素,点击复制...")
|
||
if err := copySpan.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
c.LogInfo(fmt.Sprintf("点击复制按钮失败: %v,尝试JavaScript点击", err))
|
||
// Fallback: 使用JavaScript点击
|
||
script := `
|
||
(function() {
|
||
var spans = document.querySelectorAll('span');
|
||
for (var i = 0; i < spans.length; i++) {
|
||
if (spans[i].textContent.includes('复制链接')) {
|
||
spans[i].click();
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
})()
|
||
`
|
||
result, err := c.Page.Eval(script)
|
||
if err != nil || result == nil {
|
||
c.LogInfo(fmt.Sprintf("JavaScript点击复制按钮失败: %v", err))
|
||
return ""
|
||
}
|
||
}
|
||
c.SleepMs(500)
|
||
|
||
// 步骤4: 从剪贴板获取内容
|
||
clipboardContent := c.getClipboardContent()
|
||
if clipboardContent != "" {
|
||
c.LogInfo(fmt.Sprintf("从剪贴板获取到分享链接: %s", clipboardContent))
|
||
return clipboardContent
|
||
}
|
||
|
||
c.LogInfo("未能从剪贴板获取链接")
|
||
return ""
|
||
}
|
||
|
||
// getClipboardContent 从剪贴板获取内容
|
||
func (c *DoubaoCollector) getClipboardContent() string {
|
||
// 使用atotto/clipboard库读取系统剪贴板
|
||
text, err := clipboard.ReadAll()
|
||
if err != nil {
|
||
c.LogInfo(fmt.Sprintf("读取剪贴板失败: %v", err))
|
||
return ""
|
||
}
|
||
|
||
if text == "" {
|
||
c.LogInfo("剪贴板内容为空")
|
||
return ""
|
||
}
|
||
|
||
c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", text))
|
||
return text
|
||
}
|
||
|
||
// extractURL returns the first http:// or https:// URL found in text, or ""
// when text contains neither scheme.
//
// The URL starts at the earliest scheme occurrence and ends at the first
// whitespace character after it (space, tab, newline, full-width space, ...)
// or at the end of text. No further validation is performed.
func extractURL(text string) string {
	// Find the earliest occurrence of either scheme.
	start := -1
	for _, scheme := range []string{"https://", "http://"} {
		if i := strings.Index(text, scheme); i != -1 && (start == -1 || i < start) {
			start = i
		}
	}
	if start == -1 {
		return ""
	}

	// text[start:] begins with a non-space byte, so Fields yields at least
	// one element; the first is the URL cut at any Unicode whitespace.
	return strings.Fields(text[start:])[0]
}
|
||
|
||
// SafeElement 安全地获取元素
|
||
func (c *DoubaoCollector) SafeElement(selector string) (*rod.Element, error) {
|
||
exists, _, err := c.Page.Has(selector)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if !exists {
|
||
return nil, nil
|
||
}
|
||
return c.Page.Element(selector)
|
||
}
|