540 lines
15 KiB
Go
540 lines
15 KiB
Go
package collect
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"geo/internal/config"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/atotto/clipboard"
|
||
"github.com/go-rod/rod"
|
||
"github.com/go-rod/rod/lib/proto"
|
||
"github.com/gofiber/fiber/v2/log"
|
||
)
|
||
|
||
// DoubaoCollector 豆包收集器
|
||
type DoubaoCollector struct {
|
||
*BaseCollector
|
||
}
|
||
|
||
// NewDoubaoCollector 创建豆包收集器
|
||
func NewDoubaoCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
|
||
collector := &DoubaoCollector{
|
||
BaseCollector: NewBaseCollector(ctx, params, cfg, logger),
|
||
}
|
||
|
||
// 设置豆包的URL
|
||
collector.LoginURL = "https://www.doubao.com/"
|
||
collector.ChatURL = "https://www.doubao.com/chat/"
|
||
|
||
return collector
|
||
}
|
||
|
||
// CheckLoginStatus 检查登录状态
|
||
func (c *DoubaoCollector) CheckLoginStatus() bool {
|
||
c.LogInfo("检查豆包登录状态...")
|
||
|
||
// 方法3: 检查是否有登录按钮(如果存在说明未登录)
|
||
loginButtons, err := c.Page.Elements("button")
|
||
if err == nil {
|
||
for _, btn := range loginButtons {
|
||
text, _ := btn.Text()
|
||
trimmedText := strings.TrimSpace(text)
|
||
if trimmedText == "登录" || trimmedText == "Login" || strings.Contains(trimmedText, "登录") {
|
||
c.LogInfo(fmt.Sprintf("检测到登录按钮'%s',说明未登录", trimmedText))
|
||
return false
|
||
}
|
||
}
|
||
}
|
||
|
||
c.LogInfo("未检测到登录状态相关元素")
|
||
return true
|
||
}
|
||
|
||
// WaitLogin 等待登录
|
||
func (c *DoubaoCollector) WaitLogin() (bool, string) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return false, fmt.Sprintf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
c.LogInfo("导航到豆包页面...")
|
||
c.Page.MustNavigate(c.ChatURL)
|
||
c.Sleep(3)
|
||
|
||
// 截图查看初始状态
|
||
c.Screenshot("doubao_initial")
|
||
|
||
if c.CheckLoginStatus() {
|
||
c.LogInfo("已登录,保存cookies")
|
||
c.SaveCookies()
|
||
return true, "already_logged_in"
|
||
}
|
||
|
||
c.LogInfo("未登录,等待手动登录...")
|
||
c.Screenshot("doubao_need_login")
|
||
|
||
// 最多等待300秒
|
||
for i := 0; i < 300; i++ {
|
||
if c.CheckLoginStatus() {
|
||
c.Sleep(2)
|
||
c.SaveCookies()
|
||
c.Screenshot("doubao_login_success")
|
||
c.LogInfo("登录成功!")
|
||
return true, "login_success"
|
||
}
|
||
|
||
// 每10秒输出一次提示
|
||
if i%10 == 0 && i > 0 {
|
||
c.LogInfo(fmt.Sprintf("等待登录中... 已等待 %d 秒", i))
|
||
}
|
||
|
||
time.Sleep(1 * time.Second)
|
||
}
|
||
|
||
return false, "登录超时"
|
||
}
|
||
|
||
// AskQuestion 提问并获取答案
|
||
func (c *DoubaoCollector) AskQuestion(question string) (*CollectResult, error) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return nil, fmt.Errorf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
if err := c.InitPage(); err != nil {
|
||
return nil, fmt.Errorf("页面初始化失败: %v", err)
|
||
}
|
||
|
||
// 检查是否登录
|
||
if !c.CheckLoginStatus() {
|
||
return nil, fmt.Errorf("未登录,请先调用WaitLogin进行登录")
|
||
}
|
||
|
||
c.LogInfo(fmt.Sprintf("开始提问: %s", question))
|
||
|
||
if err := c.inputQuestion(question); err != nil {
|
||
return nil, fmt.Errorf("输入问题失败: %v", err)
|
||
}
|
||
|
||
if err := c.clickSendButton(); err != nil {
|
||
return nil, fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
answer, err := c.waitForAnswer()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("获取答案失败: %v", err)
|
||
}
|
||
answerStr, isExposure := HighlightKeywordsInHTML(answer, c.KeyWords)
|
||
|
||
// 获取分享链接
|
||
shareLink := c.getShareLink()
|
||
|
||
c.LogInfo(fmt.Sprintf("✓ 获取答案成功,长度: %d 字符", len(answer)))
|
||
|
||
return &CollectResult{
|
||
Answer: answerStr,
|
||
ShareLink: shareLink,
|
||
IsExposure: isExposure,
|
||
}, nil
|
||
}
|
||
|
||
// inputQuestion 输入问题
|
||
func (c *DoubaoCollector) inputQuestion(question string) error {
|
||
c.LogInfo("输入问题...")
|
||
|
||
// 豆包的输入框选择器 - 使用精确的class匹配
|
||
inputSelectors := []string{
|
||
"textarea[placeholder*='发消息...']",
|
||
"[class*='input'] textarea",
|
||
"textarea.semi-input-textarea",
|
||
"textarea[placeholder='发消息...']",
|
||
"textarea[class*='semi-input-textarea']",
|
||
}
|
||
|
||
var inputBox *rod.Element
|
||
var err error
|
||
|
||
for _, selector := range inputSelectors {
|
||
inputBox, err = c.WaitForElementVisible(selector, 10)
|
||
if err == nil && inputBox != nil {
|
||
c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
if inputBox == nil {
|
||
return fmt.Errorf("未找到输入框")
|
||
}
|
||
|
||
// 点击获取焦点
|
||
if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return fmt.Errorf("点击输入框失败: %v", err)
|
||
}
|
||
|
||
// 清空输入框(如果失败也继续)
|
||
if err := c.ClearInput(inputBox); err != nil {
|
||
c.LogInfo(fmt.Sprintf("清空输入框失败: %v", err))
|
||
}
|
||
|
||
// 使用原生Input方法输入(更稳定)
|
||
inputBox.Input(question)
|
||
c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
|
||
|
||
return nil
|
||
}
|
||
|
||
// clickSendButton 点击发送按钮
|
||
func (c *DoubaoCollector) clickSendButton() error {
|
||
c.LogInfo("点击发送按钮...")
|
||
|
||
// 尝试多种方式查找发送按钮
|
||
sendSelectors := []string{
|
||
"button[class*='send']",
|
||
"button[class*='submit']",
|
||
".send-btn",
|
||
".submit-btn",
|
||
"[aria-label*='发送']",
|
||
"[aria-label*='send']",
|
||
".send-icon",
|
||
"button svg[path*='send']",
|
||
"button svg[path*='arrow']",
|
||
}
|
||
|
||
var sendBtn *rod.Element
|
||
var err error
|
||
|
||
// 先尝试通过选择器查找
|
||
for _, selector := range sendSelectors {
|
||
sendBtn, err = c.WaitForElementClickable(selector, 5)
|
||
if err == nil && sendBtn != nil {
|
||
c.LogInfo(fmt.Sprintf("找到发送按钮: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
// 如果没找到,尝试遍历所有button元素
|
||
if sendBtn == nil {
|
||
c.LogInfo("通过选择器未找到发送按钮,尝试遍历所有button元素...")
|
||
allButtons, _ := c.Page.Elements("button")
|
||
for _, btn := range allButtons {
|
||
// 检查按钮是否可点击且可见
|
||
visible, _ := btn.Visible()
|
||
if visible {
|
||
classAttr, _ := btn.Attribute("class")
|
||
text, _ := btn.Text()
|
||
|
||
// 检查是否包含send、submit等关键词
|
||
if classAttr != nil && (strings.Contains(strings.ToLower(*classAttr), "send") ||
|
||
strings.Contains(strings.ToLower(*classAttr), "submit")) {
|
||
sendBtn = btn
|
||
c.LogInfo(fmt.Sprintf("通过class找到发送按钮: class=%s", *classAttr))
|
||
break
|
||
}
|
||
|
||
// 检查文本内容
|
||
trimmedText := strings.TrimSpace(text)
|
||
if trimmedText == "发送" || trimmedText == "Send" {
|
||
sendBtn = btn
|
||
c.LogInfo(fmt.Sprintf("通过文本找到发送按钮: text=%s", trimmedText))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 最后的fallback:查找最后一个button
|
||
if sendBtn == nil {
|
||
buttons, _ := c.Page.Elements("button")
|
||
if len(buttons) > 0 {
|
||
sendBtn = buttons[len(buttons)-1]
|
||
c.LogInfo("使用最后一个button作为发送按钮")
|
||
}
|
||
}
|
||
|
||
if sendBtn == nil {
|
||
return fmt.Errorf("未找到发送按钮")
|
||
}
|
||
|
||
// 滚动到可见区域
|
||
if err := sendBtn.ScrollIntoView(); err != nil {
|
||
c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
|
||
}
|
||
|
||
// 点击发送按钮
|
||
c.LogInfo("执行点击...")
|
||
if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
c.LogInfo("已点击发送按钮")
|
||
|
||
return nil
|
||
}
|
||
|
||
// waitForAnswer 等待并获取答案(处理流式输出)
|
||
func (c *DoubaoCollector) waitForAnswer() (string, error) {
|
||
c.LogInfo("等待AI回答...")
|
||
|
||
timeout := 180 // 最大等待时间(秒)
|
||
startTime := time.Now()
|
||
|
||
var lastAnswer string
|
||
var stableCount int // 稳定计数器
|
||
const requiredStableCount = 5 // 需要连续5次内容不变才认为完成
|
||
isAnswering := false // 标记是否正在回答中
|
||
|
||
for time.Since(startTime).Seconds() < float64(timeout) {
|
||
// 尝试多种方式查找答案容器
|
||
answerSelectors := []string{
|
||
"div[data-message-id]",
|
||
"div[data-message-id*='']",
|
||
}
|
||
|
||
var answerText string
|
||
|
||
for _, selector := range answerSelectors {
|
||
answerElements, err := c.Page.Elements(selector)
|
||
if err == nil && len(answerElements) > 0 {
|
||
// 取最后一个元素(最新的回答)
|
||
lastAnswerElem := answerElements[len(answerElements)-1]
|
||
|
||
visible, _ := lastAnswerElem.Visible()
|
||
if visible {
|
||
// 尝试获取HTML内容
|
||
htmlContent, err := lastAnswerElem.HTML()
|
||
if err == nil && len(strings.TrimSpace(htmlContent)) > 30 {
|
||
// 清理HTML标签,只保留纯文本
|
||
answerText = CleanHTMLTags(htmlContent)
|
||
c.LogInfo(fmt.Sprintf("找到答案容器: %s, 清理后文本长度: %d", selector, len(answerText)))
|
||
break
|
||
}
|
||
|
||
// 如果HTML获取失败,尝试获取文本
|
||
text, err := lastAnswerElem.Text()
|
||
if err == nil && len(strings.TrimSpace(text)) > 30 {
|
||
answerText = strings.TrimSpace(text)
|
||
c.LogInfo(fmt.Sprintf("找到答案容器: %s, 文本长度: %d", selector, len(answerText)))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 如果常规方法没找到,尝试查找所有包含较多文本的div
|
||
if answerText == "" {
|
||
allDivs, _ := c.Page.Elements("div")
|
||
for _, div := range allDivs {
|
||
visible, _ := div.Visible()
|
||
if !visible {
|
||
continue
|
||
}
|
||
|
||
text, err := div.Text()
|
||
if err == nil {
|
||
trimmedText := strings.TrimSpace(text)
|
||
// 查找包含较多文本且不是输入框的div
|
||
if len(trimmedText) > 50 && len(trimmedText) < 5000 {
|
||
// 排除输入框相关的div
|
||
classAttr, _ := div.Attribute("class")
|
||
if classAttr != nil {
|
||
classLower := strings.ToLower(*classAttr)
|
||
if strings.Contains(classLower, "input") ||
|
||
strings.Contains(classLower, "textarea") ||
|
||
strings.Contains(classLower, "send") {
|
||
continue
|
||
}
|
||
}
|
||
|
||
answerText = CleanHTMLTags(trimmedText)
|
||
c.LogInfo(fmt.Sprintf("通过遍历div找到答案,文本长度: %d", len(answerText)))
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 检查是否获取到答案
|
||
if answerText != "" && len(answerText) > 30 {
|
||
if !isAnswering {
|
||
c.LogInfo("检测到AI开始回答...")
|
||
isAnswering = true
|
||
}
|
||
|
||
// 检查内容是否稳定(流式输出完成)
|
||
if answerText == lastAnswer {
|
||
stableCount++
|
||
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerText)))
|
||
|
||
// 如果内容稳定足够次数,说明回答完成
|
||
if stableCount >= requiredStableCount {
|
||
c.LogInfo(fmt.Sprintf("✓ AI回答完成,最终长度: %d 字符", len(answerText)))
|
||
return answerText, nil
|
||
}
|
||
} else {
|
||
// 内容还在变化,重置计数器
|
||
stableCount = 0
|
||
lastAnswer = answerText
|
||
c.LogInfo(fmt.Sprintf("检测到流式输出,当前长度: %d 字符", len(answerText)))
|
||
}
|
||
}
|
||
|
||
c.SleepMs(1500) // 每1.5秒检查一次
|
||
|
||
// 每10秒输出一次等待状态
|
||
elapsed := int(time.Since(startTime).Seconds())
|
||
if elapsed > 0 && elapsed%10 == 0 {
|
||
c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
|
||
// 截图帮助调试
|
||
if elapsed%30 == 0 {
|
||
c.Screenshot(fmt.Sprintf("doubao_wait_answer_%d", elapsed))
|
||
}
|
||
}
|
||
}
|
||
|
||
return "", fmt.Errorf("等待答案超时(%d秒)", timeout)
|
||
}
|
||
|
||
// getShareLink 尝试获取当前对话的分享链接
|
||
func (c *DoubaoCollector) getShareLink() string {
|
||
c.LogInfo("尝试获取分享链接...")
|
||
|
||
// 步骤1: 找到class包含message-action-button-main的div
|
||
actionDiv, err := c.Page.Element("div[data-foundation-type*='receive-message-action-bar']")
|
||
if err != nil || actionDiv == nil {
|
||
c.LogInfo("未找到message-action-button-main元素")
|
||
return ""
|
||
}
|
||
|
||
c.LogInfo("找到message-action-button-main元素")
|
||
|
||
// 步骤2: 在该div中找到所有button,取倒数第二个作为分享按钮
|
||
buttons, err := actionDiv.Elements("button")
|
||
if err != nil || len(buttons) == 0 {
|
||
c.LogInfo("未找到button元素")
|
||
return ""
|
||
}
|
||
|
||
if len(buttons) < 2 {
|
||
c.LogInfo(fmt.Sprintf("button数量不足(%d),无法获取倒数第二个", len(buttons)))
|
||
return ""
|
||
}
|
||
|
||
shareBtn := buttons[len(buttons)-3]
|
||
c.LogInfo(fmt.Sprintf("找到分享按钮(共%d个button)", len(buttons)))
|
||
|
||
// 检查是否可点击,如果pointer-events为none,使用JavaScript点击
|
||
visible, _ := shareBtn.Visible()
|
||
if !visible {
|
||
c.LogInfo("分享按钮不可见,尝试使用JavaScript点击")
|
||
// 使用立即执行函数,但返回一个空函数避免.apply错误
|
||
_, err := c.Page.Eval(`(function(){Array.from(document.querySelectorAll('div[class*="message-action-button-main"] button')).slice(-2)[0].click();return function(){};})`)
|
||
if err != nil {
|
||
c.LogInfo(fmt.Sprintf("JavaScript点击失败: %v", err))
|
||
return ""
|
||
}
|
||
} else {
|
||
// 正常点击
|
||
if err := shareBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
c.LogInfo(fmt.Sprintf("点击分享按钮失败: %v,尝试JavaScript点击", err))
|
||
// Fallback: 使用JavaScript点击
|
||
_, err := c.Page.Eval(`(function(){Array.from(document.querySelectorAll('div[class*="message-action-button-main"] button')).slice(-2)[0].click();return function(){};})`)
|
||
if err != nil {
|
||
c.LogInfo(fmt.Sprintf("JavaScript点击也失败: %v", err))
|
||
return ""
|
||
}
|
||
}
|
||
}
|
||
|
||
c.SleepMs(500)
|
||
|
||
// 步骤3: 找到内容为"复制链接"的span并点击
|
||
copySpan, err := c.Page.ElementX("//span[contains(text(), '复制链接')]")
|
||
if err != nil || copySpan == nil {
|
||
c.LogInfo("未找到'复制链接'span元素")
|
||
return ""
|
||
}
|
||
|
||
c.LogInfo("找到'复制链接'span元素,点击复制...")
|
||
if err := copySpan.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
c.LogInfo(fmt.Sprintf("点击复制按钮失败: %v,尝试JavaScript点击", err))
|
||
// Fallback: 使用JavaScript点击
|
||
script := `
|
||
(function() {
|
||
var spans = document.querySelectorAll('span');
|
||
for (var i = 0; i < spans.length; i++) {
|
||
if (spans[i].textContent.includes('复制链接')) {
|
||
spans[i].click();
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
})()
|
||
`
|
||
result, err := c.Page.Eval(script)
|
||
if err != nil || result == nil {
|
||
c.LogInfo(fmt.Sprintf("JavaScript点击复制按钮失败: %v", err))
|
||
return ""
|
||
}
|
||
}
|
||
c.SleepMs(500)
|
||
|
||
// 步骤4: 从剪贴板获取内容
|
||
clipboardContent := c.getClipboardContent()
|
||
if clipboardContent != "" {
|
||
c.LogInfo(fmt.Sprintf("从剪贴板获取到分享链接: %s", clipboardContent))
|
||
return clipboardContent
|
||
}
|
||
|
||
c.LogInfo("未能从剪贴板获取链接")
|
||
return ""
|
||
}
|
||
|
||
// getClipboardContent 从剪贴板获取内容
|
||
func (c *DoubaoCollector) getClipboardContent() string {
|
||
// 使用atotto/clipboard库读取系统剪贴板
|
||
text, err := clipboard.ReadAll()
|
||
if err != nil {
|
||
c.LogInfo(fmt.Sprintf("读取剪贴板失败: %v", err))
|
||
return ""
|
||
}
|
||
|
||
if text == "" {
|
||
c.LogInfo("剪贴板内容为空")
|
||
return ""
|
||
}
|
||
|
||
c.LogInfo(fmt.Sprintf("剪贴板原始内容: %s", text))
|
||
return text
|
||
}
|
||
|
||
// extractURL returns the first URL found in text, or "" when there is none.
//
// The first "https://" occurrence is preferred; only when text contains no
// "https://" at all is the first "http://" used. The URL runs from that
// prefix up to (but not including) the next space, tab, carriage return or
// line feed, or to the end of text when no such character follows.
func extractURL(text string) string {
	start := strings.Index(text, "https://")
	if start == -1 {
		start = strings.Index(text, "http://")
	}
	if start == -1 {
		return ""
	}

	rest := text[start:]
	// Stop at any ASCII whitespace, not just a space: copied text commonly
	// puts the link on its own line.
	if end := strings.IndexAny(rest, " \t\r\n"); end != -1 {
		return rest[:end]
	}
	return rest
}
|
||
|
||
// SafeElement 安全地获取元素
|
||
func (c *DoubaoCollector) SafeElement(selector string) (*rod.Element, error) {
|
||
exists, _, err := c.Page.Has(selector)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if !exists {
|
||
return nil, nil
|
||
}
|
||
return c.Page.Element(selector)
|
||
}
|