geoGo/internal/collect/yuanbao.go

302 lines
7.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package collect
import (
"context"
"fmt"
"geo/internal/config"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
"github.com/gofiber/fiber/v2/log"
)
// YuanbaoCollector is the collector for Tencent Yuanbao
// (yuanbao.tencent.com). It embeds *BaseCollector, so the fields and methods
// used throughout this file (Page, LoginURL, LogInfo, Sleep, ...) are reached
// through that embedded value.
type YuanbaoCollector struct {
*BaseCollector
}
// NewYuanbaoCollector builds a Yuanbao collector on top of a fresh base
// collector created from ctx, params, cfg and logger, and returns it as a
// CollectorInterface.
func NewYuanbaoCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
	// Yuanbao uses the same page for logging in and for chatting.
	const chatPage = "https://yuanbao.tencent.com/chat"

	base := NewBaseCollector(ctx, params, cfg, logger)
	base.LoginURL = chatPage
	base.ChatURL = chatPage

	return &YuanbaoCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the current page looks logged in.
//
// It scans every <button> on the page. If any button's trimmed text is
// exactly "登录" or "Log In", the user is treated as not logged in and false
// is returned. If no such button exists it returns true.
//
// When the buttons cannot be queried at all, the failure is logged and the
// method still returns true, which is what it returned before the error was
// surfaced.
// NOTE(review): treating a failed query as "logged in" may be too optimistic;
// confirm against callers before changing the result.
func (c *YuanbaoCollector) CheckLoginStatus() bool {
	buttons, err := c.Page.Elements("button")
	if err != nil {
		c.LogError(fmt.Sprintf("查询页面按钮失败: %v", err))
		return true
	}

	for _, btn := range buttons {
		text, err := btn.Text()
		if err != nil {
			// A button whose text cannot be read cannot match a login
			// label, so it is skipped.
			continue
		}

		label := strings.TrimSpace(text)
		if label == "登录" || label == "Log In" {
			c.LogInfo(fmt.Sprintf("检测到页面上有'%s'按钮,说明未登录", label))
			return false
		}
	}

	// No login button found: consider the session logged in.
	return true
}
// WaitLogin opens the login page and waits for the user to log in.
//
// It returns (true, "already_logged_in") when the page is already logged in
// after the initial load, (true, "login_success") when a login is detected
// while polling, and (false, <reason>) when the browser cannot be started, the
// page cannot be opened, or no login is detected within the wait window.
// Cookies are saved on both success paths. The driver is closed on return.
func (c *YuanbaoCollector) WaitLogin() (bool, string) {
	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Navigate (not MustNavigate): a navigation failure is reported through
	// the return values instead of panicking.
	if err := c.Page.Navigate(c.LoginURL); err != nil {
		return false, fmt.Sprintf("打开登录页面失败: %v", err)
	}
	c.Sleep(3)

	if c.CheckLoginStatus() {
		c.SaveCookies()
		return true, "already_logged_in"
	}

	c.LogInfo("等待用户登录...")

	const (
		maxWaitSeconds     = 300 // upper bound on how long to wait for a login
		remindEverySeconds = 30  // interval between "still waiting" log lines
	)
	for waited := 1; waited <= maxWaitSeconds; waited++ {
		if c.CheckLoginStatus() {
			c.LogInfo("检测到登录成功")
			c.Sleep(2)
			c.SaveCookies()
			return true, "login_success"
		}

		time.Sleep(time.Second)

		if waited%remindEverySeconds == 0 {
			c.LogInfo(fmt.Sprintf("仍在等待登录... 已等待 %d 秒", waited))
		}
	}

	return false, "登录超时"
}
// AskQuestion submits question and returns the collected answer.
//
// The steps run in order: start the driver, initialise the page, type the
// question, trigger sending, then wait for the answer. The first failing step
// aborts the flow and its error is returned wrapped (with %w, so the cause
// stays reachable through errors.Is / errors.As). The driver is closed on
// return.
//
// On success the raw answer is passed through HighlightKeywordsInText together
// with c.KeyWords; the two results populate Answer and IsExposure. ShareLink
// is always empty.
func (c *YuanbaoCollector) AskQuestion(question string) (*CollectResult, error) {
	c.LogInfo("开始提问流程...")

	if err := c.SetupDriver(); err != nil {
		return nil, fmt.Errorf("浏览器启动失败: %w", err)
	}
	defer c.Close()

	if err := c.InitPage(); err != nil {
		return nil, fmt.Errorf("页面初始化失败: %w", err)
	}
	if err := c.inputQuestion(question); err != nil {
		return nil, fmt.Errorf("输入问题失败: %w", err)
	}
	if err := c.clickSendButton(); err != nil {
		return nil, fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return nil, fmt.Errorf("获取答案失败: %w", err)
	}

	answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
	return &CollectResult{
		Answer:     answerStr,
		ShareLink:  "",
		IsExposure: isExposure,
	}, nil
}
// inputQuestion locates the chat input box and writes question into it.
//
// Focusing (click) and clearing the box are best-effort: failures are logged
// and the flow continues. Writing the text is not: SetInputValue is tried
// first, then Element.Input as a fallback, and if both fail an error is
// returned so the caller does not go on to send an empty message.
//
// It returns an error when no input box is found or when the question could
// not be written.
func (c *YuanbaoCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题...")

	// Selectors for the Yuanbao input box, tried in order.
	inputSelectors := []string{
		"[contenteditable='true']",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		el, err := c.WaitForElementClickable(selector, 10)
		if err == nil && el != nil {
			c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
			inputBox = el
			break
		}
	}
	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to give the box focus; a failed click is logged but not fatal.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		c.LogError(fmt.Sprintf("点击输入框失败: %v", err))
	}
	c.SleepMs(500)

	// Clear any existing content; a failure here is logged but not fatal.
	if err := c.ClearInput(inputBox); err != nil {
		c.LogError(fmt.Sprintf("清空输入框失败: %v", err))
	}
	c.SleepMs(300)

	// Write the question, falling back to Element.Input when SetInputValue
	// fails. Only report success if one of the two actually worked.
	if setErr := c.SetInputValue(inputBox, question); setErr != nil {
		if inputErr := inputBox.Input(question); inputErr != nil {
			return fmt.Errorf("设置输入框内容失败: %v; 回退输入也失败: %w", setErr, inputErr)
		}
	}

	c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
	c.SleepMs(1000)
	c.LogInfo("问题输入完成")
	return nil
}
// clickSendButton triggers sending of the question that was typed.
//
// It looks for the Yuanbao send button and clicks it through JSClick. When no
// send button is found it falls back to writing a newline into the first
// textarea it can locate.
//
// It returns an error when the click fails, when the newline cannot be
// written, or when neither a send button nor a fallback input is found.
func (c *YuanbaoCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	// Selectors for the Yuanbao send button, tried in order.
	sendSelectors := []string{
		"a[id='yuanbao-send-btn']",
	}

	var sendBtn *rod.Element
	for _, selector := range sendSelectors {
		btn, err := c.WaitForElementClickable(selector, 5)
		if err == nil && btn != nil {
			c.LogInfo(fmt.Sprintf("找到发送按钮: %s", selector))
			sendBtn = btn
			break
		}
	}

	if sendBtn != nil {
		if err := c.JSClick(sendBtn); err != nil {
			return fmt.Errorf("点击发送按钮失败: %w", err)
		}
		c.LogInfo("发送按钮点击完成")
		return nil
	}

	// No send button: try to submit by sending a newline to the input.
	c.LogInfo("未找到发送按钮,尝试按回车发送")

	// NOTE(review): these selectors target <textarea>, while inputQuestion
	// locates the box via [contenteditable='true'] — confirm this fallback can
	// actually match on the Yuanbao page. Also confirm that inserting "\n"
	// through Element.Input is treated by the page as pressing Enter.
	inputSelectors := []string{
		"textarea[placeholder*='问']",
		"textarea[placeholder*='输入']",
		"textarea",
	}
	for _, selector := range inputSelectors {
		inputBox, err := c.WaitForElement(selector, 5)
		if err != nil || inputBox == nil {
			continue
		}
		if err := inputBox.Input("\n"); err != nil {
			return fmt.Errorf("按回车发送失败: %w", err)
		}
		c.LogInfo("按回车发送成功")
		return nil
	}

	return fmt.Errorf("未找到输入框来按回车发送")
}
// waitForAnswer polls the page until the AI answer stops changing and returns
// its HTML.
//
// Every 1.5 seconds the HTML of the newest visible answer container is read.
// Because the answer is streamed, it is considered complete only once the HTML
// has been identical for requiredStableCount consecutive polls after first
// being seen. A progress line is logged roughly every 10 seconds.
//
// It returns the trimmed HTML of the answer container, or an error if no
// stable answer was obtained within the timeout.
func (c *YuanbaoCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待AI回答...")

	const (
		timeoutSeconds      = 180                           // maximum wait, in seconds
		timeout             = timeoutSeconds * time.Second  // same limit as a Duration
		pollIntervalMs      = 1500                          // pause between polls
		reportInterval      = 10 * time.Second              // spacing of progress logs
		requiredStableCount = 5                             // unchanged polls needed to finish
	)

	// Selectors for the answer container, tried in order.
	answerSelectors := []string{
		"div[data-trace-id]", // generic message-id selector
	}

	var (
		lastAnswer  string // HTML seen on the previous poll that had content
		stableCount int    // consecutive polls with unchanged HTML
		isAnswering bool   // set once any answer HTML has been seen
	)
	startTime := time.Now()
	nextReport := reportInterval

	for time.Since(startTime) < timeout {
		if answerHTML := c.latestAnswerHTML(answerSelectors); answerHTML != "" {
			if !isAnswering {
				c.LogInfo("检测到AI开始回答...")
				isAnswering = true
			}

			if answerHTML == lastAnswer {
				stableCount++
				c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))
				if stableCount >= requiredStableCount {
					c.LogInfo(fmt.Sprintf("✓ AI回答完成最终HTML长度: %d 字符", len(answerHTML)))
					return answerHTML, nil
				}
			} else {
				// Content is still changing: restart the stability count.
				stableCount = 0
				lastAnswer = answerHTML
				c.LogInfo(fmt.Sprintf("检测到流式输出当前HTML长度: %d 字符", len(answerHTML)))
			}
		}

		c.SleepMs(pollIntervalMs)

		// Report progress once per reportInterval. Comparing against a moving
		// deadline (rather than elapsed%10 == 0) keeps the cadence regular even
		// though polls do not land on whole multiples of ten seconds.
		if elapsed := time.Since(startTime); elapsed >= nextReport {
			c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", int(elapsed.Seconds())))
			for nextReport <= elapsed {
				nextReport += reportInterval
			}
		}
	}

	return "", fmt.Errorf("等待答案超时(%d秒)", timeoutSeconds)
}

// latestAnswerHTML returns the trimmed HTML of the last element matched by the
// first selector that yields a visible, non-empty element, or "" when no
// selector does.
func (c *YuanbaoCollector) latestAnswerHTML(selectors []string) string {
	for _, selector := range selectors {
		elems, err := c.Page.Elements(selector)
		if err != nil || len(elems) == 0 {
			continue
		}

		// The last match is taken as the most recent message.
		newest := elems[len(elems)-1]

		// A visibility check that errors is treated the same as "not visible".
		if visible, _ := newest.Visible(); !visible {
			continue
		}

		html, err := newest.HTML()
		if err != nil || html == "" {
			continue
		}

		trimmed := strings.TrimSpace(html)
		c.LogInfo(fmt.Sprintf("找到答案容器: %s, HTML长度: %d", selector, len(trimmed)))
		return trimmed
	}
	return ""
}