geoGo/internal/collect/qianwen.go

294 lines
7.6 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package collect
import (
"context"
"fmt"
"geo/internal/config"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
"github.com/gofiber/fiber/v2/log"
)
// QianwenCollector is the collector for Tongyi Qianwen (通义千问).
// It embeds *BaseCollector, so the base collector's fields and methods
// are promoted onto this type.
type QianwenCollector struct {
*BaseCollector
}
// NewQianwenCollector builds a Tongyi Qianwen collector on top of a fresh
// BaseCollector. Both the login URL and the chat URL point at the same
// Qianwen page.
func NewQianwenCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
	// Qianwen serves login and chat from a single entry page.
	const qianwenURL = "https://tongyi.aliyun.com/qianwen/"

	base := NewBaseCollector(ctx, params, cfg, logger)
	base.LoginURL = qianwenURL
	base.ChatURL = qianwenURL

	return &QianwenCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the current page looks logged in.
//
// The page counts as logged out when any <button> has the exact (trimmed)
// text "登录" or "Login". If the buttons cannot be queried, or no such button
// exists, the page is reported as logged in.
func (c *QianwenCollector) CheckLoginStatus() bool {
	buttons, err := c.Page.Elements("button")
	if err != nil {
		// The check could not be performed; fall through to "logged in".
		return true
	}

	for _, button := range buttons {
		// A failure to read the text is ignored; whatever text came back is used.
		raw, _ := button.Text()
		label := strings.TrimSpace(raw)

		switch label {
		case "登录", "Login":
			c.LogInfo(fmt.Sprintf("检测到页面上有'%s'按钮,说明未登录", label))
			return false
		}
	}

	return true
}
// WaitLogin opens the Qianwen chat page and waits for the user to log in.
//
// Returns:
//   - (true, "already_logged_in") if the page is logged in right after loading;
//   - (true, "login_success") if a login is detected while polling;
//   - (false, message) if the browser cannot be started, the navigation fails,
//     or no login is detected after 300 polls.
//
// Cookies are saved on both success paths. The collector is closed before
// returning.
func (c *QianwenCollector) WaitLogin() (bool, string) {
	const (
		maxPolls      = 300 // one poll per second, so roughly 300 seconds
		remindEveryNr = 30  // emit a progress log every 30 polls
	)

	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	c.LogInfo(fmt.Sprintf("正在导航至通义千问: %s", c.ChatURL))
	// Use Navigate instead of MustNavigate: a navigation failure is reported
	// through the return values rather than panicking.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		c.LogError(fmt.Sprintf("页面导航失败: %v", err))
		return false, fmt.Sprintf("页面导航失败: %v", err)
	}
	c.Sleep(3) // presumably seconds; lets the page render before the first check

	if c.CheckLoginStatus() {
		c.LogInfo("检测到已登录状态")
		c.SaveCookies()
		return true, "already_logged_in"
	}

	c.LogInfo("未检测到登录状态,等待用户登录...")

	for i := 0; i < maxPolls; i++ {
		if c.CheckLoginStatus() {
			c.LogInfo("检测到登录成功")
			c.Sleep(2) // short pause before the cookies are saved
			c.SaveCookies()
			return true, "login_success"
		}

		time.Sleep(1 * time.Second)

		// Periodic reminder so the operator knows we are still waiting.
		if waited := i + 1; waited%remindEveryNr == 0 {
			c.LogInfo(fmt.Sprintf("仍在等待登录... 已等待 %d 秒", waited))
		}
	}

	return false, "登录超时"
}
// AskQuestion submits question to Tongyi Qianwen and returns the answer.
//
// Steps, in order: start the browser, initialize the page, type the question,
// click send, wait for the answer. The collector is closed before returning.
// The raw answer is passed through HighlightKeywordsInText together with
// c.KeyWords; its two results become Answer and IsExposure. ShareLink is
// always empty.
//
// On failure the result is nil and the error wraps the underlying cause with
// %w, so callers can inspect it with errors.Is / errors.As. The message text
// is unchanged.
func (c *QianwenCollector) AskQuestion(question string) (*CollectResult, error) {
	c.LogInfo("开始提问流程...")

	if err := c.SetupDriver(); err != nil {
		return nil, fmt.Errorf("浏览器启动失败: %w", err)
	}
	defer c.Close()

	if err := c.InitPage(); err != nil {
		return nil, fmt.Errorf("页面初始化失败: %w", err)
	}

	if err := c.inputQuestion(question); err != nil {
		return nil, fmt.Errorf("输入问题失败: %w", err)
	}

	if err := c.clickSendButton(); err != nil {
		return nil, fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return nil, fmt.Errorf("获取答案失败: %w", err)
	}

	answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
	return &CollectResult{
		Answer:     answerStr,
		ShareLink:  "",
		IsExposure: isExposure,
	}, nil
}
// inputQuestion locates the chat input box, focuses it and types question.
//
// Candidate selectors are tried in order and the first one that resolves
// without error is used. An error is returned when no input box is found,
// when the focusing click fails, or when typing the text fails.
func (c *QianwenCollector) inputQuestion(question string) error {
	c.LogInfo("开始输入问题...")

	// Candidate selectors for the Qianwen input box, tried in order.
	inputSelectors := []string{
		"[contenteditable='true']",
		".chat-input textarea",
		"#chat-input",
		".input-box textarea",
		".question-input",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		// The second argument is presumably a timeout in seconds.
		elem, err := c.WaitForElementVisible(selector, 10)
		if err != nil || elem == nil {
			// Only accept an element from a successful lookup.
			continue
		}
		c.LogInfo(fmt.Sprintf("找到输入框,使用选择器: %s", selector))
		inputBox = elem
		break
	}
	if inputBox == nil {
		c.LogError("未找到输入框")
		return fmt.Errorf("未找到输入框")
	}

	// Click to give the input box focus.
	c.LogInfo("点击输入框获取焦点...")
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		c.LogError(fmt.Sprintf("点击输入框失败: %v", err))
		return fmt.Errorf("点击输入框失败: %w", err)
	}
	c.SleepMs(500)

	// NOTE(review): this step only logs; no clearing is actually performed.
	c.LogInfo("清空输入框...")

	c.LogInfo(fmt.Sprintf("正在输入问题: %s", question))
	// Surface typing failures instead of reporting success unconditionally.
	if err := inputBox.Input(question); err != nil {
		c.LogError(fmt.Sprintf("向输入框写入内容失败: %v", err))
		return fmt.Errorf("向输入框写入内容失败: %w", err)
	}
	c.SleepMs(1000)

	c.LogInfo("问题输入完成")
	return nil
}
// clickSendButton finds the send button and clicks it through JSClick.
//
// The button is first looked up by its aria-label. If that fails, the first
// "button svg" element currently in the page is used instead. The fallback is
// a single non-waiting check, so a page without such an element produces an
// error rather than waiting for one to appear.
func (c *QianwenCollector) clickSendButton() error {
	c.LogInfo("开始点击发送按钮...")

	// Selectors for the send button, tried in order.
	sendSelectors := []string{
		"[aria-label*='发送']",
	}

	var sendBtn *rod.Element
	for _, selector := range sendSelectors {
		// The second argument is presumably a timeout in seconds.
		elem, err := c.WaitForElementClickable(selector, 5)
		if err != nil || elem == nil {
			continue
		}
		c.LogInfo(fmt.Sprintf("找到发送按钮,使用选择器: %s", selector))
		sendBtn = elem
		break
	}

	if sendBtn == nil {
		c.LogInfo("未通过常规选择器找到发送按钮,尝试查找 SVG 图标...")

		// Has performs one lookup and reports "not found" as found == false,
		// whereas Page.Element keeps retrying until a match shows up.
		found, icon, err := c.Page.Has("button svg")
		if err != nil {
			c.LogError("未找到发送按钮或相关图标")
			return fmt.Errorf("未找到发送按钮: %w", err)
		}
		if !found || icon == nil {
			c.LogError("未找到发送按钮或相关图标")
			return fmt.Errorf("未找到发送按钮")
		}

		c.LogInfo("找到 SVG 图标作为发送按钮")
		sendBtn = icon
	}

	c.SleepMs(500)

	c.LogInfo("执行点击发送操作...")
	if err := c.JSClick(sendBtn); err != nil {
		c.LogError(fmt.Sprintf("点击发送按钮失败: %v", err))
		return fmt.Errorf("点击发送按钮失败: %w", err)
	}

	c.LogInfo("发送按钮点击完成,等待响应...")
	c.SleepMs(2000)
	return nil
}
// waitForAnswer polls the answer container until its HTML stops changing and
// returns that HTML (whitespace-trimmed).
//
// The container "#qk-markdown-react" is read every 1.5 seconds. The answer is
// considered complete once the HTML has been identical to the previous
// snapshot on 5 consecutive polls. If that does not happen within 180 seconds
// an error is returned.
func (c *QianwenCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待AI回答...")

	const (
		answerSelector      = "#qk-markdown-react"
		timeoutSeconds      = 180              // maximum total wait
		requiredStableCount = 5                // unchanged polls needed to call the answer done
		pollIntervalMs      = 1500             // delay between polls
		statusLogInterval   = 10 * time.Second // cadence of the "still waiting" log
	)

	var (
		lastAnswer  string // previous HTML snapshot
		stableCount int    // consecutive polls with unchanged HTML
		isAnswering bool   // set once any answer HTML has been seen
	)
	startTime := time.Now()
	lastStatusLog := startTime

	for time.Since(startTime) < timeoutSeconds*time.Second {
		var answerHTML string

		// Has does a single lookup and never waits for the element, so a
		// missing container cannot stall the loop past its deadline.
		found, answerElem, err := c.Page.Has(answerSelector)
		if err == nil && found && answerElem != nil {
			htmlContent, htmlErr := answerElem.HTML()
			if htmlErr == nil && htmlContent != "" {
				answerHTML = strings.TrimSpace(htmlContent)
				c.LogInfo(fmt.Sprintf("找到答案容器 #qk-markdown-reactHTML长度: %d", len(answerHTML)))
			}
		} else {
			c.LogInfo("未找到#qk-markdown-react元素")
		}

		if answerHTML != "" {
			if !isAnswering {
				c.LogInfo("检测到AI开始回答...")
				isAnswering = true
			}

			if answerHTML == lastAnswer {
				// Unchanged since the last poll: streaming may have finished.
				stableCount++
				c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))

				if stableCount >= requiredStableCount {
					c.LogInfo(fmt.Sprintf("✓ AI回答完成最终HTML长度: %d 字符", len(answerHTML)))
					return answerHTML, nil
				}
			} else {
				// Still streaming: remember the snapshot and restart the count.
				stableCount = 0
				lastAnswer = answerHTML
				c.LogInfo(fmt.Sprintf("检测到流式输出当前HTML长度: %d 字符", len(answerHTML)))
			}
		}

		c.SleepMs(pollIntervalMs)

		// Log progress based on time since the last status log. Testing
		// "elapsed % 10 == 0" on a 1.5 s poll skips or repeats ticks.
		if time.Since(lastStatusLog) >= statusLogInterval {
			lastStatusLog = time.Now()
			c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", int(time.Since(startTime).Seconds())))
		}
	}

	return "", fmt.Errorf("等待答案超时(%d秒", timeoutSeconds)
}
// SafeElement returns the first element matching selector without waiting
// for it to appear.
//
// Returns:
//   - (element, nil) when a match exists;
//   - (nil, nil) when nothing matches;
//   - (nil, err) when the lookup itself fails.
//
// The element comes straight from Page.Has. A second Page.Element call would
// repeat the query and could wait if the node vanished between the two calls.
func (c *QianwenCollector) SafeElement(selector string) (*rod.Element, error) {
	found, elem, err := c.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !found {
		return nil, nil
	}
	return elem, nil
}