294 lines
7.6 KiB
Go
294 lines
7.6 KiB
Go
package collect
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"geo/internal/config"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/go-rod/rod"
|
||
"github.com/go-rod/rod/lib/proto"
|
||
"github.com/gofiber/fiber/v2/log"
|
||
)
|
||
|
||
// QianwenCollector 通义千问收集器
|
||
type QianwenCollector struct {
|
||
*BaseCollector
|
||
}
|
||
|
||
// NewQianwenCollector 创建通义千问收集器
|
||
func NewQianwenCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
|
||
collector := &QianwenCollector{
|
||
BaseCollector: NewBaseCollector(ctx, params, cfg, logger),
|
||
}
|
||
|
||
// 设置通义千问的URL
|
||
collector.LoginURL = "https://tongyi.aliyun.com/qianwen/"
|
||
collector.ChatURL = "https://tongyi.aliyun.com/qianwen/"
|
||
|
||
return collector
|
||
}
|
||
|
||
// CheckLoginStatus 检查登录状态
|
||
func (c *QianwenCollector) CheckLoginStatus() bool {
|
||
// 检查页面上是否存在内容为"登录"或"Login"的button,如果存在说明未登录
|
||
loginButtons, err := c.Page.Elements("button")
|
||
if err == nil {
|
||
for _, btn := range loginButtons {
|
||
text, _ := btn.Text()
|
||
trimmedText := strings.TrimSpace(text)
|
||
if trimmedText == "登录" || trimmedText == "Login" {
|
||
c.LogInfo(fmt.Sprintf("检测到页面上有'%s'按钮,说明未登录", trimmedText))
|
||
return false
|
||
}
|
||
}
|
||
}
|
||
return true
|
||
}
|
||
|
||
// WaitLogin 等待登录
|
||
func (c *QianwenCollector) WaitLogin() (bool, string) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return false, fmt.Sprintf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
c.LogInfo(fmt.Sprintf("正在导航至通义千问: %s", c.ChatURL))
|
||
c.Page.MustNavigate(c.ChatURL)
|
||
c.Sleep(3)
|
||
|
||
if c.CheckLoginStatus() {
|
||
c.LogInfo("检测到已登录状态")
|
||
c.SaveCookies()
|
||
return true, "already_logged_in"
|
||
}
|
||
|
||
c.LogInfo("未检测到登录状态,等待用户登录...")
|
||
|
||
// 最多等待300秒
|
||
for i := 0; i < 300; i++ {
|
||
if c.CheckLoginStatus() {
|
||
c.LogInfo("检测到登录成功")
|
||
c.Sleep(2)
|
||
c.SaveCookies()
|
||
return true, "login_success"
|
||
}
|
||
time.Sleep(1 * time.Second)
|
||
|
||
// 每30秒提醒一次
|
||
if (i+1)%30 == 0 {
|
||
c.LogInfo(fmt.Sprintf("仍在等待登录... 已等待 %d 秒", i+1))
|
||
}
|
||
}
|
||
|
||
return false, "登录超时"
|
||
}
|
||
|
||
// AskQuestion 提问并获取答案
|
||
func (c *QianwenCollector) AskQuestion(question string) (*CollectResult, error) {
|
||
c.LogInfo("开始提问流程...")
|
||
|
||
if err := c.SetupDriver(); err != nil {
|
||
return nil, fmt.Errorf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
if err := c.InitPage(); err != nil {
|
||
return nil, fmt.Errorf("页面初始化失败: %v", err)
|
||
}
|
||
|
||
if err := c.inputQuestion(question); err != nil {
|
||
return nil, fmt.Errorf("输入问题失败: %v", err)
|
||
}
|
||
|
||
if err := c.clickSendButton(); err != nil {
|
||
return nil, fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
answer, err := c.waitForAnswer()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("获取答案失败: %v", err)
|
||
}
|
||
answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords)
|
||
return &CollectResult{
|
||
Answer: answerStr,
|
||
ShareLink: "",
|
||
IsExposure: isExposure,
|
||
}, nil
|
||
}
|
||
|
||
// inputQuestion 输入问题
|
||
func (c *QianwenCollector) inputQuestion(question string) error {
|
||
c.LogInfo("开始输入问题...")
|
||
|
||
// 通义千问的输入框选择器
|
||
inputSelectors := []string{
|
||
|
||
"[contenteditable='true']",
|
||
".chat-input textarea",
|
||
"#chat-input",
|
||
".input-box textarea",
|
||
".question-input",
|
||
}
|
||
|
||
var inputBox *rod.Element
|
||
var err error
|
||
|
||
for _, selector := range inputSelectors {
|
||
inputBox, err = c.WaitForElementVisible(selector, 10)
|
||
if err == nil && inputBox != nil {
|
||
c.LogInfo(fmt.Sprintf("找到输入框,使用选择器: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
if inputBox == nil {
|
||
c.LogError("未找到输入框")
|
||
return fmt.Errorf("未找到输入框")
|
||
}
|
||
|
||
// 点击获取焦点
|
||
c.LogInfo("点击输入框获取焦点...")
|
||
if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
c.LogError(fmt.Sprintf("点击输入框失败: %v", err))
|
||
return fmt.Errorf("点击输入框失败: %v", err)
|
||
}
|
||
c.SleepMs(500)
|
||
|
||
// 清空输入框
|
||
c.LogInfo("清空输入框...")
|
||
|
||
// 输入问题
|
||
c.LogInfo(fmt.Sprintf("正在输入问题: %s", question))
|
||
inputBox.Input(question)
|
||
|
||
c.SleepMs(1000)
|
||
c.LogInfo("问题输入完成")
|
||
|
||
return nil
|
||
}
|
||
|
||
// clickSendButton 点击发送按钮
|
||
func (c *QianwenCollector) clickSendButton() error {
|
||
c.LogInfo("开始点击发送按钮...")
|
||
|
||
// 发送按钮选择器
|
||
sendSelectors := []string{
|
||
"[aria-label*='发送']",
|
||
}
|
||
|
||
var sendBtn *rod.Element
|
||
var err error
|
||
|
||
for _, selector := range sendSelectors {
|
||
sendBtn, err = c.WaitForElementClickable(selector, 5)
|
||
if err == nil && sendBtn != nil {
|
||
c.LogInfo(fmt.Sprintf("找到发送按钮,使用选择器: %s", selector))
|
||
break
|
||
}
|
||
}
|
||
|
||
if sendBtn == nil {
|
||
c.LogInfo("未通过常规选择器找到发送按钮,尝试查找 SVG 图标...")
|
||
// 尝试通过SVG图标查找
|
||
sendBtn, err = c.Page.Element("button svg")
|
||
if err != nil {
|
||
c.LogError("未找到发送按钮或相关图标")
|
||
return fmt.Errorf("未找到发送按钮")
|
||
}
|
||
c.LogInfo("找到 SVG 图标作为发送按钮")
|
||
}
|
||
|
||
c.SleepMs(500)
|
||
|
||
// 点击发送按钮
|
||
c.LogInfo("执行点击发送操作...")
|
||
if err := c.JSClick(sendBtn); err != nil {
|
||
c.LogError(fmt.Sprintf("点击发送按钮失败: %v", err))
|
||
return fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
c.LogInfo("发送按钮点击完成,等待响应...")
|
||
c.SleepMs(2000)
|
||
|
||
return nil
|
||
}
|
||
|
||
// waitForAnswer 等待并获取答案
|
||
func (c *QianwenCollector) waitForAnswer() (string, error) {
|
||
c.LogInfo("等待AI回答...")
|
||
|
||
timeout := 180 // 最大等待时间(秒)
|
||
startTime := time.Now()
|
||
|
||
var lastAnswer string
|
||
var stableCount int // 稳定计数器
|
||
const requiredStableCount = 5 // 需要连续5次内容不变才认为完成
|
||
isAnswering := false // 标记是否正在回答中
|
||
|
||
for time.Since(startTime).Seconds() < float64(timeout) {
|
||
// 直接通过ID查找答案容器
|
||
answerElem, err := c.Page.Element("#qk-markdown-react")
|
||
var answerHTML string
|
||
|
||
if err == nil && answerElem != nil {
|
||
// 获取整个HTML内容
|
||
htmlContent, err := answerElem.HTML()
|
||
if err == nil && htmlContent != "" {
|
||
answerHTML = strings.TrimSpace(htmlContent)
|
||
c.LogInfo(fmt.Sprintf("找到答案容器 #qk-markdown-react,HTML长度: %d", len(answerHTML)))
|
||
}
|
||
} else {
|
||
c.LogInfo("未找到#qk-markdown-react元素")
|
||
}
|
||
|
||
// 检查是否获取到答案
|
||
if answerHTML != "" {
|
||
if !isAnswering {
|
||
c.LogInfo("检测到AI开始回答...")
|
||
isAnswering = true
|
||
}
|
||
|
||
// 检查内容是否稳定(流式输出完成)
|
||
if answerHTML == lastAnswer {
|
||
stableCount++
|
||
c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML)))
|
||
|
||
// 如果内容稳定足够次数,说明回答完成
|
||
if stableCount >= requiredStableCount {
|
||
c.LogInfo(fmt.Sprintf("✓ AI回答完成,最终HTML长度: %d 字符", len(answerHTML)))
|
||
return answerHTML, nil
|
||
}
|
||
} else {
|
||
// 内容还在变化,重置计数器
|
||
stableCount = 0
|
||
lastAnswer = answerHTML
|
||
c.LogInfo(fmt.Sprintf("检测到流式输出,当前HTML长度: %d 字符", len(answerHTML)))
|
||
}
|
||
}
|
||
|
||
c.SleepMs(1500) // 每1.5秒检查一次
|
||
|
||
// 每10秒输出一次等待状态
|
||
elapsed := int(time.Since(startTime).Seconds())
|
||
if elapsed > 0 && elapsed%10 == 0 {
|
||
c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
|
||
}
|
||
}
|
||
|
||
return "", fmt.Errorf("等待答案超时(%d秒)", timeout)
|
||
}
|
||
|
||
// SafeElement 安全地获取元素
|
||
func (c *QianwenCollector) SafeElement(selector string) (*rod.Element, error) {
|
||
exists, _, err := c.Page.Has(selector)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if !exists {
|
||
return nil, nil
|
||
}
|
||
return c.Page.Element(selector)
|
||
}
|