geoGo/internal/collect/deepseek.go

267 lines
6.1 KiB
Go

package collect
import (
"context"
"fmt"
"geo/internal/config"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
"github.com/gofiber/fiber/v2/log"
)
// DeepseekCollector DeepSeek收集器
type DeepseekCollector struct {
*BaseCollector
}
// NewDeepseekCollector 创建DeepSeek收集器
func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
collector := &DeepseekCollector{
BaseCollector: NewBaseCollector(ctx, params, cfg, logger),
}
// 设置DeepSeek的URL
collector.LoginURL = "https://chat.deepseek.com/"
collector.ChatURL = "https://chat.deepseek.com/"
return collector
}
// CheckLoginStatus 检查登录状态
func (c *DeepseekCollector) CheckLoginStatus() bool {
currentURL := c.GetCurrentURL()
// 如果在首页或登录页面,可能未登录
if strings.Contains(currentURL, "chat.deepseek.com") {
// 检查是否有用户头像或登录标识
userAvatar, err := c.SafeElement(".user-avatar, [class*='avatar'], [class*='profile']")
if err == nil && userAvatar != nil {
return true
}
// 检查是否有聊天输入框(登录后才有)
inputBox, err := c.SafeElement("textarea, [contenteditable='true']")
if err == nil && inputBox != nil {
return true
}
}
return false
}
// WaitLogin 等待登录
func (c *DeepseekCollector) WaitLogin() (bool, string) {
if err := c.SetupDriver(); err != nil {
return false, fmt.Sprintf("浏览器启动失败: %v", err)
}
defer c.Close()
c.Page.MustNavigate(c.ChatURL)
c.Sleep(3)
if c.CheckLoginStatus() {
c.SaveCookies()
return true, "already_logged_in"
}
for i := 0; i < 300; i++ {
if c.CheckLoginStatus() {
c.Sleep(2)
c.SaveCookies()
return true, "login_success"
}
time.Sleep(1 * time.Second)
}
return false, "登录超时"
}
// AskQuestion 提问并获取答案
func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) {
if err := c.SetupDriver(); err != nil {
return nil, fmt.Errorf("浏览器启动失败: %v", err)
}
defer c.Close()
if err := c.InitPage(); err != nil {
return nil, fmt.Errorf("页面初始化失败: %v", err)
}
c.Sleep(3)
if err := c.inputQuestion(question); err != nil {
return nil, fmt.Errorf("输入问题失败: %v", err)
}
if err := c.clickSendButton(); err != nil {
return nil, fmt.Errorf("点击发送按钮失败: %v", err)
}
answer, err := c.waitForAnswer()
if err != nil {
return nil, fmt.Errorf("获取答案失败: %v", err)
}
return &CollectResult{
Answer: answer,
ShareLink: "",
}, nil
}
// inputQuestion 输入问题
func (c *DeepseekCollector) inputQuestion(question string) error {
// DeepSeek的输入框选择器
inputSelectors := []string{
"textarea[placeholder*='输入']",
"textarea[placeholder*='问']",
"textarea",
"[contenteditable='true']",
".chat-input textarea",
"#message-input",
}
var inputBox *rod.Element
var err error
for _, selector := range inputSelectors {
inputBox, err = c.WaitForElementVisible(selector, 10)
if err == nil && inputBox != nil {
break
}
}
if inputBox == nil {
return fmt.Errorf("未找到输入框")
}
// 点击获取焦点
if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
return fmt.Errorf("点击输入框失败: %v", err)
}
c.SleepMs(500)
// 清空输入框
if err := c.ClearInput(inputBox); err != nil {
// Ignore clear error
}
c.SleepMs(300)
// 输入问题
if err := c.SetInputValue(inputBox, question); err != nil {
inputBox.Input(question)
}
c.SleepMs(1000)
return nil
}
// clickSendButton 点击发送按钮
func (c *DeepseekCollector) clickSendButton() error {
// 发送按钮选择器
sendSelectors := []string{
"button[class*='send']",
"button[class*='submit']",
".send-button",
".submit-button",
"button svg[path*='send']",
"[aria-label*='发送']",
"[aria-label*='Send']",
}
var sendBtn *rod.Element
var err error
for _, selector := range sendSelectors {
sendBtn, err = c.WaitForElementClickable(selector, 5)
if err == nil && sendBtn != nil {
break
}
}
if sendBtn == nil {
// 尝试查找发送图标
sendBtn, err = c.Page.Element("button svg")
if err != nil {
return fmt.Errorf("未找到发送按钮")
}
}
c.SleepMs(500)
// 点击发送按钮
if err := c.JSClick(sendBtn); err != nil {
return fmt.Errorf("点击发送按钮失败: %v", err)
}
c.SleepMs(2000)
return nil
}
// waitForAnswer 等待并获取答案
func (c *DeepseekCollector) waitForAnswer() (string, error) {
timeout := 120 // 最大等待时间(秒)
startTime := time.Now()
lastAnswerLength := 0
for time.Since(startTime).Seconds() < float64(timeout) {
// 查找答案区域
answerSelectors := []string{
".message-content",
".response-content",
"[class*='assistant'] [class*='content']",
"[class*='ai'] [class*='message']",
".chat-message.ai",
}
for _, selector := range answerSelectors {
answerElements, err := c.Page.Elements(selector)
if err == nil && len(answerElements) > 0 {
// 获取最后一个答案元素
lastAnswer := answerElements[len(answerElements)-1]
visible, _ := lastAnswer.Visible()
if visible {
text, err := lastAnswer.Text()
if err == nil && len(strings.TrimSpace(text)) > 0 {
// 检查是否正在生成
isGenerating := strings.Contains(text, "正在") ||
strings.Contains(text, "思考") ||
strings.Contains(text, "generating")
if !isGenerating {
// 检查答案是否还在增长
currentLength := len(text)
if currentLength == lastAnswerLength && currentLength > 10 {
// 答案不再增长,认为已完成
return strings.TrimSpace(text), nil
}
lastAnswerLength = currentLength
}
}
}
}
}
c.SleepMs(1500)
}
return "", fmt.Errorf("等待答案超时")
}
// SafeElement 安全地获取元素
func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) {
exists, _, err := c.Page.Has(selector)
if err != nil {
return nil, err
}
if !exists {
return nil, nil
}
return c.Page.Element(selector)
}