geoGo/internal/collect/deepseek.go

420 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package collect
import (
"context"
"encoding/json"
"fmt"
"geo/internal/config"
"os"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
"github.com/gofiber/fiber/v2/log"
)
// DeepseekCollector is the collector implementation for DeepSeek.
// It embeds *BaseCollector, so the base collector's fields and methods are
// promoted onto it.
type DeepseekCollector struct {
*BaseCollector
}
// NewDeepseekCollector builds a DeepSeek collector around a BaseCollector
// created from ctx, params, cfg and logger.
//
// Both LoginURL and ChatURL are set to the DeepSeek chat home page.
func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
	// Login and chat share the same entry page.
	const deepseekHome = "https://chat.deepseek.com/"

	c := &DeepseekCollector{BaseCollector: NewBaseCollector(ctx, params, cfg, logger)}
	c.LoginURL = deepseekHome
	c.ChatURL = deepseekHome
	return c
}
// saveLocalStorage dumps every key/value pair of the page's localStorage into
// a JSON file whose name is derived from c.CookiesFile: a trailing ".json" is
// replaced by "_localstorage.json".
//
// It returns an error if the in-page script fails or the file cannot be
// written.
func (c *DeepseekCollector) saveLocalStorage() error {
	// Serialise inside the page so the Go side receives a single JSON string.
	result, err := c.Page.Eval(`() => {
		const data = {};
		for (let i = 0; i < localStorage.length; i++) {
			const key = localStorage.key(i);
			data[key] = localStorage.getItem(key);
		}
		return JSON.stringify(data);
	}`)
	if err != nil {
		return fmt.Errorf("获取LocalStorage失败: %w", err)
	}

	// TrimSuffix never panics, unlike slicing off the last five bytes, and it
	// leaves a path that does not end in ".json" intact.
	localStorageFile := strings.TrimSuffix(c.CookiesFile, ".json") + "_localstorage.json"
	return os.WriteFile(localStorageFile, []byte(result.Value.Str()), 0644)
}
// loadLocalStorage restores localStorage entries from the JSON file derived
// from c.CookiesFile (trailing ".json" replaced by "_localstorage.json").
//
// It returns an error if the file cannot be read or is not a JSON object of
// string values. A failure to set an individual key is only logged as a
// warning; the remaining keys are still applied.
func (c *DeepseekCollector) loadLocalStorage() error {
	// TrimSuffix never panics, unlike slicing off the last five bytes, and it
	// leaves a path that does not end in ".json" intact.
	localStorageFile := strings.TrimSuffix(c.CookiesFile, ".json") + "_localstorage.json"
	data, err := os.ReadFile(localStorageFile)
	if err != nil {
		return err
	}

	var storageData map[string]string
	if err := json.Unmarshal(data, &storageData); err != nil {
		return err
	}

	// Write each entry back through the page; one Eval per key.
	for key, value := range storageData {
		if _, err := c.Page.Eval(`(key, val) => localStorage.setItem(key, val)`, key, value); err != nil {
			c.Logger.Warnf("设置LocalStorage键 %s 失败: %v", key, err)
		}
	}
	return nil
}
// saveSessionStorage dumps every key/value pair of the page's sessionStorage
// into a JSON file whose name is derived from c.CookiesFile: a trailing
// ".json" is replaced by "_sessionstorage.json".
//
// It returns an error if the in-page script fails or the file cannot be
// written.
func (c *DeepseekCollector) saveSessionStorage() error {
	// Serialise inside the page so the Go side receives a single JSON string.
	result, err := c.Page.Eval(`() => {
		const data = {};
		for (let i = 0; i < sessionStorage.length; i++) {
			const key = sessionStorage.key(i);
			data[key] = sessionStorage.getItem(key);
		}
		return JSON.stringify(data);
	}`)
	if err != nil {
		return fmt.Errorf("获取SessionStorage失败: %w", err)
	}

	// TrimSuffix never panics, unlike slicing off the last five bytes, and it
	// leaves a path that does not end in ".json" intact.
	sessionStorageFile := strings.TrimSuffix(c.CookiesFile, ".json") + "_sessionstorage.json"
	return os.WriteFile(sessionStorageFile, []byte(result.Value.Str()), 0644)
}
// loadSessionStorage restores sessionStorage entries from the JSON file
// derived from c.CookiesFile (trailing ".json" replaced by
// "_sessionstorage.json").
//
// It returns an error if the file cannot be read or is not a JSON object of
// string values. A failure to set an individual key is only logged as a
// warning; the remaining keys are still applied.
func (c *DeepseekCollector) loadSessionStorage() error {
	// TrimSuffix never panics, unlike slicing off the last five bytes, and it
	// leaves a path that does not end in ".json" intact.
	sessionStorageFile := strings.TrimSuffix(c.CookiesFile, ".json") + "_sessionstorage.json"
	data, err := os.ReadFile(sessionStorageFile)
	if err != nil {
		return err
	}

	var storageData map[string]string
	if err := json.Unmarshal(data, &storageData); err != nil {
		return err
	}

	// Write each entry back through the page; one Eval per key.
	for key, value := range storageData {
		if _, err := c.Page.Eval(`(key, val) => sessionStorage.setItem(key, val)`, key, value); err != nil {
			c.Logger.Warnf("设置SessionStorage键 %s 失败: %v", key, err)
		}
	}
	return nil
}
// SaveBrowserStorage persists cookies, localStorage and sessionStorage, in
// that order.
//
// Saving is best-effort: a failing step is logged as a warning and the
// remaining steps still run. The returned error is always nil.
func (c *DeepseekCollector) SaveBrowserStorage() error {
	steps := []struct {
		warnFormat string
		save       func() error
	}{
		{"保存Cookies失败: %v", c.SaveCookies},
		{"保存LocalStorage失败: %v", c.saveLocalStorage},
		{"保存SessionStorage失败: %v", c.saveSessionStorage},
	}

	for _, step := range steps {
		if err := step.save(); err != nil {
			c.Logger.Warnf(step.warnFormat, err)
		}
	}
	return nil
}
// LoadBrowserStorage restores cookies, then localStorage, then
// sessionStorage.
//
// Cookies are mandatory: if loading them fails, the failure is logged and
// returned, and neither storage is touched. The two storage loads are
// best-effort: a failure is logged as a warning and nil is still returned.
func (c *DeepseekCollector) LoadBrowserStorage() error {
	if err := c.LoadCookies(); err != nil {
		c.Logger.Warnf("加载Cookies失败: %v", err)
		return err
	}

	optional := []struct {
		warnFormat string
		load       func() error
	}{
		{"加载LocalStorage失败: %v", c.loadLocalStorage},
		{"加载SessionStorage失败: %v", c.loadSessionStorage},
	}
	for _, step := range optional {
		if err := step.load(); err != nil {
			c.Logger.Warnf(step.warnFormat, err)
		}
	}
	return nil
}
// CheckLoginStatus reports whether the current page looks like a logged-in
// DeepSeek session.
//
// It returns true only when the current URL contains "chat.deepseek.com" and
// the page has either an avatar/profile-like element or a text input
// (textarea or contenteditable element). Otherwise it returns false.
func (c *DeepseekCollector) CheckLoginStatus() bool {
	if !strings.Contains(c.GetCurrentURL(), "chat.deepseek.com") {
		return false
	}

	// Checked in order: first the avatar/profile markers, then the chat input.
	loggedInMarkers := []string{
		".user-avatar, [class*='avatar'], [class*='profile']",
		"textarea, [contenteditable='true']",
	}
	for _, marker := range loggedInMarkers {
		if el, err := c.SafeElement(marker); err == nil && el != nil {
			return true
		}
	}
	return false
}
// WaitLogin opens the chat page and waits for the user to be logged in.
//
// It returns (true, "already_logged_in") when the session is logged in right
// after the page loads, and (true, "login_success") when login is detected
// during polling. In both cases the browser storage is saved first. It
// returns false with a message when the browser cannot be started, the page
// cannot be navigated to, or login is not detected within 300 polls.
//
// The browser is closed before the function returns.
func (c *DeepseekCollector) WaitLogin() (bool, string) {
	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Use Navigate rather than MustNavigate so a navigation failure is
	// reported through the return values instead of panicking.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		return false, fmt.Sprintf("页面导航失败: %v", err)
	}
	c.Sleep(3) // presumably seconds; gives the page time to render — confirm against BaseCollector.Sleep

	if c.CheckLoginStatus() {
		c.SaveBrowserStorage()
		return true, "already_logged_in"
	}

	// Poll up to 300 times with a one-second pause between checks.
	for i := 0; i < 300; i++ {
		if c.CheckLoginStatus() {
			c.Sleep(2) // short pause before the storage snapshot is taken
			c.SaveBrowserStorage()
			return true, "login_success"
		}
		time.Sleep(1 * time.Second)
	}
	return false, "登录超时"
}
// InitPage navigates to the chat page and then restores the saved browser
// storage (cookies, localStorage, sessionStorage) into it.
//
// The page is opened first and the storage is loaded afterwards. When the
// storage loads successfully the page is reloaded so the restored data takes
// effect. When loading fails, the page is left as is and nil is returned.
//
// It returns an error only when the navigation or the reload fails.
func (c *DeepseekCollector) InitPage() error {
	// Navigate/Reload are used instead of their Must* variants so failures
	// are returned to the caller instead of panicking.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		return fmt.Errorf("导航到聊天页面失败: %w", err)
	}
	c.WaitForPageReady(5)

	if err := c.LoadBrowserStorage(); err != nil {
		// LoadBrowserStorage has already logged the cause.
		c.LogInfo("未找到保存的浏览器存储")
		return nil
	}
	c.LogInfo("已加载浏览器存储")

	// Reload so the page picks up the restored storage.
	if err := c.Page.Reload(); err != nil {
		return fmt.Errorf("重新加载页面失败: %w", err)
	}
	c.WaitForPageReady(5)
	return nil
}
// AskQuestion submits question to DeepSeek and returns the collected answer.
//
// It starts the browser, initialises the page, types the question, clicks the
// send button and waits for the answer. The first failing step aborts the run
// and its error is returned with a step-specific prefix. The browser is
// closed before the function returns. ShareLink in the result is always empty.
func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) {
	if setupErr := c.SetupDriver(); setupErr != nil {
		return nil, fmt.Errorf("浏览器启动失败: %v", setupErr)
	}
	defer c.Close()

	// Steps that must succeed, in order, before the answer can be read.
	pipeline := []struct {
		failFormat string
		run        func() error
	}{
		{"页面初始化失败: %v", c.InitPage},
		{"输入问题失败: %v", func() error { return c.inputQuestion(question) }},
		{"点击发送按钮失败: %v", c.clickSendButton},
	}
	for _, step := range pipeline {
		if stepErr := step.run(); stepErr != nil {
			return nil, fmt.Errorf(step.failFormat, stepErr)
		}
	}

	reply, waitErr := c.waitForAnswer()
	if waitErr != nil {
		return nil, fmt.Errorf("获取答案失败: %v", waitErr)
	}

	result := &CollectResult{Answer: reply, ShareLink: ""}
	return result, nil
}
// inputQuestion locates the chat input box, focuses it, clears it and enters
// question.
//
// It returns an error when no input box becomes visible, when the focusing
// click fails, or when both ways of entering the text fail (SetInputValue
// first, then the element's own Input as a fallback).
func (c *DeepseekCollector) inputQuestion(question string) error {
	// Candidate selectors for the DeepSeek input box, tried in order.
	inputSelectors := []string{
		"textarea[placeholder*='Message DeepSeek']",
	}

	var inputBox *rod.Element
	var lookupErr error
	for _, selector := range inputSelectors {
		el, err := c.WaitForElementVisible(selector, 10)
		if err == nil && el != nil {
			inputBox = el
			break
		}
		lookupErr = err
	}
	if inputBox == nil {
		// Keep the underlying cause when there is one.
		if lookupErr != nil {
			return fmt.Errorf("未找到输入框: %w", lookupErr)
		}
		return fmt.Errorf("未找到输入框")
	}

	// Click to give the input box focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击输入框失败: %v", err)
	}
	c.SleepMs(500)

	// Clearing is best-effort, so a failure is only logged.
	if err := c.ClearInput(inputBox); err != nil {
		c.Logger.Warnf("清空输入框失败: %v", err)
	}
	c.SleepMs(300)

	// Enter the question; fall back to the element's own Input on failure.
	if err := c.SetInputValue(inputBox, question); err != nil {
		if inputErr := inputBox.Input(question); inputErr != nil {
			// Both attempts failed, so nothing was entered; report it instead
			// of letting an empty message be sent.
			return fmt.Errorf("写入问题失败: %v; 回退输入也失败: %w", err, inputErr)
		}
	}
	c.SleepMs(1000)
	return nil
}
// clickSendButton clicks the send button by running a script in the page.
//
// The script takes the first <input> element in the document, looks at the
// direct child <div>s of its parent, treats the second one as the send-button
// container and clicks that container's first direct child <div>.
//
// It returns an error when the script cannot be executed or when the script
// reports that one of the expected elements is missing.
func (c *DeepseekCollector) clickSendButton() error {
	clickJS := `
	() => {
		// First <input> element on the page.
		const input = document.querySelector('input');
		if (!input) {
			return { success: false, error: '未找到input元素', divCount: 0 };
		}
		const parent = input.parentElement;
		if (!parent) {
			return { success: false, error: '未找到input的父级元素', divCount: 0 };
		}
		// Direct child divs of the parent only (one level).
		const divs = parent.querySelectorAll(':scope > div');
		const divCount = divs.length;
		if (divs.length < 2) {
			return { success: false, error: '父级下没有足够的直接子级div元素', divCount: divCount };
		}
		// The second div holds the send button.
		const sendBtn = divs[1];
		const s = sendBtn.querySelectorAll(':scope > div');
		console.log(s.length);
		// Guard the index: without it an empty container raises a TypeError
		// instead of producing a readable error.
		if (s.length === 0) {
			return { success: false, error: '发送按钮容器下没有可点击的子级div元素', divCount: divCount };
		}
		console.log('开始点击');
		s[0].click();
		console.log('开始完成');
		return { success: true, divCount: divCount };
	}
	`
	result, err := c.Page.Eval(clickJS)
	if err != nil {
		return fmt.Errorf("执行点击JavaScript失败: %v", err)
	}

	// Inspect the structured result returned by the script.
	success := result.Value.Get("success").Bool()
	divCount := result.Value.Get("divCount").Int()
	c.LogInfof("父级下共有 %d 个直接子级div元素", divCount)
	if !success {
		errorMsg := result.Value.Get("error").String()
		return fmt.Errorf("点击发送按钮失败: %s", errorMsg)
	}

	c.SleepMs(2000)
	return nil
}
// waitForAnswer polls the page until the latest answer stops growing and
// returns its trimmed text.
//
// Each poll looks at the last element matching the answer selector. The
// answer counts as finished when it is visible, non-blank, contains none of
// the "still generating" marker strings, is longer than 10 bytes and has the
// same byte length as on the previous qualifying poll. Polls are 1500 ms
// apart; after 120 seconds without a finished answer an error is returned.
func (c *DeepseekCollector) waitForAnswer() (string, error) {
	const maxWait = 120 * time.Second

	answerSelectors := []string{
		"div[class='ds-markdown']",
	}
	// Substrings taken to mean the answer is still being produced.
	busyMarkers := []string{"正在", "思考", "generating"}

	prevLen := 0
	begin := time.Now()
	for time.Since(begin) < maxWait {
		for _, sel := range answerSelectors {
			blocks, err := c.Page.Elements(sel)
			if err != nil || len(blocks) == 0 {
				continue
			}

			// Only the most recent answer block is of interest.
			newest := blocks[len(blocks)-1]
			if shown, _ := newest.Visible(); !shown {
				continue
			}

			body, err := newest.Text()
			if err != nil || strings.TrimSpace(body) == "" {
				continue
			}

			busy := false
			for _, marker := range busyMarkers {
				if strings.Contains(body, marker) {
					busy = true
					break
				}
			}
			if busy {
				continue
			}

			// Unchanged length across two polls means generation has stopped.
			size := len(body)
			if size > 10 && size == prevLen {
				return strings.TrimSpace(body), nil
			}
			prevLen = size
		}
		c.SleepMs(1500)
	}
	return "", fmt.Errorf("等待答案超时")
}
// SafeElement looks up selector on the current page without treating a
// missing element as an error.
//
// It returns (element, nil) when the element exists, (nil, nil) when it does
// not, and (nil, err) when the lookup itself fails.
func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) {
	// Page.Has already returns the matched element, so it is used directly.
	// Querying a second time is redundant and could block or fail if the
	// element disappears between the two lookups.
	found, el, err := c.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !found {
		return nil, nil
	}
	return el, nil
}