420 lines
11 KiB
Go
420 lines
11 KiB
Go
package collect
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"geo/internal/config"
|
||
"os"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/go-rod/rod"
|
||
"github.com/go-rod/rod/lib/proto"
|
||
"github.com/gofiber/fiber/v2/log"
|
||
)
|
||
|
||
// DeepseekCollector DeepSeek收集器
|
||
type DeepseekCollector struct {
|
||
*BaseCollector
|
||
}
|
||
|
||
// NewDeepseekCollector 创建DeepSeek收集器
|
||
func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface {
|
||
collector := &DeepseekCollector{
|
||
BaseCollector: NewBaseCollector(ctx, params, cfg, logger),
|
||
}
|
||
|
||
// 设置DeepSeek的URL
|
||
collector.LoginURL = "https://chat.deepseek.com/"
|
||
collector.ChatURL = "https://chat.deepseek.com/"
|
||
|
||
return collector
|
||
}
|
||
|
||
// saveLocalStorage 保存LocalStorage数据
|
||
func (c *DeepseekCollector) saveLocalStorage() error {
|
||
// 使用JavaScript获取所有LocalStorage数据
|
||
result, err := c.Page.Eval(`() => {
|
||
const data = {};
|
||
for (let i = 0; i < localStorage.length; i++) {
|
||
const key = localStorage.key(i);
|
||
data[key] = localStorage.getItem(key);
|
||
}
|
||
return JSON.stringify(data);
|
||
}`)
|
||
if err != nil {
|
||
return fmt.Errorf("获取LocalStorage失败: %v", err)
|
||
}
|
||
|
||
// 保存到文件
|
||
localStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_localstorage.json"
|
||
return os.WriteFile(localStorageFile, []byte(result.Value.Str()), 0644)
|
||
}
|
||
|
||
// loadLocalStorage 加载LocalStorage数据
|
||
func (c *DeepseekCollector) loadLocalStorage() error {
|
||
localStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_localstorage.json"
|
||
|
||
data, err := os.ReadFile(localStorageFile)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
var storageData map[string]string
|
||
if err := json.Unmarshal(data, &storageData); err != nil {
|
||
return err
|
||
}
|
||
|
||
// 使用JavaScript设置LocalStorage
|
||
for key, value := range storageData {
|
||
_, err := c.Page.Eval(`(key, val) => localStorage.setItem(key, val)`, key, value)
|
||
if err != nil {
|
||
c.Logger.Warnf("设置LocalStorage键 %s 失败: %v", key, err)
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// saveSessionStorage 保存SessionStorage数据
|
||
func (c *DeepseekCollector) saveSessionStorage() error {
|
||
result, err := c.Page.Eval(`() => {
|
||
const data = {};
|
||
for (let i = 0; i < sessionStorage.length; i++) {
|
||
const key = sessionStorage.key(i);
|
||
data[key] = sessionStorage.getItem(key);
|
||
}
|
||
return JSON.stringify(data);
|
||
}`)
|
||
if err != nil {
|
||
return fmt.Errorf("获取SessionStorage失败: %v", err)
|
||
}
|
||
|
||
sessionStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_sessionstorage.json"
|
||
return os.WriteFile(sessionStorageFile, []byte(result.Value.Str()), 0644)
|
||
}
|
||
|
||
// loadSessionStorage 加载SessionStorage数据
|
||
func (c *DeepseekCollector) loadSessionStorage() error {
|
||
sessionStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_sessionstorage.json"
|
||
|
||
data, err := os.ReadFile(sessionStorageFile)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
var storageData map[string]string
|
||
if err := json.Unmarshal(data, &storageData); err != nil {
|
||
return err
|
||
}
|
||
|
||
for key, value := range storageData {
|
||
_, err := c.Page.Eval(`(key, val) => sessionStorage.setItem(key, val)`, key, value)
|
||
if err != nil {
|
||
c.Logger.Warnf("设置SessionStorage键 %s 失败: %v", key, err)
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// SaveBrowserStorage 保存所有浏览器存储(Cookies + LocalStorage + SessionStorage)
|
||
func (c *DeepseekCollector) SaveBrowserStorage() error {
|
||
// 保存Cookies
|
||
if err := c.SaveCookies(); err != nil {
|
||
c.Logger.Warnf("保存Cookies失败: %v", err)
|
||
}
|
||
|
||
// 保存LocalStorage
|
||
if err := c.saveLocalStorage(); err != nil {
|
||
c.Logger.Warnf("保存LocalStorage失败: %v", err)
|
||
}
|
||
|
||
// 保存SessionStorage
|
||
if err := c.saveSessionStorage(); err != nil {
|
||
c.Logger.Warnf("保存SessionStorage失败: %v", err)
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// LoadBrowserStorage 加载所有浏览器存储
|
||
func (c *DeepseekCollector) LoadBrowserStorage() error {
|
||
// 加载Cookies
|
||
if err := c.LoadCookies(); err != nil {
|
||
c.Logger.Warnf("加载Cookies失败: %v", err)
|
||
return err
|
||
}
|
||
|
||
// 加载LocalStorage
|
||
if err := c.loadLocalStorage(); err != nil {
|
||
c.Logger.Warnf("加载LocalStorage失败: %v", err)
|
||
}
|
||
|
||
// 加载SessionStorage
|
||
if err := c.loadSessionStorage(); err != nil {
|
||
c.Logger.Warnf("加载SessionStorage失败: %v", err)
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// CheckLoginStatus 检查登录状态
|
||
func (c *DeepseekCollector) CheckLoginStatus() bool {
|
||
currentURL := c.GetCurrentURL()
|
||
|
||
// 如果在首页或登录页面,可能未登录
|
||
if strings.Contains(currentURL, "chat.deepseek.com") {
|
||
// 检查是否有用户头像或登录标识
|
||
userAvatar, err := c.SafeElement(".user-avatar, [class*='avatar'], [class*='profile']")
|
||
if err == nil && userAvatar != nil {
|
||
return true
|
||
}
|
||
|
||
// 检查是否有聊天输入框(登录后才有)
|
||
inputBox, err := c.SafeElement("textarea, [contenteditable='true']")
|
||
if err == nil && inputBox != nil {
|
||
return true
|
||
}
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
// WaitLogin 等待登录
|
||
func (c *DeepseekCollector) WaitLogin() (bool, string) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return false, fmt.Sprintf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
c.Page.MustNavigate(c.ChatURL)
|
||
c.Sleep(3)
|
||
|
||
if c.CheckLoginStatus() {
|
||
c.SaveBrowserStorage()
|
||
return true, "already_logged_in"
|
||
}
|
||
|
||
for i := 0; i < 300; i++ {
|
||
if c.CheckLoginStatus() {
|
||
c.Sleep(2)
|
||
c.SaveBrowserStorage()
|
||
return true, "login_success"
|
||
}
|
||
time.Sleep(1 * time.Second)
|
||
}
|
||
|
||
return false, "登录超时"
|
||
}
|
||
|
||
// InitPage 初始化页面(重写基类方法以支持LocalStorage)
|
||
func (c *DeepseekCollector) InitPage() error {
|
||
// 先导航到页面
|
||
c.Page.MustNavigate(c.ChatURL)
|
||
c.WaitForPageReady(5)
|
||
|
||
// 然后尝试加载浏览器存储(Cookies + LocalStorage + SessionStorage)
|
||
if err := c.LoadBrowserStorage(); err == nil {
|
||
c.LogInfo("已加载浏览器存储")
|
||
// 重新加载页面以应用存储的数据
|
||
c.Page.MustReload()
|
||
c.WaitForPageReady(5)
|
||
} else {
|
||
c.LogInfo("未找到保存的浏览器存储")
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// AskQuestion 提问并获取答案
|
||
func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) {
|
||
if err := c.SetupDriver(); err != nil {
|
||
return nil, fmt.Errorf("浏览器启动失败: %v", err)
|
||
}
|
||
defer c.Close()
|
||
|
||
if err := c.InitPage(); err != nil {
|
||
return nil, fmt.Errorf("页面初始化失败: %v", err)
|
||
}
|
||
|
||
if err := c.inputQuestion(question); err != nil {
|
||
return nil, fmt.Errorf("输入问题失败: %v", err)
|
||
}
|
||
|
||
if err := c.clickSendButton(); err != nil {
|
||
return nil, fmt.Errorf("点击发送按钮失败: %v", err)
|
||
}
|
||
|
||
answer, err := c.waitForAnswer()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("获取答案失败: %v", err)
|
||
}
|
||
|
||
return &CollectResult{
|
||
Answer: answer,
|
||
ShareLink: "",
|
||
}, nil
|
||
}
|
||
|
||
// inputQuestion 输入问题
|
||
func (c *DeepseekCollector) inputQuestion(question string) error {
|
||
// DeepSeek的输入框选择器
|
||
inputSelectors := []string{
|
||
"textarea[placeholder*='Message DeepSeek']",
|
||
}
|
||
|
||
var inputBox *rod.Element
|
||
var err error
|
||
|
||
for _, selector := range inputSelectors {
|
||
inputBox, err = c.WaitForElementVisible(selector, 10)
|
||
if err == nil && inputBox != nil {
|
||
break
|
||
}
|
||
}
|
||
|
||
if inputBox == nil {
|
||
return fmt.Errorf("未找到输入框")
|
||
}
|
||
|
||
// 点击获取焦点
|
||
if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return fmt.Errorf("点击输入框失败: %v", err)
|
||
}
|
||
c.SleepMs(500)
|
||
|
||
// 清空输入框
|
||
if err := c.ClearInput(inputBox); err != nil {
|
||
// Ignore clear error
|
||
}
|
||
c.SleepMs(300)
|
||
|
||
// 输入问题
|
||
if err := c.SetInputValue(inputBox, question); err != nil {
|
||
inputBox.Input(question)
|
||
}
|
||
|
||
c.SleepMs(1000)
|
||
|
||
return nil
|
||
}
|
||
|
||
// clickSendButton 点击发送按钮
|
||
func (c *DeepseekCollector) clickSendButton() error {
|
||
// 使用JavaScript直接找到input的父级下的第三个div并点击
|
||
clickJS := `
|
||
() => {
|
||
// 找到页面上第一个input元素
|
||
const input = document.querySelector('input');
|
||
if (!input) {
|
||
return { success: false, error: '未找到input元素', divCount: 0 };
|
||
}
|
||
|
||
// 获取input的父级元素
|
||
const parent = input.parentElement;
|
||
if (!parent) {
|
||
return { success: false, error: '未找到input的父级元素', divCount: 0 };
|
||
}
|
||
|
||
// 找到父级下的直接子级div元素(只找一级)
|
||
const divs = parent.querySelectorAll(':scope > div');
|
||
const divCount = divs.length;
|
||
|
||
if (divs.length < 2) {
|
||
return { success: false, error: '父级下没有足够的直接子级div元素', divCount: divCount };
|
||
}
|
||
|
||
// 获取第2个div作为发送按钮
|
||
const sendBtn = divs[1];
|
||
const s = sendBtn.querySelectorAll(':scope > div');
|
||
console.log(s.length);
|
||
console.log('开始点击');
|
||
// 点击发送按钮
|
||
s[0].click();
|
||
console.log('开始完成');
|
||
return { success: true, divCount: divCount };
|
||
}
|
||
`
|
||
|
||
result, err := c.Page.Eval(clickJS)
|
||
if err != nil {
|
||
return fmt.Errorf("执行点击JavaScript失败: %v", err)
|
||
}
|
||
|
||
// 检查执行结果
|
||
success := result.Value.Get("success").Bool()
|
||
divCount := result.Value.Get("divCount").Int()
|
||
|
||
c.LogInfof("父级下共有 %d 个直接子级div元素", divCount)
|
||
|
||
if !success {
|
||
errorMsg := result.Value.Get("error").String()
|
||
return fmt.Errorf("点击发送按钮失败: %s", errorMsg)
|
||
}
|
||
|
||
c.SleepMs(2000)
|
||
|
||
return nil
|
||
}
|
||
|
||
// waitForAnswer 等待并获取答案
|
||
func (c *DeepseekCollector) waitForAnswer() (string, error) {
|
||
timeout := 120 // 最大等待时间(秒)
|
||
startTime := time.Now()
|
||
lastAnswerLength := 0
|
||
|
||
for time.Since(startTime).Seconds() < float64(timeout) {
|
||
// 查找答案区域
|
||
answerSelectors := []string{
|
||
"div[class='ds-markdown']",
|
||
}
|
||
|
||
for _, selector := range answerSelectors {
|
||
answerElements, err := c.Page.Elements(selector)
|
||
if err == nil && len(answerElements) > 0 {
|
||
// 获取最后一个答案元素
|
||
lastAnswer := answerElements[len(answerElements)-1]
|
||
|
||
visible, _ := lastAnswer.Visible()
|
||
if visible {
|
||
text, err := lastAnswer.Text()
|
||
if err == nil && len(strings.TrimSpace(text)) > 0 {
|
||
// 检查是否正在生成
|
||
isGenerating := strings.Contains(text, "正在") ||
|
||
strings.Contains(text, "思考") ||
|
||
strings.Contains(text, "generating")
|
||
|
||
if !isGenerating {
|
||
// 检查答案是否还在增长
|
||
currentLength := len(text)
|
||
if currentLength == lastAnswerLength && currentLength > 10 {
|
||
// 答案不再增长,认为已完成
|
||
return strings.TrimSpace(text), nil
|
||
}
|
||
lastAnswerLength = currentLength
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
c.SleepMs(1500)
|
||
}
|
||
|
||
return "", fmt.Errorf("等待答案超时")
|
||
}
|
||
|
||
// SafeElement 安全地获取元素
|
||
func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) {
|
||
exists, _, err := c.Page.Has(selector)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if !exists {
|
||
return nil, nil
|
||
}
|
||
return c.Page.Element(selector)
|
||
}
|