package collect import ( "context" "encoding/json" "fmt" "geo/internal/config" "os" "strings" "time" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/proto" "github.com/gofiber/fiber/v2/log" ) // DeepseekCollector DeepSeek收集器 type DeepseekCollector struct { *BaseCollector } // NewDeepseekCollector 创建DeepSeek收集器 func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger log.AllLogger) CollectorInterface { collector := &DeepseekCollector{ BaseCollector: NewBaseCollector(ctx, params, cfg, logger), } // 设置DeepSeek的URL collector.LoginURL = "https://chat.deepseek.com/" collector.ChatURL = "https://chat.deepseek.com/" return collector } // saveLocalStorage 保存LocalStorage数据 func (c *DeepseekCollector) saveLocalStorage() error { // 使用JavaScript获取所有LocalStorage数据 result, err := c.Page.Eval(`() => { const data = {}; for (let i = 0; i < localStorage.length; i++) { const key = localStorage.key(i); data[key] = localStorage.getItem(key); } return JSON.stringify(data); }`) if err != nil { return fmt.Errorf("获取LocalStorage失败: %v", err) } // 保存到文件 localStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_localstorage.json" return os.WriteFile(localStorageFile, []byte(result.Value.Str()), 0644) } // loadLocalStorage 加载LocalStorage数据 func (c *DeepseekCollector) loadLocalStorage() error { localStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_localstorage.json" data, err := os.ReadFile(localStorageFile) if err != nil { return err } var storageData map[string]string if err := json.Unmarshal(data, &storageData); err != nil { return err } // 使用JavaScript设置LocalStorage for key, value := range storageData { _, err := c.Page.Eval(`(key, val) => localStorage.setItem(key, val)`, key, value) if err != nil { c.Logger.Warnf("设置LocalStorage键 %s 失败: %v", key, err) } } return nil } // saveSessionStorage 保存SessionStorage数据 func (c *DeepseekCollector) saveSessionStorage() error { result, err := c.Page.Eval(`() => { const data = {}; for (let i = 0; i < sessionStorage.length; i++) { const key = sessionStorage.key(i); data[key] = sessionStorage.getItem(key); } return JSON.stringify(data); }`) if err != nil { return fmt.Errorf("获取SessionStorage失败: %v", err) } sessionStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_sessionstorage.json" return os.WriteFile(sessionStorageFile, []byte(result.Value.Str()), 0644) } // loadSessionStorage 加载SessionStorage数据 func (c *DeepseekCollector) loadSessionStorage() error { sessionStorageFile := c.CookiesFile[:len(c.CookiesFile)-5] + "_sessionstorage.json" data, err := os.ReadFile(sessionStorageFile) if err != nil { return err } var storageData map[string]string if err := json.Unmarshal(data, &storageData); err != nil { return err } for key, value := range storageData { _, err := c.Page.Eval(`(key, val) => sessionStorage.setItem(key, val)`, key, value) if err != nil { c.Logger.Warnf("设置SessionStorage键 %s 失败: %v", key, err) } } return nil } // SaveBrowserStorage 保存所有浏览器存储(Cookies + LocalStorage + SessionStorage) func (c *DeepseekCollector) SaveBrowserStorage() error { // 保存Cookies if err := c.SaveCookies(); err != nil { c.Logger.Warnf("保存Cookies失败: %v", err) } // 保存LocalStorage if err := c.saveLocalStorage(); err != nil { c.Logger.Warnf("保存LocalStorage失败: %v", err) } // 保存SessionStorage if err := c.saveSessionStorage(); err != nil { c.Logger.Warnf("保存SessionStorage失败: %v", err) } return nil } // LoadBrowserStorage 加载所有浏览器存储 func (c *DeepseekCollector) LoadBrowserStorage() error { // 加载Cookies if err := c.LoadCookies(); err != nil { c.Logger.Warnf("加载Cookies失败: %v", err) return err } // 加载LocalStorage if err := c.loadLocalStorage(); err != nil { c.Logger.Warnf("加载LocalStorage失败: %v", err) } // 加载SessionStorage if err := c.loadSessionStorage(); err != nil { c.Logger.Warnf("加载SessionStorage失败: %v", err) } return nil } // CheckLoginStatus 检查登录状态 func (c *DeepseekCollector) CheckLoginStatus() bool { currentURL := c.GetCurrentURL() // 如果在首页或登录页面,可能未登录 if strings.Contains(currentURL, "chat.deepseek.com") { // 检查是否有用户头像或登录标识 userAvatar, err := c.SafeElement(".user-avatar, [class*='avatar'], [class*='profile']") if err == nil && userAvatar != nil { return true } // 检查是否有聊天输入框(登录后才有) inputBox, err := c.SafeElement("textarea, [contenteditable='true']") if err == nil && inputBox != nil { return true } } return false } // WaitLogin 等待登录 func (c *DeepseekCollector) WaitLogin() (bool, string) { if err := c.SetupDriver(); err != nil { return false, fmt.Sprintf("浏览器启动失败: %v", err) } defer c.Close() c.Page.MustNavigate(c.LoginURL) c.Sleep(3) if c.CheckLoginStatus() { c.SaveBrowserStorage() return true, "already_logged_in" } c.LogInfo("等待用户登录...") // 最多等待300秒 for i := 0; i < 300; i++ { if c.CheckLoginStatus() { c.Sleep(2) c.SaveBrowserStorage() return true, "login_success" } time.Sleep(1 * time.Second) // 每30秒提醒一次 if (i+1)%30 == 0 { c.LogInfo(fmt.Sprintf("仍在等待登录... 已等待 %d 秒", i+1)) } } return false, "登录超时" } // InitPage 初始化页面(重写基类方法以支持LocalStorage) func (c *DeepseekCollector) InitPage() error { // 先导航到页面 c.Page.MustNavigate(c.ChatURL) c.WaitForPageReady(5) // 然后尝试加载浏览器存储(Cookies + LocalStorage + SessionStorage) if err := c.LoadBrowserStorage(); err == nil { c.LogInfo("已加载浏览器存储") // 重新加载页面以应用存储的数据 c.Page.MustReload() c.WaitForPageReady(5) } else { c.LogInfo("未找到保存的浏览器存储") } return nil } // AskQuestion 提问并获取答案 func (c *DeepseekCollector) AskQuestion(question string) (*CollectResult, error) { c.LogInfo("开始提问流程...") if err := c.SetupDriver(); err != nil { return nil, fmt.Errorf("浏览器启动失败: %v", err) } defer c.Close() if err := c.InitPage(); err != nil { return nil, fmt.Errorf("页面初始化失败: %v", err) } if err := c.inputQuestion(question); err != nil { return nil, fmt.Errorf("输入问题失败: %v", err) } if err := c.clickSendButton(); err != nil { return nil, fmt.Errorf("点击发送按钮失败: %v", err) } answer, err := c.waitForAnswer() if err != nil { return nil, fmt.Errorf("获取答案失败: %v", err) } answerStr, isExposure := HighlightKeywordsInText(answer, c.KeyWords) return &CollectResult{ Answer: answerStr, ShareLink: "", IsExposure: isExposure, }, nil } // inputQuestion 输入问题 func (c *DeepseekCollector) inputQuestion(question string) error { c.LogInfo("输入问题...") // DeepSeek的输入框选择器 inputSelectors := []string{ "textarea[placeholder*='Message DeepSeek']", } var inputBox *rod.Element var err error for _, selector := range inputSelectors { inputBox, err = c.WaitForElementVisible(selector, 10) if err == nil && inputBox != nil { c.LogInfo(fmt.Sprintf("找到输入框: %s", selector)) break } } if inputBox == nil { return fmt.Errorf("未找到输入框") } // 点击获取焦点 if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil { return fmt.Errorf("点击输入框失败: %v", err) } c.SleepMs(500) // 清空输入框 if err := c.ClearInput(inputBox); err != nil { // Ignore clear error } c.SleepMs(300) // 输入问题 if err := c.SetInputValue(inputBox, question); err != nil { inputBox.Input(question) } c.LogInfo(fmt.Sprintf("问题已输入: %s", question)) c.SleepMs(1000) return nil } func (c *DeepseekCollector) clickSendButton() error { // 使用JavaScript直接找到input的父级下的第三个div并点击 clickJS := ` () => { // 找到页面上第一个input元素 const input = document.querySelector('input'); if (!input) { return { success: false, error: '未找到input元素', divCount: 0 }; } // 获取input的父级元素 const parent = input.parentElement; if (!parent) { return { success: false, error: '未找到input的父级元素', divCount: 0 }; } // 找到父级下的直接子级div元素(只找一级) const divs = parent.querySelectorAll(':scope > div'); const divCount = divs.length; if (divs.length < 2) { return { success: false, error: '父级下没有足够的直接子级div元素', divCount: divCount }; } // 获取第2个div作为发送按钮 const sendBtn = divs[1]; const s = sendBtn.querySelectorAll(':scope > div'); console.log(s.length); console.log('开始点击'); // 点击发送按钮 s[0].click(); console.log('开始完成'); return { success: true, divCount: divCount }; } ` result, err := c.Page.Eval(clickJS) if err != nil { return fmt.Errorf("执行点击JavaScript失败: %v", err) } // 检查执行结果 success := result.Value.Get("success").Bool() divCount := result.Value.Get("divCount").Int() c.LogInfof("父级下共有 %d 个直接子级div元素", divCount) if !success { errorMsg := result.Value.Get("error").String() return fmt.Errorf("点击发送按钮失败: %s", errorMsg) } c.SleepMs(2000) return nil } // waitForAnswer 等待并获取答案 func (c *DeepseekCollector) waitForAnswer() (string, error) { c.LogInfo("等待AI回答...") timeout := 180 // 最大等待时间(秒) startTime := time.Now() var lastAnswer string var stableCount int // 稳定计数器 const requiredStableCount = 3 // 需要连续3次内容不变才认为完成 isAnswering := false // 标记是否正在回答中 for time.Since(startTime).Seconds() < float64(timeout) { // 查找答案区域 - DeepSeek 使用 ds-markdown 类 answerSelectors := []string{ "div[class='ds-markdown']", ".message-content", ".response-text", "[class*='assistant'] [class*='content']", "[class*='ai'] [class*='message']", ".chat-message.ai", ".answer-content", "div[data-message-id]", // 通用的消息ID选择器 } var answerHTML string for _, selector := range answerSelectors { answerElements, err := c.Page.Elements(selector) if err == nil && len(answerElements) > 0 { // 获取最后一个答案元素 lastAnswerElem := answerElements[len(answerElements)-1] visible, _ := lastAnswerElem.Visible() if visible { // 直接获取原始HTML内容,不做任何处理 htmlContent, err := lastAnswerElem.HTML() if err == nil && htmlContent != "" { answerHTML = strings.TrimSpace(htmlContent) c.LogInfo(fmt.Sprintf("找到答案容器: %s, HTML长度: %d", selector, len(answerHTML))) break } } } } // 检查是否获取到答案 if answerHTML != "" { if !isAnswering { c.LogInfo("检测到AI开始回答...") isAnswering = true } // 检查内容是否稳定(流式输出完成) if answerHTML == lastAnswer { stableCount++ c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerHTML))) // 如果内容稳定足够次数,说明回答完成 if stableCount >= requiredStableCount { c.LogInfo(fmt.Sprintf("✓ AI回答完成,最终HTML长度: %d 字符", len(answerHTML))) return answerHTML, nil } } else { // 内容还在变化,重置计数器 stableCount = 0 lastAnswer = answerHTML c.LogInfo(fmt.Sprintf("检测到流式输出,当前HTML长度: %d 字符", len(answerHTML))) } } c.SleepMs(1000) // 每1秒检查一次 // 每10秒输出一次等待状态 elapsed := int(time.Since(startTime).Seconds()) if elapsed > 0 && elapsed%10 == 0 { c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed)) } } return "", fmt.Errorf("等待答案超时(%d秒)", timeout) } // SafeElement 安全地获取元素 func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) { exists, _, err := c.Page.Has(selector) if err != nil { return nil, err } if !exists { return nil, nil } return c.Page.Element(selector) }