This commit is contained in:
Rzy 2026-04-24 18:49:41 +08:00
parent 452bd7889f
commit 82c94a7038
13 changed files with 2711 additions and 0 deletions

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
images/*
chrome/*
chrome_data/*
cookies/*
docs/*
logs/*

View File

@ -3,6 +3,7 @@ package main
import (
"fmt"
"geo/internal/config"
"github.com/gofiber/fiber/v2/log"
)

535
example_test.go Normal file
View File

@ -0,0 +1,535 @@
package collect
import (
"context"
"fmt"
"geo/internal/collect"
"geo/internal/config"
"log"
"os"
"strings"
"testing"
"github.com/go-rod/rod/lib/proto"
)
// Shared fixtures used by every test and example in this file.
var (
	// The error returned by config.LoadConfig is discarded here, so a failed
	// load is not reported and cfg keeps whatever value LoadConfig returned.
	cfg, _ = config.LoadConfig()
	// Standard-library logger writing to stdout with the default timestamp flags.
	logger = log.New(os.Stdout, "", log.LstdFlags)
	// Manager under test, built once from a background context.
	manager = collect.NewCollectManager(context.Background(), cfg, logger)
)
// TestCollectManager_Basic checks that the manager lists the expected number
// of platforms and can build a non-nil collector for each of them.
func TestCollectManager_Basic(t *testing.T) {
	// Listing platforms.
	platforms := manager.ListPlatforms()
	t.Logf("支持的平台: %v", platforms)
	if len(platforms) != 4 {
		t.Errorf("期望4个平台实际: %d", len(platforms))
	}

	// Building one collector per platform.
	for _, platform := range platforms {
		params := &collect.CollectParams{
			Headless:  true,
			UserIndex: "test_user",
			PlatIndex: platform,
			RequestID: "test_req",
			Platform:  platform,
		}
		collector, err := manager.GetCollector(platform, params)
		if err != nil {
			t.Errorf("获取%s收集器失败: %v", platform, err)
			continue
		}
		if collector == nil {
			t.Errorf("%s收集器为nil", platform)
			// A nil collector is a failure; do not fall through to the success log.
			continue
		}
		t.Logf("成功创建%s收集器", platform)
	}
}
// TestWenxinCollector_WaitLogin drives the interactive Wenxin login flow.
// It is skipped in -short mode because a person has to finish the login in
// the browser window.
func TestWenxinCollector_WaitLogin(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	const platform = "wenxin"
	loginParams := &collect.CollectParams{
		Headless:  false, // the window must be visible for QR-code/password login
		UserIndex: "test_user",
		PlatIndex: platform,
		RequestID: "test_wenxin_login_001",
		Platform:  platform,
	}

	t.Log("开始测试文心一言登录...")
	t.Log("请在打开的浏览器窗口中完成百度账号登录(扫码或输入账号密码)")

	ok, msg := manager.WaitLogin(platform, loginParams)
	if ok {
		t.Logf("文心一言登录成功: %s", msg)
		t.Log("Cookie已保存后续测试可以使用已登录状态")
		return
	}
	t.Errorf("文心一言登录失败: %s", msg)
}
// TestWenxinCollector_SimpleAsk types a short question into the Wenxin chat
// page and clicks the first button on the page, leaving the browser visible
// so the outcome can be observed. It needs a previously saved login and is
// skipped in -short mode.
func TestWenxinCollector_SimpleAsk(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	params := &collect.CollectParams{
		Headless:  false, // keep the browser visible for observation
		UserIndex: "test_user",
		PlatIndex: "wenxin",
		RequestID: "test_wenxin_simple_001",
		Platform:  "wenxin",
	}
	t.Log("=== 简单测试文心一言提问 ===")

	// Obtain the collector and make sure it really is the Wenxin one before
	// touching Wenxin-specific members.
	collector, err := manager.GetCollector("wenxin", params)
	if err != nil {
		t.Fatalf("获取收集器失败: %v", err)
	}
	wenxinCollector, ok := collector.(*collect.WenxinCollector)
	if !ok {
		t.Fatalf("收集器类型错误: %T", collector)
	}

	// Start the browser.
	if err := wenxinCollector.SetupDriver(); err != nil {
		t.Fatalf("启动浏览器失败: %v", err)
	}
	defer wenxinCollector.Close()

	// Load cookies; a missing file is only logged.
	if err := wenxinCollector.LoadCookies(); err != nil {
		t.Logf("未找到Cookie文件: %v", err)
	}

	// Open the chat page.
	wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL)
	wenxinCollector.Sleep(5)

	// Verify the login state.
	isLoggedIn := wenxinCollector.CheckLoginStatus()
	t.Logf("登录状态: %v", isLoggedIn)
	if !isLoggedIn {
		t.Fatal("未登录请先调用WaitLogin登录")
	}

	question := "你好"
	t.Logf("准备输入问题: %s", question)

	// Locate the input box.
	inputBox, err := wenxinCollector.WaitForElementVisible("[contenteditable='true']", 10)
	if err != nil {
		t.Fatalf("未找到输入框: %v", err)
	}
	t.Log("✓ 找到输入框")

	// Focus the input box.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		t.Fatalf("点击输入框失败: %v", err)
	}
	wenxinCollector.SleepMs(500)

	// Clearing is best effort: a fresh chat box is normally empty already.
	if err := wenxinCollector.ClearInput(inputBox); err != nil {
		t.Logf("清空输入框失败: %v", err)
	}
	wenxinCollector.SleepMs(300)

	// Type the question with simulated keyboard input.
	t.Log("正在输入问题...")
	if err := inputBox.Input(question); err != nil {
		t.Fatalf("输入问题失败: %v", err)
	}
	wenxinCollector.SleepMs(1000)
	t.Log("✓ 问题已输入")

	// Find and click the send button (the first <button> on the page).
	sendBtn, err := wenxinCollector.Page.Element("button")
	if err != nil {
		t.Fatalf("未找到发送按钮: %v", err)
	}
	t.Log("✓ 找到发送按钮")
	t.Log("正在点击发送按钮...")
	if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
		t.Fatalf("点击发送按钮失败: %v", err)
	}
	wenxinCollector.SleepMs(3000)
	t.Log("✓ 已点击发送按钮")

	t.Log("\n请观察浏览器窗口查看是否成功发送问题并收到回答")
	t.Log("测试将在10秒后结束...")
	wenxinCollector.Sleep(10)
	t.Log("=== 测试完成 ===")
}
// TestWenxinCollector_AskQuestion sends one question to Wenxin through the
// manager and expects a non-empty answer.
// Note: a valid saved login is required; the test is skipped in -short mode.
func TestWenxinCollector_AskQuestion(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	const (
		platform = "wenxin"
		question = "请用一句话介绍Go语言"
	)

	// Collection parameters; the browser stays visible to ease debugging.
	askParams := &collect.CollectParams{
		Headless:  false,
		UserIndex: "test_user",
		PlatIndex: platform,
		RequestID: "test_wenxin_001",
		Platform:  platform,
	}

	t.Logf("向文心一言提问: %s", question)

	// Ask through the manager and collect the answer.
	answer, err := manager.AskQuestion(platform, askParams, question)
	if err != nil {
		t.Errorf("提问失败: %v", err)
		return
	}
	t.Logf("获取到答案:\n%s", answer)

	// The answer must not be empty.
	if answer == "" {
		t.Error("答案为空")
	}
}
// TestMultiplePlatforms_Compare asks the same question on several platforms
// and prints the answers side by side. Failures on one platform are logged
// and recorded as the result instead of aborting the comparison.
func TestMultiplePlatforms_Compare(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	question := "什么是人工智能?"
	platforms := []string{"wenxin", "deepseek"}
	results := make(map[string]string, len(platforms))

	for _, platform := range platforms {
		params := &collect.CollectParams{
			Headless:  true,
			UserIndex: "test_user",
			PlatIndex: platform,
			RequestID: fmt.Sprintf("test_%s", platform),
			Platform:  platform,
		}
		t.Logf("正在向%s提问...", platform)
		answer, err := manager.AskQuestion(platform, params, question)
		if err != nil {
			t.Logf("%s提问失败: %v", platform, err)
			results[platform] = fmt.Sprintf("错误: %v", err)
			continue
		}
		results[platform] = answer
		t.Logf("%s回答完成长度: %d", platform, len(answer))
	}

	// Print the comparison in the order the platforms were queried; ranging
	// over the map directly would shuffle the output between runs.
	t.Log("\n===== 多平台回答对比 =====")
	for _, platform := range platforms {
		t.Logf("\n[%s]:\n%s\n", platform, results[platform])
	}
}
// TestWenxinCollector_DebugPageStructure is a manual debugging aid: it opens
// the Wenxin chat page with saved cookies, logs the elements matching a set
// of candidate input-box and button selectors, then keeps the browser open
// for a bounded time so the page can be inspected by hand.
// It is skipped in -short mode.
func TestWenxinCollector_DebugPageStructure(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// How long the browser stays open for manual inspection. A bounded wait
	// lets the test finish and the deferred Close run, unlike blocking forever.
	const observeSeconds = 60

	// deref turns an optional attribute value into a printable string.
	deref := func(p *string) string {
		if p == nil {
			return ""
		}
		return *p
	}
	// clip keeps at most n runes so multi-byte (e.g. Chinese) text is never
	// cut in the middle of a character.
	clip := func(s string, n int) string {
		r := []rune(s)
		return string(r[:min(n, len(r))])
	}

	params := &collect.CollectParams{
		Headless:  false,
		UserIndex: "test_user",
		PlatIndex: "wenxin",
		RequestID: "test_wenxin_debug_001",
		Platform:  "wenxin",
	}
	t.Log("=== 调试文心一言页面结构 ===")

	// Obtain the collector and verify its concrete type.
	collector, err := manager.GetCollector("wenxin", params)
	if err != nil {
		t.Fatalf("获取收集器失败: %v", err)
	}
	wenxinCollector, ok := collector.(*collect.WenxinCollector)
	if !ok {
		t.Fatalf("收集器类型错误: %T", collector)
	}
	if err := wenxinCollector.SetupDriver(); err != nil {
		t.Fatalf("启动浏览器失败: %v", err)
	}
	defer wenxinCollector.Close()

	// Load cookies; a missing file is only logged.
	if err := wenxinCollector.LoadCookies(); err != nil {
		t.Logf("未找到Cookie文件: %v", err)
	}

	// Open the chat page.
	wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL)
	wenxinCollector.Sleep(5)

	// Verify the login state.
	isLoggedIn := wenxinCollector.CheckLoginStatus()
	t.Logf("登录状态: %v", isLoggedIn)
	if !isLoggedIn {
		t.Fatal("未登录请先调用WaitLogin登录")
	}

	// Candidate input boxes.
	t.Log("\n=== 查找输入框 ===")
	inputSelectors := []string{
		"textarea",
		"[contenteditable='true']",
		"input[type='text']",
		".input-box",
		"#chat-input",
		"[placeholder]",
	}
	for _, selector := range inputSelectors {
		elements, err := wenxinCollector.Page.Elements(selector)
		if err != nil || len(elements) == 0 {
			t.Logf("✗ 未找到元素: %s", selector)
			continue
		}
		t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector)
		for i, elem := range elements {
			if i >= 3 {
				break // only show the first three matches
			}
			// Lookup errors are ignored on purpose: this is best-effort
			// diagnostic output and an empty field is acceptable.
			text, _ := elem.Text()
			tagName, _ := elem.Property("tagName")
			class, _ := elem.Attribute("class")
			id, _ := elem.Attribute("id")
			placeholder, _ := elem.Attribute("placeholder")
			t.Logf(" [%d] tag=%s, id=%s, class=%s, placeholder=%s, text=%s",
				i, tagName.Str(), deref(id), deref(class), deref(placeholder), clip(text, 50))
		}
	}

	// Candidate send buttons.
	t.Log("\n=== 查找发送按钮 ===")
	buttonSelectors := []string{
		"button",
		"svg",
		"[aria-label]",
	}
	for _, selector := range buttonSelectors {
		elements, err := wenxinCollector.Page.Elements(selector)
		if err != nil || len(elements) == 0 {
			continue
		}
		t.Logf("✓ 找到 %d 个元素: %s", len(elements), selector)
		for i, elem := range elements {
			if i >= 5 {
				break
			}
			// Best-effort diagnostics, see above.
			text, _ := elem.Text()
			tagName, _ := elem.Property("tagName")
			class, _ := elem.Attribute("class")
			ariaLabel, _ := elem.Attribute("aria-label")

			ariaLabelText := deref(ariaLabel)
			trimmedText := strings.TrimSpace(text)
			// Skip elements that carry neither text nor an aria-label.
			if trimmedText != "" || ariaLabelText != "" {
				t.Logf(" [%d] tag=%s, class=%s, aria-label=%s, text=%s",
					i, tagName.Str(), deref(class), ariaLabelText, clip(trimmedText, 30))
			}
		}
	}

	t.Log("\n=== 调试完成 ===")
	t.Log("请保持浏览器窗口打开,手动检查页面结构")

	// Give the user a bounded window to look at the page, then clean up.
	t.Logf("浏览器将在%d秒后自动关闭", observeSeconds)
	wenxinCollector.Sleep(observeSeconds)
}
// TestWenxinCollector_DebugAnswer is a manual debugging aid for locating the
// answer area: after the user has asked a question by hand, it logs elements
// whose class mentions answer/response/message and every div with long text,
// then keeps the browser open for a bounded time.
// It is skipped in -short mode.
func TestWenxinCollector_DebugAnswer(t *testing.T) {
	if testing.Short() {
		t.Skip("跳过需要浏览器交互的测试")
	}

	// How long the browser stays open for manual inspection. A bounded wait
	// lets the test finish and the deferred Close run, unlike blocking forever.
	const observeSeconds = 60

	// longDiv records one div whose text exceeded the length threshold.
	type longDiv struct {
		index int
		text  string
		class string
	}

	// deref turns an optional attribute value into a printable string.
	deref := func(p *string) string {
		if p == nil {
			return ""
		}
		return *p
	}
	// clip keeps at most n runes so multi-byte (e.g. Chinese) text is never
	// cut in the middle of a character.
	clip := func(s string, n int) string {
		r := []rune(s)
		return string(r[:min(n, len(r))])
	}

	params := &collect.CollectParams{
		Headless:  false,
		UserIndex: "test_user",
		PlatIndex: "wenxin",
		RequestID: "test_wenxin_debug_answer",
		Platform:  "wenxin",
	}
	t.Log("=== 调试文心一言答案区域 ===")

	collector, err := manager.GetCollector("wenxin", params)
	if err != nil {
		t.Fatalf("获取收集器失败: %v", err)
	}
	wenxinCollector, ok := collector.(*collect.WenxinCollector)
	if !ok {
		t.Fatalf("收集器类型错误: %T", collector)
	}
	if err := wenxinCollector.SetupDriver(); err != nil {
		t.Fatalf("启动浏览器失败: %v", err)
	}
	defer wenxinCollector.Close()

	if err := wenxinCollector.LoadCookies(); err != nil {
		t.Logf("未找到Cookie文件: %v", err)
	}
	wenxinCollector.Page.MustNavigate(wenxinCollector.ChatURL)
	wenxinCollector.Sleep(5)
	if !wenxinCollector.CheckLoginStatus() {
		t.Fatal("未登录")
	}

	// The user asks a question by hand, then presses Enter to continue.
	t.Log("请在浏览器中手动输入问题并等待AI回答完成")
	t.Log("然后按回车键继续...")
	fmt.Scanln()

	// Pass 1: elements whose class contains answer/response/message.
	t.Log("\n=== 查找答案容器 ===")
	// Lookup errors are ignored on purpose throughout: this is best-effort
	// diagnostic output and an empty result is acceptable.
	containers, _ := wenxinCollector.Page.Elements("[class*='answer'], [class*='response'], [class*='message']")
	t.Logf("找到 %d 个容器元素", len(containers))
	for i, container := range containers {
		text, _ := container.Text()
		classAttr, _ := container.Attribute("class")
		tagName, _ := container.Property("tagName")
		if len(strings.TrimSpace(text)) > 20 {
			t.Logf("[%d] tag=%s, class=%s, text长度=%d, 前100字符=%s",
				i, tagName.Str(), deref(classAttr), len(text), clip(text, 100))
		}
	}

	// Pass 2: every div that shows a longer piece of text.
	t.Log("\n=== 查找长文本div ===")
	allDivs, _ := wenxinCollector.Page.Elements("div")
	var longTextDivs []longDiv
	for i, div := range allDivs {
		text, _ := div.Text()
		if len(strings.TrimSpace(text)) <= 50 {
			continue
		}
		classAttr, _ := div.Attribute("class")
		longTextDivs = append(longTextDivs, longDiv{index: i, text: text, class: deref(classAttr)})
	}
	t.Logf("找到 %d 个长文本div", len(longTextDivs))
	for _, item := range longTextDivs {
		t.Logf("[%d] class=%s, 长度=%d, 前150字符=%s",
			item.index, item.class, len(item.text), clip(item.text, 150))
	}

	t.Log("\n=== 调试完成,请保持浏览器打开以便观察 ===")

	// Give the user a bounded window to look at the page, then clean up.
	t.Logf("浏览器将在%d秒后自动关闭", observeSeconds)
	wenxinCollector.Sleep(observeSeconds)
}
// BenchmarkWenxinCollector is a placeholder benchmark (for reference only);
// it records a note and skips immediately.
func BenchmarkWenxinCollector(b *testing.B) {
	b.Log("跳过性能测试")
	b.SkipNow()
}
// ExampleCollectManager shows the basic workflow: list the supported
// platforms, then ask Wenxin one question in headless mode.
func ExampleCollectManager() {
	// Supported platforms.
	fmt.Printf("支持的平台: %v\n", manager.ListPlatforms())

	// Ask Wenxin a question with headless collection parameters.
	const platform = "wenxin"
	answer, err := manager.AskQuestion(platform, &collect.CollectParams{
		Headless:  true,
		UserIndex: "user_001",
		PlatIndex: platform,
		RequestID: "req_001",
		Platform:  platform,
	}, "什么是人工智能?")
	if err == nil {
		fmt.Printf("答案: %s\n", answer)
		return
	}
	fmt.Printf("错误: %v\n", err)
}
// ExampleWenxinCollector_WaitLogin shows how to run the interactive Wenxin
// login and report its outcome.
func ExampleWenxinCollector_WaitLogin() {
	const platform = "wenxin"
	loginParams := &collect.CollectParams{
		Headless:  false, // the browser must be visible while logging in
		UserIndex: "user_001",
		PlatIndex: platform,
		RequestID: "example_login_001",
		Platform:  platform,
	}

	fmt.Println("正在打开文心一言登录页面...")
	ok, msg := manager.WaitLogin(platform, loginParams)
	if !ok {
		fmt.Printf("登录失败: %s\n", msg)
		return
	}
	fmt.Printf("登录成功: %s\n", msg)
	fmt.Println("Cookie已保存下次可以自动登录")
}

View File

@ -0,0 +1,86 @@
package biz
import (
"context"
"fmt"
"geo/internal/collect"
"geo/internal/config"
"log"
)
// CollectBiz is the business-layer facade over the AI collection manager.
type CollectBiz struct {
	// manager performs the per-platform work; every method delegates to it.
	manager *collect.CollectManager
	// config is the configuration the manager was built with.
	config *config.Config
	// logger receives failure messages from AskMultipleAI.
	logger *log.Logger
}
// NewCollectBiz builds a CollectBiz whose manager is created from the given
// context, configuration and logger.
func NewCollectBiz(ctx context.Context, cfg *config.Config, logger *log.Logger) *CollectBiz {
	biz := new(CollectBiz)
	biz.manager = collect.NewCollectManager(ctx, cfg, logger)
	biz.config = cfg
	biz.logger = logger
	return biz
}
// AskAIQuestion asks one question on the given AI platform.
//
//   - platform: platform type (wenxin, deepseek, doubao, qianwen)
//   - userIndex: user index
//   - platIndex: platform index
//   - requestID: request ID
//   - question: the question text
//   - headless: whether the browser runs headless
//
// It returns the answer, or an error wrapping the manager's failure.
func (b *CollectBiz) AskAIQuestion(platform string, userIndex, platIndex, requestID, question string, headless bool) (string, error) {
	answer, err := b.manager.AskQuestion(platform, &collect.CollectParams{
		Headless:  headless,
		UserIndex: userIndex,
		PlatIndex: platIndex,
		RequestID: requestID,
		Platform:  platform,
	}, question)
	if err == nil {
		return answer, nil
	}
	return "", fmt.Errorf("向%s提问失败: %w", platform, err)
}
// WaitAILogin runs the login flow of the given AI platform and returns the
// manager's success flag and message unchanged.
func (b *CollectBiz) WaitAILogin(platform string, userIndex, platIndex, requestID string, headless bool) (bool, string) {
	loginParams := collect.CollectParams{
		Headless:  headless,
		UserIndex: userIndex,
		PlatIndex: platIndex,
		RequestID: requestID,
		Platform:  platform,
	}
	ok, msg := b.manager.WaitLogin(platform, &loginParams)
	return ok, msg
}
// ListAIPlatforms returns the platform identifiers reported by the manager.
func (b *CollectBiz) ListAIPlatforms() []string {
	platforms := b.manager.ListPlatforms()
	return platforms
}
// AskMultipleAI asks the same question on several AI platforms, one after
// another, and returns a map from platform to answer. A platform that fails
// gets an "错误: ..." entry instead of aborting the whole batch.
//
// Each platform name doubles as its platIndex, and the per-platform request
// ID is requestID + "_" + platform.
func (b *CollectBiz) AskMultipleAI(platforms []string, userIndex, requestID, question string, headless bool) map[string]string {
	results := make(map[string]string, len(platforms))
	for _, platform := range platforms {
		platIndex := platform // the platform name is used as platIndex by default
		answer, err := b.AskAIQuestion(platform, userIndex, platIndex, requestID+"_"+platform, question, headless)
		if err != nil {
			// The collectors accept a nil logger, so this layer must not
			// dereference one either.
			if b.logger != nil {
				b.logger.Printf("向%s提问失败: %v", platform, err)
			}
			results[platform] = fmt.Sprintf("错误: %v", err)
			continue
		}
		results[platform] = answer
	}
	return results
}

306
internal/collect/README.md Normal file
View File

@ -0,0 +1,306 @@
# AI平台收集功能使用说明
## 概述
`internal/collect` 模块提供了访问多个AI平台并进行问答的功能,目前支持以下平台:
- **文心一言** (wenxin) - 百度AI助手
- **DeepSeek** (deepseek) - 深度求索AI
- **豆包** (doubao) - 字节跳动AI助手
- **通义千问** (qianwen) - 阿里云AI助手
## 架构设计
### 核心组件
1. **CollectorInterface** - 收集器接口
- `WaitLogin() (bool, string)` - 等待登录
- `AskQuestion(question string) (string, error)` - 提问并获取答案
2. **BaseCollector** - 基础收集器
- 浏览器驱动管理
- Cookie管理(保存/加载)
- 页面操作工具方法
3. **CollectManager** - 收集管理器
- 统一管理不同平台的收集器
- 提供便捷的API调用
4. **平台实现**
- `WenxinCollector` - 文心一言实现
- `DeepseekCollector` - DeepSeek实现
- `DoubaoCollector` - 豆包实现
- `QianwenCollector` - 通义千问实现
## 快速开始
### 1. 基本使用
```go
package main
import (
"context"
"fmt"
"geo/internal/collect"
"geo/internal/config"
"log"
"os"
)
func main() {
// 加载配置
cfg := &config.Config{
Sys: config.SysConfig{
ChromePath: "chrome/chrome.exe", // Chrome浏览器路径
ChromeDataDir: "chrome_data", // Chrome数据目录
CookiesDir: "cookies", // Cookie存储目录
LogsDir: "logs", // 日志目录
},
}
ctx := context.Background()
logger := log.New(os.Stdout, "", log.LstdFlags)
// 创建管理器
manager := collect.NewCollectManager(ctx, cfg, logger)
// 设置参数
params := &collect.CollectParams{
Headless: false, // 是否无头模式false显示浏览器窗口
UserIndex: "user_001", // 用户索引
PlatIndex: "wenxin", // 平台索引
RequestID: "req_001", // 请求ID
Platform: "wenxin", // 平台类型
}
// 向文心一言提问
question := "请介绍一下Go语言的特点"
answer, err := manager.AskQuestion("wenxin", params, question)
if err != nil {
fmt.Printf("错误: %v\n", err)
return
}
fmt.Printf("问题: %s\n", question)
fmt.Printf("答案: %s\n", answer)
}
```
### 2. 多平台对比
```go
// 向多个AI平台提问同一个问题
platforms := []string{"wenxin", "deepseek", "doubao", "qianwen"}
question := "什么是人工智能?"
for _, platform := range platforms {
params := &collect.CollectParams{
Headless: true,
UserIndex: "user_001",
PlatIndex: platform,
RequestID: fmt.Sprintf("req_%s", platform),
Platform: platform,
}
answer, err := manager.AskQuestion(platform, params, question)
if err != nil {
fmt.Printf("[%s] 错误: %v\n", platform, err)
continue
}
fmt.Printf("[%s] 答案: %s\n\n", platform, answer)
}
```
### 3. 登录管理
```go
// 首次使用时需要登录
params := &collect.CollectParams{
Headless: false, // 显示浏览器窗口以便扫码登录
UserIndex: "user_001",
PlatIndex: "wenxin",
RequestID: "login_req",
Platform: "wenxin",
}
// 等待登录(会打开浏览器窗口,需要手动扫码或输入账号密码)
success, msg := manager.WaitLogin("wenxin", params)
if success {
fmt.Println("登录成功Cookie已保存")
} else {
fmt.Printf("登录失败: %s\n", msg)
}
// 后续使用会自动加载Cookie无需重复登录
params.Headless = true // 可以切换到无头模式
answer, _ := manager.AskQuestion("wenxin", params, "你好")
```
### 4. 列出支持的平台
```go
platforms := manager.ListPlatforms()
fmt.Printf("支持的平台: %v\n", platforms)
// 输出: 支持的平台: [wenxin deepseek doubao qianwen]
```
## 配置说明
### 必需的配置项
```go
type SysConfig struct {
ChromePath string // Chrome浏览器可执行文件路径
ChromeDataDir string // Chrome用户数据目录
CookiesDir string // Cookie存储目录
LogsDir string // 日志文件目录
}
```
### 示例配置
```go
cfg := &config.Config{
Sys: config.SysConfig{
ChromePath: "/usr/bin/google-chrome", // Linux
// ChromePath: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", // Windows
ChromeDataDir: "./chrome_data",
CookiesDir: "./cookies",
LogsDir: "./logs",
},
}
```
## 工作流程
1. **初始化浏览器** - 启动Chrome浏览器实例
2. **加载Cookie** - 从本地文件加载之前的登录状态
3. **检查登录** - 验证是否已登录
4. **导航到聊天页面** - 打开AI平台的对话界面
5. **输入问题** - 在输入框中输入问题
6. **点击发送** - 触发AI回答
7. **等待回答** - 等待AI生成完整答案
8. **提取答案** - 从页面中提取回答内容
9. **返回结果** - 将答案返回给调用者
## 注意事项
### 1. 首次使用需要登录
每个平台首次使用时需要手动登录:
- 设置 `Headless: false` 显示浏览器窗口
- 调用 `WaitLogin()` 方法
- 在浏览器中完成登录操作(扫码或输入账号密码)
- 登录成功后,Cookie会自动保存
### 2. Cookie管理
- Cookie保存在 `cookies/{UserIndex}/{PlatIndex}.json`
- 下次使用会自动加载Cookie,无需重复登录
- 如果登录失效,重新调用 `WaitLogin()` 即可
### 3. 选择器适配
由于AI平台的页面结构可能会更新,如果遇到问题,可能需要调整CSS选择器:
- 在对应的Collector文件中修改 `inputSelectors`、`sendSelectors`、`answerSelectors`
- 可以通过浏览器的开发者工具查看最新的元素选择器
### 4. 超时设置
- 登录超时: 180-300秒
- 回答超时: 120秒
- 可根据实际情况在代码中调整
### 5. 无头模式
- 开发调试时建议设置 `Headless: false`
- 生产环境可以设置 `Headless: true` 节省资源
## 扩展新平台
如果要添加新的AI平台,需要:
1. 创建新的Collector文件,例如 `newplatform.go`
2. 实现 `CollectorInterface` 接口
3. 继承 `BaseCollector` 基础结构
4. 在 `interface.go` 的 `CollectorMap` 中注册
示例:
```go
package collect
import (
"context"
"geo/internal/config"
"log"
)
type NewPlatformCollector struct {
*BaseCollector
}
func NewNewPlatformCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger *log.Logger) CollectorInterface {
collector := &NewPlatformCollector{
BaseCollector: NewBaseCollector(ctx, params, cfg, logger),
}
collector.LoginURL = "https://example.com/login"
collector.ChatURL = "https://example.com/chat"
return collector
}
// 实现 CheckLoginStatus、WaitLogin、AskQuestion 等方法
// ...
```
然后在 `interface.go` 中注册:
```go
var CollectorMap = map[string]*CollectorValue{
// ... 其他平台
"newplatform": {
Name: "新平台",
InitMethod: NewNewPlatformCollector,
Platform: "newplatform",
},
}
```
## 故障排查
### 1. 浏览器启动失败
- 检查 `ChromePath` 是否正确
- 确认Chrome版本是否兼容
- 查看日志文件了解详细错误
### 2. 找不到输入框或发送按钮
- 页面结构可能已更新
- 打开浏览器(`Headless: false`)查看实际DOM结构
- 更新对应的选择器
### 3. 登录状态失效
- 删除对应的Cookie文件
- 重新调用 `WaitLogin()` 登录
- 检查账号是否正常
### 4. 获取不到答案
- 增加超时时间
- 检查网络连接
- 查看页面是否有验证码或其他拦截
## 技术栈
- **go-rod**: Chrome DevTools Protocol的Go语言封装
- **Chrome/Chromium**: 浏览器引擎
- **Context**: Go上下文管理
- **JSON**: Cookie序列化
## 许可证
与项目主许可证保持一致。

321
internal/collect/base.go Normal file
View File

@ -0,0 +1,321 @@
package collect
import (
"context"
"encoding/json"
"fmt"
"geo/internal/config"
"log"
"os"
"path/filepath"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
)
// BaseCollector holds the browser session, identifiers, logging and cookie
// location shared by every platform collector. Platform collectors embed it.
type BaseCollector struct {
	// ctx is handed to the rod browser and to page waits.
	ctx context.Context
	// Headless selects headless browser mode in SetupDriver.
	Headless bool
	// UserIndex names the per-user subdirectory for cookies and Chrome data.
	UserIndex string
	// PlatIndex is the base name of the cookie file (PlatIndex + ".json").
	PlatIndex string
	// RequestID is part of the Chrome data directory and log file names.
	RequestID string
	// Platform is part of the log file name.
	Platform string
	// Browser and Page are set by SetupDriver and released by Close.
	Browser *rod.Browser
	Page *rod.Page
	// Logger receives all Log* output.
	Logger *log.Logger
	// LogFile is the file behind Logger when the collector created its own
	// log file; it is nil when a logger was supplied by the caller.
	LogFile *os.File
	// LoginURL and ChatURL are filled in by the platform constructors.
	LoginURL string
	ChatURL string
	// CookiesFile is the path used by SaveCookies, LoadCookies and DelCookies.
	CookiesFile string
	config *config.Config
	// MaxRetries and RetryDelay default to 3 and 200 in NewBaseCollector.
	// NOTE(review): nothing in this file reads them, and the unit of
	// RetryDelay is not shown here — presumably milliseconds; confirm.
	MaxRetries int
	RetryDelay int
}
// NewBaseCollector builds the shared collector state from params and config.
//
// When logger is nil the collector creates its own log file named
// collect_<RequestID>_<Platform>.log under config.Sys.LogsDir (default
// "./logs"). If that file cannot be created, logging falls back to standard
// error instead of being silently discarded.
//
// The cookie file path is <CookiesDir>/<UserIndex>/<PlatIndex>.json.
func NewBaseCollector(ctx context.Context, params *CollectParams, config *config.Config, logger *log.Logger) *BaseCollector {
	baseLogger := logger
	var logFile *os.File

	if baseLogger == nil {
		logsDir := config.Sys.LogsDir
		if logsDir == "" {
			logsDir = "./logs"
		}
		logPath := filepath.Join(logsDir, fmt.Sprintf("collect_%s_%s.log", params.RequestID, params.Platform))

		err := os.MkdirAll(logsDir, 0755)
		if err == nil {
			logFile, err = os.Create(logPath)
		}
		if err != nil {
			// Keep LogFile nil so Close does not touch it, and make sure the
			// collector still has a working logger.
			logFile = nil
			baseLogger = log.New(os.Stderr, "", log.LstdFlags)
			baseLogger.Printf("无法创建日志文件 %s,改为输出到标准错误: %v", logPath, err)
		} else {
			baseLogger = log.New(logFile, "", log.LstdFlags)
		}
	}

	base := &BaseCollector{
		ctx:        ctx,
		Headless:   params.Headless,
		UserIndex:  params.UserIndex,
		PlatIndex:  params.PlatIndex,
		RequestID:  params.RequestID,
		Platform:   params.Platform,
		Logger:     baseLogger,
		LogFile:    logFile,
		config:     config,
		MaxRetries: 3,
		RetryDelay: 200,
	}
	base.CookiesFile = filepath.Join(base.cookiesDir(), params.PlatIndex+".json")
	return base
}
// cookiesDir returns the per-user cookie directory, creating it if needed.
func (b *BaseCollector) cookiesDir() string {
	userCookieDir := filepath.Join(b.config.Sys.CookiesDir, b.UserIndex)
	// Creation is best effort; a failure surfaces later when the cookie file
	// is read or written.
	_ = os.MkdirAll(userCookieDir, 0755)
	return userCookieDir
}
// SetupDriver launches Chrome with a fresh per-request user-data directory,
// connects to it and opens one blank page. On success Browser and Page are
// set. Every failure is returned as an error instead of panicking, and a
// browser that was launched but cannot be used is shut down again.
func (b *BaseCollector) SetupDriver() error {
	b.LogInfo("初始化浏览器...")

	// A nanosecond suffix keeps concurrent or repeated requests from sharing
	// a profile directory.
	userDataDir := filepath.Join(b.config.Sys.ChromeDataDir, b.UserIndex, b.RequestID+fmt.Sprintf("___%d", time.Now().UnixNano()))
	if err := os.MkdirAll(userDataDir, 0755); err != nil {
		// Not fatal by itself: the launch below reports the real problem.
		b.LogInfof("创建浏览器数据目录失败: %v", err)
	}

	l := launcher.New().
		Bin(b.config.Sys.ChromePath).
		UserDataDir(userDataDir).
		Headless(b.Headless).
		Leakless(false).
		Set("disable-blink-features", "AutomationControlled")

	if b.Headless {
		l.Set("headless", "new")
		l.Set("disable-gpu")
		l.Set("no-sandbox")
		l.Set("disable-dev-shm-usage")
	} else {
		l.Set("start-maximized")
		l.Delete("headless")
	}

	// Flags common to both modes.
	l.Set("window-size", "1920,1080")
	l.Set("lang", "zh-CN")
	l.Set("force-device-scale-factor", "1")

	url, err := l.Launch()
	if err != nil {
		return fmt.Errorf("启动浏览器失败: %w", err)
	}

	browser := rod.New().Context(b.ctx).ControlURL(url)
	if err := browser.Connect(); err != nil {
		// Leakless is off, so the process would outlive us unless killed.
		l.Kill()
		return fmt.Errorf("连接浏览器失败: %w", err)
	}

	page, err := browser.Page(proto.TargetCreateTarget{})
	if err != nil {
		_ = browser.Close() // best-effort cleanup; the page error is what matters
		return fmt.Errorf("创建页面失败: %w", err)
	}

	b.Browser = browser
	b.Page = page
	return nil
}
// Close releases the page, the browser and the collector-owned log file, in
// that order. Members that were never set are skipped; close errors are
// ignored.
func (b *BaseCollector) Close() {
	if page := b.Page; page != nil {
		page.Close()
	}
	if browser := b.Browser; browser != nil {
		browser.Close()
	}
	if file := b.LogFile; file != nil {
		file.Close()
	}
}
// SaveCookies writes the current page's cookies to CookiesFile as JSON.
func (b *BaseCollector) SaveCookies() error {
	pageCookies, err := b.Page.Cookies(nil)
	if err != nil {
		return err
	}

	encoded, err := json.Marshal(pageCookies)
	if err != nil {
		return err
	}

	return os.WriteFile(b.CookiesFile, encoded, 0644)
}
// LoadCookies reads CookiesFile, decodes its JSON content and installs the
// cookies on the current page. A missing or malformed file is returned as an
// error.
func (b *BaseCollector) LoadCookies() error {
	raw, readErr := os.ReadFile(b.CookiesFile)
	if readErr != nil {
		return readErr
	}

	var stored []*proto.NetworkCookieParam
	if decodeErr := json.Unmarshal(raw, &stored); decodeErr != nil {
		return decodeErr
	}

	return b.Page.SetCookies(stored)
}
// DelCookies removes the cookie file from disk and returns the removal error,
// if any.
func (b *BaseCollector) DelCookies() error {
	err := os.Remove(b.CookiesFile)
	return err
}
// WaitForPageReady waits for the page's load event.
//
// timeout is the maximum wait in seconds. A value of zero or less keeps the
// wait bounded only by the collector's context.
func (b *BaseCollector) WaitForPageReady(timeout int) error {
	page := b.Page.Context(b.ctx)
	if timeout > 0 {
		// Honour the caller's limit so a page that never finishes loading
		// cannot stall the collector.
		page = page.Timeout(time.Duration(timeout) * time.Second)
	}
	return page.WaitLoad()
}
// WaitForElement waits up to timeout seconds for an element matching selector
// to appear and returns it.
func (b *BaseCollector) WaitForElement(selector string, timeout int) (*rod.Element, error) {
	limit := time.Duration(timeout) * time.Second
	page := b.Page.Context(b.ctx).Timeout(limit)
	return page.Element(selector)
}
// WaitForElementVisible waits for an element matching selector to appear and
// then to become visible. It returns nil together with the error if either
// step fails.
func (b *BaseCollector) WaitForElementVisible(selector string, timeout int) (*rod.Element, error) {
	el, err := b.WaitForElement(selector, timeout)
	if err == nil {
		err = el.WaitVisible()
	}
	if err != nil {
		return nil, err
	}
	return el, nil
}
// WaitForElementClickable waits for an element matching selector to be
// visible and then enabled. It returns nil together with the error if either
// step fails.
func (b *BaseCollector) WaitForElementClickable(selector string, timeout int) (*rod.Element, error) {
	el, err := b.WaitForElementVisible(selector, timeout)
	if err == nil {
		err = el.WaitEnabled()
	}
	if err != nil {
		return nil, err
	}
	return el, nil
}
// JSClick clicks element once with the left mouse button and logs any
// failure. A nil element is reported as an error instead of panicking.
func (b *BaseCollector) JSClick(element *rod.Element) error {
	if element == nil {
		b.Logger.Printf("element is nil")
		return fmt.Errorf("element is nil")
	}
	err := element.Click(proto.InputMouseButtonLeft, 1)
	if err != nil {
		// Pass the error as an argument: using its text as the format string
		// would garble any message that happens to contain a '%'.
		b.Logger.Printf("click fail: %v", err)
	}
	return err
}
// SetInputValue sets the value of a form control through JavaScript and fires
// bubbling "input" and "change" events.
//
// rod evaluates the script with the element bound to `this` and JSArgs passed
// as the function arguments, so the script must read the element from `this`.
// Elements without a `value` property (for example contenteditable nodes) are
// reported as an error so callers can fall back to simulated typing.
func (b *BaseCollector) SetInputValue(element *rod.Element, value string) error {
	if element == nil {
		return fmt.Errorf("element is nil")
	}
	_, err := element.Evaluate(&rod.EvalOptions{
		// The prototype setter is used when available so frameworks that wrap
		// the instance's value property still observe the change.
		JS: `function(val) {
	if (!('value' in this)) {
		throw new Error('element has no value property');
	}
	const desc = Object.getOwnPropertyDescriptor(Object.getPrototypeOf(this), 'value');
	if (desc && desc.set) {
		desc.set.call(this, val);
	} else {
		this.value = val;
	}
	this.dispatchEvent(new Event('input', {bubbles: true}));
	this.dispatchEvent(new Event('change', {bubbles: true}));
}`,
		JSArgs: []interface{}{value},
	})
	return err
}
// ClearInput empties a form control through JavaScript and fires a bubbling
// "input" event.
//
// rod evaluates the script with the element bound to `this`; no arguments are
// passed, so the script must not expect the element as a parameter.
func (b *BaseCollector) ClearInput(element *rod.Element) error {
	if element == nil {
		return fmt.Errorf("element is nil")
	}
	_, err := element.Evaluate(&rod.EvalOptions{
		JS: `function() { this.value = ''; this.dispatchEvent(new Event('input', {bubbles: true})); }`,
	})
	return err
}
// Sleep blocks the calling goroutine for the given number of seconds.
func (b *BaseCollector) Sleep(seconds int) {
	pause := time.Second * time.Duration(seconds)
	time.Sleep(pause)
}
// SleepMs blocks the calling goroutine for the given number of milliseconds.
func (b *BaseCollector) SleepMs(milliseconds int) {
	pause := time.Millisecond * time.Duration(milliseconds)
	time.Sleep(pause)
}
// LogInfo writes message to the collector's logger with the info marker.
func (b *BaseCollector) LogInfo(message string) {
	const infoFormat = "📌 %s"
	b.Logger.Printf(infoFormat, message)
}
// LogInfof is the formatted variant of LogInfo: format and args follow the
// Printf conventions and the info marker is prepended to the format.
func (b *BaseCollector) LogInfof(format string, args ...interface{}) {
	prefixed := "📌 " + format
	b.Logger.Printf(prefixed, args...)
}
// LogError writes message to the collector's logger with the error marker.
func (b *BaseCollector) LogError(message string) {
	const errorFormat = "❌ %s"
	b.Logger.Printf(errorFormat, message)
}
// LogStep records the outcome of a named step: a success line when success is
// true, a failure line otherwise, each followed by message.
func (b *BaseCollector) LogStep(stepName string, success bool, message string) {
	format := "❌ %s: 失败 %s"
	if success {
		format = "✅ %s: 成功 %s"
	}
	b.Logger.Printf(format, stepName, message)
}
// GetCurrentURL returns the URL of the current page, or an empty string when
// the page information cannot be read.
//
// The failure is logged rather than raised as a panic: this method is polled
// once per second by the login loops, where a transient error (for example
// while the page is navigating) must not crash the process.
func (b *BaseCollector) GetCurrentURL() string {
	info, err := b.Page.Info()
	if err != nil {
		b.LogError(fmt.Sprintf("获取当前URL失败: %v", err))
		return ""
	}
	return info.URL
}
// Screenshot captures the current page (not full-page, default options) and
// writes the image bytes to filename.
func (b *BaseCollector) Screenshot(filename string) error {
	image, captureErr := b.Page.Screenshot(false, nil)
	if captureErr != nil {
		return captureErr
	}
	return os.WriteFile(filename, image, 0644)
}
// CheckLoginStatus is the default login check; it always reports "not logged
// in". Platform collectors are expected to provide their own version.
func (b *BaseCollector) CheckLoginStatus() bool {
	return false
}
// WaitLogin is the default login flow; it always fails with a "needs
// implementing" message. Platform collectors are expected to provide their
// own version.
func (b *BaseCollector) WaitLogin() (bool, string) {
	return false, "需要实现"
}
// AskQuestion is the default question flow; it always returns a "needs
// implementing" error. Platform collectors are expected to provide their own
// version.
func (b *BaseCollector) AskQuestion(question string) (string, error) {
	return "", fmt.Errorf("需要实现")
}
// InitPage restores saved cookies (when available), opens the chat page and
// stores the resulting cookies again.
//
// The chat page is opened even when no cookies could be loaded; otherwise the
// browser would stay on a blank page and every later step would time out. A
// navigation failure is returned as an error; cookie and load-wait problems
// are only logged.
func (b *BaseCollector) InitPage() error {
	// Cookies are optional: without them the page simply opens logged out.
	if err := b.LoadCookies(); err != nil {
		b.LogInfof("加载Cookie失败,将以未登录状态打开页面: %v", err)
	}

	if err := b.Page.Navigate(b.ChatURL); err != nil {
		return fmt.Errorf("打开聊天页面失败: %w", err)
	}
	if err := b.WaitForPageReady(5); err != nil {
		b.LogInfof("等待页面加载完成失败: %v", err)
	}
	b.Sleep(2)

	if err := b.SaveCookies(); err != nil {
		b.LogInfof("保存Cookie失败: %v", err)
	}
	return nil
}
// SafeElement looks up selector without waiting for it to appear. It returns
// (nil, nil) when nothing matches, so callers must check the element for nil
// as well as the error.
func (b *BaseCollector) SafeElement(selector string) (*rod.Element, error) {
	found, _, err := b.Page.Has(selector)
	switch {
	case err != nil:
		return nil, err
	case !found:
		return nil, nil
	default:
		return b.Page.Element(selector)
	}
}

View File

@ -0,0 +1,291 @@
package collect
import (
"context"
"fmt"
"geo/internal/config"
"log"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
)
// DeepseekCollector is the collector for DeepSeek. It embeds BaseCollector
// for the browser session and helpers.
type DeepseekCollector struct {
	*BaseCollector
}
// NewDeepseekCollector creates the DeepSeek collector. Login and chat both
// use the same DeepSeek chat URL.
func NewDeepseekCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger *log.Logger) CollectorInterface {
	base := NewBaseCollector(ctx, params, cfg, logger)
	base.LoginURL = "https://chat.deepseek.com/"
	base.ChatURL = "https://chat.deepseek.com/"
	return &DeepseekCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the current DeepSeek page looks logged in.
// The page must be on chat.deepseek.com and show either an avatar/profile
// element or a chat input box.
func (c *DeepseekCollector) CheckLoginStatus() bool {
	if !strings.Contains(c.GetCurrentURL(), "chat.deepseek.com") {
		return false
	}

	// Checked in order: user avatar or login marker first, then the chat
	// input box.
	loggedInMarkers := []string{
		".user-avatar, [class*='avatar'], [class*='profile']",
		"textarea, [contenteditable='true']",
	}
	for _, marker := range loggedInMarkers {
		if el, err := c.SafeElement(marker); err == nil && el != nil {
			return true
		}
	}
	return false
}
// WaitLogin opens the DeepSeek chat page and waits up to 300 seconds for the
// user to log in by hand, saving the cookies once a login is detected.
//
// It returns (true, "already_logged_in") or (true, "login_success") on
// success, and false with a human-readable reason on failure, timeout or
// context cancellation.
func (c *DeepseekCollector) WaitLogin() (bool, string) {
	c.LogInfo("开始等待DeepSeek登录...")

	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Open the chat page; report a failure instead of panicking.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		return false, fmt.Sprintf("打开页面失败: %v", err)
	}
	c.Sleep(3)

	// Already logged in?
	if c.CheckLoginStatus() {
		if err := c.SaveCookies(); err != nil {
			c.LogError(fmt.Sprintf("保存Cookie失败: %v", err))
		}
		c.LogInfo("已有登录状态")
		return true, "already_logged_in"
	}

	c.LogInfo("未检测到登录状态,请登录账号...")

	// Poll once per second, for at most 300 seconds.
	for i := 0; i < 300; i++ {
		if c.CheckLoginStatus() {
			if err := c.SaveCookies(); err != nil {
				c.LogError(fmt.Sprintf("保存Cookie失败: %v", err))
			}
			c.LogInfo("登录成功")
			return true, "login_success"
		}
		select {
		case <-c.ctx.Done():
			// Stop promptly when the caller cancels instead of polling on.
			return false, fmt.Sprintf("登录已取消: %v", c.ctx.Err())
		case <-time.After(1 * time.Second):
		}
	}
	return false, "登录超时,请检查网络或账号状态"
}
// AskQuestion sends question to DeepSeek in a fresh browser session and
// returns the answer text. The browser is closed before returning.
//
// Errors from each stage are wrapped with %w so callers can inspect the
// underlying cause with errors.Is / errors.As.
func (c *DeepseekCollector) AskQuestion(question string) (string, error) {
	c.LogInfof("开始向DeepSeek提问: %s", question)

	// Start the browser.
	if err := c.SetupDriver(); err != nil {
		return "", fmt.Errorf("浏览器启动失败: %w", err)
	}
	defer c.Close()

	// Restore cookies and open the chat page.
	if err := c.InitPage(); err != nil {
		return "", fmt.Errorf("页面初始化失败请先调用WaitLogin登录: %w", err)
	}
	c.Sleep(3)

	// Type the question.
	if err := c.inputQuestion(question); err != nil {
		return "", fmt.Errorf("输入问题失败: %w", err)
	}

	// Send it.
	if err := c.clickSendButton(); err != nil {
		return "", fmt.Errorf("点击发送按钮失败: %w", err)
	}

	// Wait for the answer.
	answer, err := c.waitForAnswer()
	if err != nil {
		return "", fmt.Errorf("获取答案失败: %w", err)
	}

	c.LogInfof("成功获取DeepSeek答案长度: %d 字符", len(answer))
	return answer, nil
}
// inputQuestion finds the DeepSeek input box, focuses and clears it, then
// enters question. The value is set through JavaScript first; if that fails,
// the text is typed with simulated keyboard input, and a failure of that
// fallback is returned to the caller.
func (c *DeepseekCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题到DeepSeek...")

	// Candidate selectors for the input box, tried in order with a 10 second
	// wait each.
	inputSelectors := []string{
		"textarea[placeholder*='输入']",
		"textarea[placeholder*='问']",
		"textarea",
		"[contenteditable='true']",
		".chat-input textarea",
		"#message-input",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		candidate, err := c.WaitForElementVisible(selector, 10)
		if err != nil || candidate == nil {
			continue
		}
		c.LogInfof("找到输入框: %s", selector)
		inputBox = candidate
		break
	}
	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击输入框失败: %w", err)
	}
	c.SleepMs(500)

	// Clearing is best effort: a fresh chat box is normally empty.
	if err := c.ClearInput(inputBox); err != nil {
		c.LogInfof("清空输入框失败: %v", err)
	}
	c.SleepMs(300)

	// Enter the question, falling back to keyboard input when the JavaScript
	// setter does not work for this element.
	if err := c.SetInputValue(inputBox, question); err != nil {
		c.LogInfof("JS设置输入值失败,改用键盘输入: %v", err)
		if err := inputBox.Input(question); err != nil {
			return fmt.Errorf("键盘输入失败: %w", err)
		}
	}

	c.LogInfo("问题已输入")
	c.SleepMs(1000)
	return nil
}
// clickSendButton finds the send button and clicks it. Each candidate
// selector is given 5 seconds; if none matches, the first "button svg" icon
// is used as a last resort, also with a 5 second limit so the lookup cannot
// block indefinitely.
func (c *DeepseekCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	// Candidate selectors for the send button, tried in order.
	sendSelectors := []string{
		"button[class*='send']",
		"button[class*='submit']",
		".send-button",
		".submit-button",
		"button svg[path*='send']",
		"[aria-label*='发送']",
		"[aria-label*='Send']",
	}

	var sendBtn *rod.Element
	for _, selector := range sendSelectors {
		candidate, err := c.WaitForElementClickable(selector, 5)
		if err != nil || candidate == nil {
			continue
		}
		c.LogInfof("找到发送按钮: %s", selector)
		sendBtn = candidate
		break
	}

	if sendBtn == nil {
		// Fall back to a send icon inside any button.
		icon, err := c.WaitForElement("button svg", 5)
		if err != nil {
			return fmt.Errorf("未找到发送按钮: %w", err)
		}
		sendBtn = icon
	}
	c.SleepMs(500)

	// Click the button.
	if err := c.JSClick(sendBtn); err != nil {
		return fmt.Errorf("点击发送按钮失败: %w", err)
	}

	c.LogInfo("已点击发送按钮")
	c.SleepMs(2000)
	return nil
}
// waitForAnswer polls the page every 1.5 seconds, for at most 120 seconds,
// until the last visible answer element stops growing, and returns its
// trimmed text.
//
// An answer counts as complete when its length is unchanged between two
// consecutive polls of the same selector, is longer than 10 bytes and does
// not contain a "generating" marker. Lengths are tracked per selector so that
// two selectors matching different elements cannot keep resetting each
// other's progress.
func (c *DeepseekCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待DeepSeek回答...")

	const timeout = 120 * time.Second // maximum total wait

	// Candidate selectors for the answer area.
	answerSelectors := []string{
		".message-content",
		".response-content",
		"[class*='assistant'] [class*='content']",
		"[class*='ai'] [class*='message']",
		".chat-message.ai",
	}
	// Last observed text length for each selector.
	lastLength := make(map[string]int, len(answerSelectors))

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		for _, selector := range answerSelectors {
			elements, err := c.Page.Elements(selector)
			if err != nil || len(elements) == 0 {
				continue
			}

			// Only the most recent answer element matters.
			latest := elements[len(elements)-1]
			// A visibility error is treated as "not visible yet".
			if visible, _ := latest.Visible(); !visible {
				continue
			}
			text, err := latest.Text()
			if err != nil || len(strings.TrimSpace(text)) == 0 {
				continue
			}

			// Skip while the page still shows a "generating" marker.
			if strings.Contains(text, "正在") ||
				strings.Contains(text, "思考") ||
				strings.Contains(text, "generating") {
				continue
			}

			// Unchanged since the previous poll means the answer is done.
			current := len(text)
			if current == lastLength[selector] && current > 10 {
				c.LogInfo("获取到完整答案")
				return strings.TrimSpace(text), nil
			}
			lastLength[selector] = current
		}
		c.SleepMs(1500)
	}
	return "", fmt.Errorf("等待答案超时")
}
// SafeElement looks up selector without waiting for it to appear, returning
// (nil, nil) when nothing matches. It performs the same lookup as the
// embedded BaseCollector method and therefore delegates to it.
func (c *DeepseekCollector) SafeElement(selector string) (*rod.Element, error) {
	return c.BaseCollector.SafeElement(selector)
}

293
internal/collect/doubao.go Normal file
View File

@ -0,0 +1,293 @@
package collect
import (
"context"
"fmt"
"geo/internal/config"
"log"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
)
// DoubaoCollector is the collector for Doubao. It embeds BaseCollector for
// the browser session and helpers.
type DoubaoCollector struct {
	*BaseCollector
}
// NewDoubaoCollector creates the Doubao collector with the Doubao home page
// as login URL and the /chat/ page as chat URL.
func NewDoubaoCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger *log.Logger) CollectorInterface {
	base := NewBaseCollector(ctx, params, cfg, logger)
	base.LoginURL = "https://www.doubao.com/"
	base.ChatURL = "https://www.doubao.com/chat/"
	return &DoubaoCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the current page looks like a logged-in
// Doubao session: the URL must contain "doubao.com" and the page must show
// either a user-info element or an input box.
func (c *DoubaoCollector) CheckLoginStatus() bool {
	if !strings.Contains(c.GetCurrentURL(), "doubao.com") {
		return false
	}
	// Probed in order: user-info markers first, then the chat input box.
	probes := []string{
		".user-info, .avatar, [class*='user-profile']",
		"textarea, [contenteditable='true']",
	}
	for _, probe := range probes {
		if found, err := c.SafeElement(probe); err == nil && found != nil {
			return true
		}
	}
	return false
}
// WaitLogin opens the Doubao home page and waits for the user to log in
// manually.
//
// It returns (true, "already_logged_in") when a session already exists,
// (true, "login_success") once a login is detected, and (false, message) when
// the browser cannot be started, the page cannot be opened, or no login is
// detected within 300 one-second polls. Cookies are saved on success and the
// browser is closed before returning.
func (c *DoubaoCollector) WaitLogin() (bool, string) {
	c.LogInfo("开始等待豆包登录...")
	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Navigate instead of MustNavigate: a navigation failure is reported to the
	// caller rather than panicking.
	if err := c.Page.Navigate(c.LoginURL); err != nil {
		return false, fmt.Sprintf("页面打开失败: %v", err)
	}
	c.Sleep(3)

	// A previous session may still be valid.
	if c.CheckLoginStatus() {
		c.SaveCookies()
		c.LogInfo("已有登录状态")
		return true, "already_logged_in"
	}

	c.LogInfo("请登录豆包账号...")
	// Poll once per second, at most 300 times.
	for i := 0; i < 300; i++ {
		if c.CheckLoginStatus() {
			c.SaveCookies()
			c.LogInfo("登录成功")
			return true, "login_success"
		}
		time.Sleep(1 * time.Second)
	}
	return false, "登录超时，请检查网络或账号状态"
}
// AskQuestion sends question to Doubao and returns the answer text.
//
// It starts the browser, initialises the page, types the question, clicks
// send and waits for the reply. The browser is closed before returning.
// Failures of each step are wrapped with %w so callers can inspect the
// underlying error with errors.Is / errors.As.
func (c *DoubaoCollector) AskQuestion(question string) (string, error) {
	c.LogInfo(fmt.Sprintf("开始向豆包提问: %s", question))

	if err := c.SetupDriver(); err != nil {
		return "", fmt.Errorf("浏览器启动失败: %w", err)
	}
	defer c.Close()

	if err := c.InitPage(); err != nil {
		return "", fmt.Errorf("页面初始化失败，请先调用WaitLogin登录: %w", err)
	}
	c.Sleep(3)

	if err := c.inputQuestion(question); err != nil {
		return "", fmt.Errorf("输入问题失败: %w", err)
	}
	if err := c.clickSendButton(); err != nil {
		return "", fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return "", fmt.Errorf("获取答案失败: %w", err)
	}
	c.LogInfo(fmt.Sprintf("成功获取豆包答案，长度: %d 字符", len(answer)))
	return answer, nil
}
// inputQuestion locates Doubao's input box, focuses and clears it, and writes
// question into it.
//
// Selectors are tried in order, each with a 10-second visibility wait. The
// value is set through SetInputValue first; if that fails, typing through
// Element.Input is used as a fallback. An error is returned when no input box
// is found, the box cannot be clicked, or both input methods fail.
func (c *DoubaoCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题到豆包...")

	// Candidate selectors for the input box, tried in order.
	inputSelectors := []string{
		"textarea[placeholder*='输入']",
		"textarea[placeholder*='问']",
		"textarea",
		"[contenteditable='true']",
		".chat-input textarea",
		"#input-box",
		".input-area textarea",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		// Assign only on success so a failed wait can never leave a stale element.
		candidate, err := c.WaitForElementVisible(selector, 10)
		if err == nil && candidate != nil {
			inputBox = candidate
			c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
			break
		}
	}
	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击输入框失败: %w", err)
	}
	c.SleepMs(500)

	// Clearing is best effort: a failure is logged and the input continues.
	if err := c.ClearInput(inputBox); err != nil {
		c.LogInfo(fmt.Sprintf("清空输入框失败: %v", err))
	}
	c.SleepMs(300)

	if err := c.SetInputValue(inputBox, question); err != nil {
		// Fall back to typing; if that fails too, nothing was entered and the
		// caller must not go on to click send.
		if inputErr := inputBox.Input(question); inputErr != nil {
			return fmt.Errorf("写入输入框失败: %w", inputErr)
		}
	}
	c.LogInfo("问题已输入")
	c.SleepMs(1000)
	return nil
}
// clickSendButton finds Doubao's send button and clicks it via JSClick.
//
// Selectors are tried in order, each with a 5-second clickable wait. If none
// matches, the first "button svg" element currently on the page is used. An
// error is returned when no button is found or the click fails.
func (c *DoubaoCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	// Candidate selectors for the send button, tried in order.
	sendSelectors := []string{
		"button[class*='send']",
		"button[class*='submit']",
		".send-btn",
		".submit-btn",
		"button svg[path*='send']",
		"[aria-label*='发送']",
		".send-icon",
	}

	var sendBtn *rod.Element
	for _, selector := range sendSelectors {
		// Assign only on success so a failed wait can never leave a stale element.
		candidate, err := c.WaitForElementClickable(selector, 5)
		if err == nil && candidate != nil {
			sendBtn = candidate
			c.LogInfo(fmt.Sprintf("找到发送按钮: %s", selector))
			break
		}
	}

	if sendBtn == nil {
		// Fallback: an icon inside a button. SafeElement does not wait, whereas
		// Page.Element would keep retrying while no such element exists.
		fallback, err := c.SafeElement("button svg")
		if err != nil || fallback == nil {
			return fmt.Errorf("未找到发送按钮")
		}
		sendBtn = fallback
	}
	c.SleepMs(500)

	if err := c.JSClick(sendBtn); err != nil {
		return fmt.Errorf("点击发送按钮失败: %w", err)
	}
	c.LogInfo("已点击发送按钮")
	c.SleepMs(2000)
	return nil
}
// waitForAnswer polls the page until Doubao's latest reply stops changing and
// returns the trimmed reply text.
//
// The page is sampled once per poll cycle (every 1.5s): the first selector
// whose last matching element is visible and has non-empty text supplies the
// sample. The reply is considered complete when two consecutive samples are
// identical, longer than 10 bytes and free of "still generating" markers.
// An error is returned if that does not happen within 120 seconds.
func (c *DoubaoCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待豆包回答...")

	const (
		timeout        = 120 * time.Second // maximum total wait
		minAnswerBytes = 10                // shorter texts are treated as placeholders
	)
	// Candidate selectors for the reply container, tried in order.
	answerSelectors := []string{
		".message-content",
		".response-text",
		"[class*='assistant'] [class*='content']",
		"[class*='bot'] [class*='message']",
		".chat-message.bot",
		".answer-box",
	}
	// Substrings that indicate the reply is still being produced.
	// NOTE(review): a finished reply that legitimately contains one of these
	// words is never accepted and ends in a timeout.
	generatingMarkers := []string{"正在", "思考中", "typing"}

	previous := ""
	start := time.Now()
	for time.Since(start) < timeout {
		// Take exactly one sample per cycle. Comparing samples taken by
		// different selectors within the same cycle would report "stable"
		// without any time having passed.
		current := ""
		for _, selector := range answerSelectors {
			elements, err := c.Page.Elements(selector)
			if err != nil || len(elements) == 0 {
				continue
			}
			// The newest reply is the last match.
			latest := elements[len(elements)-1]
			if visible, _ := latest.Visible(); !visible {
				continue
			}
			text, err := latest.Text()
			if err != nil {
				continue
			}
			if current = strings.TrimSpace(text); current != "" {
				break
			}
		}

		generating := false
		for _, marker := range generatingMarkers {
			if strings.Contains(current, marker) {
				generating = true
				break
			}
		}

		if current != "" && !generating {
			if current == previous && len(current) > minAnswerBytes {
				// Unchanged since the previous cycle: treat as finished.
				c.LogInfo("获取到完整答案")
				return current, nil
			}
			previous = current
		}
		c.SleepMs(1500)
	}
	return "", fmt.Errorf("等待答案超时")
}
// SafeElement looks up selector without waiting for it to appear.
//
// It returns (nil, nil) when no element matches, so callers must check the
// element for nil as well as the error.
func (c *DoubaoCollector) SafeElement(selector string) (*rod.Element, error) {
	// Page.Has already returns the matched element. Reusing it avoids a second
	// lookup through Page.Element, which retries until a match exists and could
	// block if the element disappeared between the two calls.
	exists, element, err := c.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, nil
	}
	return element, nil
}

View File

@ -0,0 +1,62 @@
package collect
import (
"context"
"geo/internal/config"
"log"
)
// CollectorInterface is the interface implemented by every AI-platform
// collector.
type CollectorInterface interface {
// WaitLogin waits for the platform login to complete. In the collectors of
// this package the bool reports success and the string carries a status
// code or a failure message.
WaitLogin() (bool, string)
// AskQuestion submits question to the platform and returns the answer text.
AskQuestion(question string) (string, error)
}
// NewCollector is the factory function type used to create a collector.
type NewCollector func(
ctx context.Context,
param *CollectParams,
cfg *config.Config,
logger *log.Logger) CollectorInterface
// CollectorValue describes one registered collector.
type CollectorValue struct {
Name string // Platform display name.
InitMethod NewCollector // Factory used to create the collector.
Platform string // Platform identifier: wenxin, deepseek, doubao, qianwen.
}
// CollectParams holds the parameters of one collection task.
type CollectParams struct {
Headless bool // Whether to run in headless mode.
UserIndex string // User index.
PlatIndex string // Platform index.
RequestID string // Request ID.
Platform string // Platform type.
}
// CollectorMap is the collector registry, keyed by platform identifier.
var CollectorMap = map[string]*CollectorValue{
"wenxin": {
Name: "文心一言",
InitMethod: NewWenxinCollector,
Platform: "wenxin",
},
"deepseek": {
Name: "DeepSeek",
InitMethod: NewDeepseekCollector,
Platform: "deepseek",
},
"doubao": {
Name: "豆包",
InitMethod: NewDoubaoCollector,
Platform: "doubao",
},
"qianwen": {
Name: "通义千问",
InitMethod: NewQianwenCollector,
Platform: "qianwen",
},
}

View File

@ -0,0 +1,68 @@
package collect
import (
"context"
"fmt"
"geo/internal/config"
"log"
)
// CollectManager creates collectors from CollectorMap and forwards login and
// question requests to them.
type CollectManager struct {
ctx context.Context
config *config.Config
logger *log.Logger
}
// NewCollectManager returns a manager that passes ctx, cfg and logger to
// every collector it creates.
func NewCollectManager(ctx context.Context, cfg *config.Config, logger *log.Logger) *CollectManager {
	manager := new(CollectManager)
	manager.ctx = ctx
	manager.config = cfg
	manager.logger = logger
	return manager
}
// GetCollector creates a new collector for platform using the factory
// registered in CollectorMap. It returns an error when the platform is not
// registered or the factory yields nil.
func (m *CollectManager) GetCollector(platform string, params *CollectParams) (CollectorInterface, error) {
	entry, registered := CollectorMap[platform]
	if !registered {
		return nil, fmt.Errorf("不支持的平台: %s", platform)
	}
	if built := entry.InitMethod(m.ctx, params, m.config, m.logger); built != nil {
		return built, nil
	}
	return nil, fmt.Errorf("创建收集器失败: %s", platform)
}
// AskQuestion creates a collector for platform and asks it question.
// A collector-creation error is returned unchanged with an empty answer.
func (m *CollectManager) AskQuestion(platform string, params *CollectParams, question string) (string, error) {
	var answer string
	collector, err := m.GetCollector(platform, params)
	if err == nil {
		answer, err = collector.AskQuestion(question)
	}
	return answer, err
}
// WaitLogin creates a collector for platform and waits for its login.
// When the collector cannot be created it returns false together with the
// error text.
func (m *CollectManager) WaitLogin(platform string, params *CollectParams) (bool, string) {
	collector, err := m.GetCollector(platform, params)
	if err == nil {
		return collector.WaitLogin()
	}
	return false, err.Error()
}
// ListPlatforms returns the identifiers of all registered platforms.
// The order follows map iteration and is therefore not stable between calls.
func (m *CollectManager) ListPlatforms() []string {
	names := make([]string, len(CollectorMap))
	next := 0
	for key := range CollectorMap {
		names[next] = key
		next++
	}
	return names
}

297
internal/collect/qianwen.go Normal file
View File

@ -0,0 +1,297 @@
package collect
import (
"context"
"fmt"
"geo/internal/config"
"log"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
)
// QianwenCollector is the collector for the Tongyi Qianwen platform.
// It embeds *BaseCollector and adds the Qianwen-specific methods.
type QianwenCollector struct {
*BaseCollector
}
// NewQianwenCollector builds a Tongyi Qianwen collector on top of a fresh
// BaseCollector. Login and chat share the same page URL.
func NewQianwenCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger *log.Logger) CollectorInterface {
	base := NewBaseCollector(ctx, params, cfg, logger)
	// Tongyi Qianwen entry points.
	base.LoginURL = "https://tongyi.aliyun.com/qianwen/"
	base.ChatURL = "https://tongyi.aliyun.com/qianwen/"
	return &QianwenCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the current page looks like a logged-in
// Tongyi Qianwen session: the URL must contain "tongyi.aliyun.com" and the
// page must show either a user/profile element or an input box.
func (c *QianwenCollector) CheckLoginStatus() bool {
	if !strings.Contains(c.GetCurrentURL(), "tongyi.aliyun.com") {
		return false
	}
	// Probed in order: user-info markers first, then the chat input box.
	probes := []string{
		".user-avatar, .avatar, [class*='user'], [class*='profile']",
		"textarea, [contenteditable='true']",
	}
	for _, probe := range probes {
		if found, err := c.SafeElement(probe); err == nil && found != nil {
			return true
		}
	}
	return false
}
// WaitLogin opens the Tongyi Qianwen page and waits for the user to log in
// manually.
//
// It returns (true, "already_logged_in") when a session already exists,
// (true, "login_success") once a login is detected, and (false, message) when
// the browser cannot be started, the page cannot be opened, or no login is
// detected within 300 one-second polls. Cookies are saved on success and the
// browser is closed before returning.
func (c *QianwenCollector) WaitLogin() (bool, string) {
	c.LogInfo("开始等待通义千问登录...")
	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Navigate instead of MustNavigate: a navigation failure is reported to the
	// caller rather than panicking.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		return false, fmt.Sprintf("页面打开失败: %v", err)
	}
	c.Sleep(3)

	// A previous session may still be valid.
	if c.CheckLoginStatus() {
		c.SaveCookies()
		c.LogInfo("已有登录状态")
		return true, "already_logged_in"
	}

	c.LogInfo("请登录阿里云账号...")
	// Poll once per second, at most 300 times.
	for i := 0; i < 300; i++ {
		if c.CheckLoginStatus() {
			c.SaveCookies()
			c.LogInfo("登录成功")
			return true, "login_success"
		}
		time.Sleep(1 * time.Second)
	}
	return false, "登录超时，请检查网络或账号状态"
}
// AskQuestion sends question to Tongyi Qianwen and returns the answer text.
//
// It starts the browser, initialises the page, types the question, clicks
// send and waits for the reply. The browser is closed before returning.
// Failures of each step are wrapped with %w so callers can inspect the
// underlying error with errors.Is / errors.As.
func (c *QianwenCollector) AskQuestion(question string) (string, error) {
	c.LogInfo(fmt.Sprintf("开始向通义千问提问: %s", question))

	if err := c.SetupDriver(); err != nil {
		return "", fmt.Errorf("浏览器启动失败: %w", err)
	}
	defer c.Close()

	if err := c.InitPage(); err != nil {
		return "", fmt.Errorf("页面初始化失败，请先调用WaitLogin登录: %w", err)
	}
	c.Sleep(3)

	if err := c.inputQuestion(question); err != nil {
		return "", fmt.Errorf("输入问题失败: %w", err)
	}
	if err := c.clickSendButton(); err != nil {
		return "", fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return "", fmt.Errorf("获取答案失败: %w", err)
	}
	c.LogInfo(fmt.Sprintf("成功获取通义千问答案，长度: %d 字符", len(answer)))
	return answer, nil
}
// inputQuestion locates Tongyi Qianwen's input box, focuses and clears it,
// and writes question into it.
//
// Selectors are tried in order, each with a 10-second visibility wait. The
// value is set through SetInputValue first; if that fails, typing through
// Element.Input is used as a fallback. An error is returned when no input box
// is found, the box cannot be clicked, or both input methods fail.
func (c *QianwenCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题到通义千问...")

	// Candidate selectors for the input box, tried in order.
	inputSelectors := []string{
		"textarea[placeholder*='输入']",
		"textarea[placeholder*='问']",
		"textarea",
		"[contenteditable='true']",
		".chat-input textarea",
		"#chat-input",
		".input-box textarea",
		".question-input",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		// Assign only on success so a failed wait can never leave a stale element.
		candidate, err := c.WaitForElementVisible(selector, 10)
		if err == nil && candidate != nil {
			inputBox = candidate
			c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
			break
		}
	}
	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击输入框失败: %w", err)
	}
	c.SleepMs(500)

	// Clearing is best effort: a failure is logged and the input continues.
	if err := c.ClearInput(inputBox); err != nil {
		c.LogInfo(fmt.Sprintf("清空输入框失败: %v", err))
	}
	c.SleepMs(300)

	if err := c.SetInputValue(inputBox, question); err != nil {
		// Fall back to typing; if that fails too, nothing was entered and the
		// caller must not go on to click send.
		if inputErr := inputBox.Input(question); inputErr != nil {
			return fmt.Errorf("写入输入框失败: %w", inputErr)
		}
	}
	c.LogInfo("问题已输入")
	c.SleepMs(1000)
	return nil
}
// clickSendButton finds Tongyi Qianwen's send button and clicks it via
// JSClick.
//
// Selectors are tried in order, each with a 5-second clickable wait. If none
// matches, the first "button svg" element currently on the page is used. An
// error is returned when no button is found or the click fails.
func (c *QianwenCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	// Candidate selectors for the send button, tried in order.
	sendSelectors := []string{
		"button[class*='send']",
		"button[class*='submit']",
		".send-btn",
		".submit-btn",
		"button svg[path*='send']",
		"[aria-label*='发送']",
		".send-icon",
		".submit-icon",
	}

	var sendBtn *rod.Element
	for _, selector := range sendSelectors {
		// Assign only on success so a failed wait can never leave a stale element.
		candidate, err := c.WaitForElementClickable(selector, 5)
		if err == nil && candidate != nil {
			sendBtn = candidate
			c.LogInfo(fmt.Sprintf("找到发送按钮: %s", selector))
			break
		}
	}

	if sendBtn == nil {
		// Fallback: an icon inside a button. SafeElement does not wait, whereas
		// Page.Element would keep retrying while no such element exists.
		fallback, err := c.SafeElement("button svg")
		if err != nil || fallback == nil {
			return fmt.Errorf("未找到发送按钮")
		}
		sendBtn = fallback
	}
	c.SleepMs(500)

	if err := c.JSClick(sendBtn); err != nil {
		return fmt.Errorf("点击发送按钮失败: %w", err)
	}
	c.LogInfo("已点击发送按钮")
	c.SleepMs(2000)
	return nil
}
// waitForAnswer polls the page until Tongyi Qianwen's latest reply stops
// changing and returns the trimmed reply text.
//
// The page is sampled once per poll cycle (every 1.5s): the first selector
// whose last matching element is visible and has non-empty text supplies the
// sample. The reply is considered complete when two consecutive samples are
// identical, longer than 10 bytes and free of "still generating" markers.
// An error is returned if that does not happen within 120 seconds.
func (c *QianwenCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待通义千问回答...")

	const (
		timeout        = 120 * time.Second // maximum total wait
		minAnswerBytes = 10                // shorter texts are treated as placeholders
	)
	// Candidate selectors for the reply container, tried in order.
	answerSelectors := []string{
		".message-content",
		".response-text",
		"[class*='assistant'] [class*='content']",
		"[class*='ai'] [class*='message']",
		".chat-message.ai",
		".answer-content",
		".qianwen-answer",
	}
	// Substrings that indicate the reply is still being produced.
	// NOTE(review): a finished reply that legitimately contains one of these
	// words is never accepted and ends in a timeout.
	generatingMarkers := []string{"正在", "思考中", "typing", "生成中"}

	previous := ""
	start := time.Now()
	for time.Since(start) < timeout {
		// Take exactly one sample per cycle. Comparing samples taken by
		// different selectors within the same cycle would report "stable"
		// without any time having passed.
		current := ""
		for _, selector := range answerSelectors {
			elements, err := c.Page.Elements(selector)
			if err != nil || len(elements) == 0 {
				continue
			}
			// The newest reply is the last match.
			latest := elements[len(elements)-1]
			if visible, _ := latest.Visible(); !visible {
				continue
			}
			text, err := latest.Text()
			if err != nil {
				continue
			}
			if current = strings.TrimSpace(text); current != "" {
				break
			}
		}

		generating := false
		for _, marker := range generatingMarkers {
			if strings.Contains(current, marker) {
				generating = true
				break
			}
		}

		if current != "" && !generating {
			if current == previous && len(current) > minAnswerBytes {
				// Unchanged since the previous cycle: treat as finished.
				c.LogInfo("获取到完整答案")
				return current, nil
			}
			previous = current
		}
		c.SleepMs(1500)
	}
	return "", fmt.Errorf("等待答案超时")
}
// SafeElement looks up selector without waiting for it to appear.
//
// It returns (nil, nil) when no element matches, so callers must check the
// element for nil as well as the error.
func (c *QianwenCollector) SafeElement(selector string) (*rod.Element, error) {
	// Page.Has already returns the matched element. Reusing it avoids a second
	// lookup through Page.Element, which retries until a match exists and could
	// block if the element disappeared between the two calls.
	exists, element, err := c.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, nil
	}
	return element, nil
}

35
internal/collect/utils.go Normal file
View File

@ -0,0 +1,35 @@
package collect
import (
"regexp"
"strings"
)
// Patterns and replacer used by CleanHTMLTags, built once at package
// initialisation instead of on every call.
var (
	cleanHTMLTagPattern   = regexp.MustCompile(`<[^>]*>`)
	cleanHTMLSpacePattern = regexp.MustCompile(`\s+`)
	// A single-pass replacer decodes each entity exactly once, so an escaped
	// entity such as "&amp;quot;" becomes "&quot;" and not a quote character.
	cleanHTMLEntityReplacer = strings.NewReplacer(
		"&nbsp;", " ",
		"&lt;", "<",
		"&gt;", ">",
		"&amp;", "&",
		"&quot;", "\"",
		"&#39;", "'",
	)
)

// CleanHTMLTags strips HTML tags from html and returns the plain text.
//
// Tags are removed first, then the entities &nbsp; &lt; &gt; &amp; &quot; and
// &#39; are decoded, and finally the text is trimmed and every run of
// whitespace (including newlines) is collapsed to a single space. An empty
// input yields an empty string. It is a package-level helper shared by the
// platform collectors.
func CleanHTMLTags(html string) string {
	if html == "" {
		return ""
	}
	cleaned := cleanHTMLTagPattern.ReplaceAllString(html, "")
	cleaned = cleanHTMLEntityReplacer.Replace(cleaned)
	cleaned = strings.TrimSpace(cleaned)
	return cleanHTMLSpacePattern.ReplaceAllString(cleaned, " ")
}

413
internal/collect/wenxin.go Normal file
View File

@ -0,0 +1,413 @@
package collect
import (
"context"
"fmt"
"geo/internal/config"
"log"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
)
// WenxinCollector is the collector for the Wenxin Yiyan platform.
// It embeds *BaseCollector and adds the Wenxin-specific methods.
type WenxinCollector struct {
*BaseCollector
}
// NewWenxinCollector builds a Wenxin Yiyan collector on top of a fresh
// BaseCollector, using the Baidu passport page for login and the Yiyan site
// for chat.
func NewWenxinCollector(ctx context.Context, params *CollectParams, cfg *config.Config, logger *log.Logger) CollectorInterface {
	base := NewBaseCollector(ctx, params, cfg, logger)
	// Wenxin Yiyan entry points.
	base.LoginURL = "https://passport.baidu.com/v2/?login"
	base.ChatURL = "https://yiyan.baidu.com/"
	return &WenxinCollector{BaseCollector: base}
}
// CheckLoginStatus reports whether the current page looks logged in.
//
// The session counts as logged out when the URL is on passport.baidu.com,
// when the page's buttons cannot be queried, or when any button's trimmed
// text is exactly "登录" or "Login". Otherwise it counts as logged in.
func (c *WenxinCollector) CheckLoginStatus() bool {
	// Being on the passport page means the login has not been completed.
	if strings.Contains(c.GetCurrentURL(), "passport.baidu.com") {
		return false
	}

	buttons, err := c.Page.Elements("button")
	if err != nil {
		// The page could not be inspected, so a login cannot be confirmed.
		// Reporting "logged in" here would make callers save cookies for a
		// session that was never verified.
		c.LogInfo(fmt.Sprintf("检查登录按钮失败: %v", err))
		return false
	}
	for _, btn := range buttons {
		// A button whose text cannot be read yields "" and is skipped.
		text, _ := btn.Text()
		label := strings.TrimSpace(text)
		if label == "登录" || label == "Login" {
			c.LogInfo(fmt.Sprintf("检测到页面上有'%s'按钮，说明未登录", label))
			return false
		}
	}
	// No login button present.
	return true
}
// WaitLogin opens the Wenxin Yiyan chat page and waits for the user to log in
// on that page.
//
// It returns (true, "already_logged_in") when a session already exists,
// (true, "login_success") once the login button has disappeared, and
// (false, message) when the browser cannot be started, the page cannot be
// opened, or the login is not completed within the polling budget. Cookies
// are saved on success and the browser is closed before returning.
func (c *WenxinCollector) WaitLogin() (bool, string) {
	c.LogInfo("开始等待文心一言登录...")
	if err := c.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer c.Close()

	// Navigate instead of MustNavigate: a navigation failure is reported to the
	// caller rather than panicking.
	if err := c.Page.Navigate(c.ChatURL); err != nil {
		return false, fmt.Sprintf("页面打开失败: %v", err)
	}
	c.Sleep(3)

	// A previous session may still be valid.
	if c.CheckLoginStatus() {
		c.SaveCookies()
		c.LogInfo("已有登录状态")
		return true, "already_logged_in"
	}

	c.LogInfo("检测到未登录，请在当前页面完成登录（扫码或输入账号密码）...")

	// One poll per second.
	// NOTE(review): the original comment said "at most 300 seconds" while the
	// loop ran 5000 polls; the loop bound is kept unchanged here — confirm the
	// intended limit.
	const maxPolls = 5000
	for i := 0; i < maxPolls; i++ {
		// Same criteria as CheckLoginStatus: not on the passport page and no
		// "登录"/"Login" button. A failed query is not taken as a login.
		loggedIn := !strings.Contains(c.GetCurrentURL(), "passport.baidu.com")
		if loggedIn {
			buttons, err := c.Page.Elements("button")
			if err != nil {
				loggedIn = false
			}
			for _, btn := range buttons {
				text, _ := btn.Text()
				label := strings.TrimSpace(text)
				if label == "登录" || label == "Login" {
					loggedIn = false
					break
				}
			}
		}

		if loggedIn {
			c.Sleep(2) // let the page settle before saving cookies
			c.SaveCookies()
			c.LogInfo("登录成功：登录按钮已消失")
			return true, "login_success"
		}

		time.Sleep(1 * time.Second)
		// Progress message every 30 polls.
		if i > 0 && i%30 == 0 {
			c.LogInfo(fmt.Sprintf("等待登录中... 已等待 %d 秒", i))
		}
	}
	return false, "登录超时，请检查网络或账号状态"
}
// AskQuestion sends question to Wenxin Yiyan and returns the answer text.
//
// It starts the browser, initialises the page, types the question, clicks
// send and waits for the reply. The browser is closed before returning.
// Failures of each step are wrapped with %w so callers can inspect the
// underlying error with errors.Is / errors.As.
func (c *WenxinCollector) AskQuestion(question string) (string, error) {
	c.LogInfo(fmt.Sprintf("开始提问: %s", question))

	if err := c.SetupDriver(); err != nil {
		return "", fmt.Errorf("浏览器启动失败: %w", err)
	}
	defer c.Close()

	if err := c.InitPage(); err != nil {
		return "", fmt.Errorf("页面初始化失败，请先调用WaitLogin登录: %w", err)
	}
	// Give the page time to finish loading.
	c.Sleep(3)

	if err := c.inputQuestion(question); err != nil {
		return "", fmt.Errorf("输入问题失败: %w", err)
	}
	if err := c.clickSendButton(); err != nil {
		return "", fmt.Errorf("点击发送按钮失败: %w", err)
	}

	answer, err := c.waitForAnswer()
	if err != nil {
		return "", fmt.Errorf("获取答案失败: %w", err)
	}
	c.LogInfo(fmt.Sprintf("成功获取答案，长度: %d 字符", len(answer)))
	return answer, nil
}
// inputQuestion locates Wenxin Yiyan's editable input area, focuses it and
// types question into it.
//
// Selectors are tried in order, each with a 10-second visibility wait. An
// error is returned when no input area is found or when clicking, focusing
// or typing fails.
func (c *WenxinCollector) inputQuestion(question string) error {
	c.LogInfo("输入问题...")

	// Candidate selectors for the input area, tried in order.
	inputSelectors := []string{
		"[contenteditable='true']",
		"div[contenteditable]",
		".editable__T7WAW4uW",
		"[class*='editable']",
	}

	var inputBox *rod.Element
	for _, selector := range inputSelectors {
		// Assign only on success so a failed wait can never leave a stale element.
		candidate, err := c.WaitForElementVisible(selector, 10)
		if err == nil && candidate != nil {
			inputBox = candidate
			c.LogInfo(fmt.Sprintf("找到输入框: %s", selector))
			break
		}
	}
	if inputBox == nil {
		return fmt.Errorf("未找到输入框")
	}

	// Click to focus.
	if err := inputBox.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击输入框失败: %w", err)
	}
	c.SleepMs(500)

	// Focus explicitly, then type. Both errors are checked so a question that
	// was never entered is not reported as entered.
	if err := inputBox.Focus(); err != nil {
		return fmt.Errorf("聚焦输入框失败: %w", err)
	}
	c.SleepMs(200)
	if err := inputBox.Input(question); err != nil {
		return fmt.Errorf("写入输入框失败: %w", err)
	}

	c.LogInfo(fmt.Sprintf("问题已输入: %s", question))
	c.SleepMs(1000)
	return nil
}
// clickSendButton finds the send button, clicks it and then waits up to about
// 10 seconds for evidence that the message was sent.
//
// The button is the first <button> or <div> whose class attribute contains
// "send" (case-insensitive); if none exists, the last <button> on the page is
// used. After the click, a "pause" button appearing or the "send" button
// disappearing counts as confirmation. If neither is observed the function
// logs a warning and still returns nil. An error is returned only when the
// page cannot be queried, no button is found, or the click fails.
func (c *WenxinCollector) clickSendButton() error {
	c.LogInfo("点击发送按钮...")

	// Class names on the page are obfuscated, so match on a substring of the
	// class attribute. A single selector query replaces fetching every element
	// on the page and reading its attributes one browser round trip at a time.
	candidates, err := c.Page.Elements("button[class*='send' i], div[class*='send' i]")
	if err != nil {
		return fmt.Errorf("获取页面元素失败: %w", err)
	}

	var sendBtn *rod.Element
	if len(candidates) > 0 {
		sendBtn = candidates[0]
		// Attribute and tag are read for the log line only.
		className := ""
		if attr, _ := sendBtn.Attribute("class"); attr != nil {
			className = *attr
		}
		tagName, _ := sendBtn.Property("tagName")
		c.LogInfo(fmt.Sprintf("通过正则找到发送按钮: class=%s, tag=%s", className, tagName.Str()))
	}

	if sendBtn == nil {
		// Fallback: use the last button on the page.
		buttons, _ := c.Page.Elements("button")
		if len(buttons) > 0 {
			sendBtn = buttons[len(buttons)-1]
			c.LogInfo("使用最后一个button作为发送按钮")
		}
	}
	if sendBtn == nil {
		return fmt.Errorf("未找到发送按钮")
	}
	c.SleepMs(500)

	// Scrolling is best effort: a failure is logged and the click proceeds.
	if err := sendBtn.ScrollIntoView(); err != nil {
		c.LogInfo(fmt.Sprintf("滚动失败: %v", err))
	}
	c.SleepMs(300)

	c.LogInfo("执行点击...")
	if err := sendBtn.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("点击发送按钮失败: %w", err)
	}
	c.LogInfo("已点击发送按钮")
	c.SleepMs(1000)

	// Confirm the send: poll every 500ms, 20 times (about 10 seconds).
	const maxWaitSeconds = 10
	for i := 0; i < maxWaitSeconds*2; i++ {
		// A pause button means a reply is being generated.
		pauseExists, err := c.hasPauseButton()
		if err == nil && pauseExists {
			c.LogInfo("✓ 检测到pause按钮，消息发送成功，AI正在回答...")
			return nil
		}
		// The send button disappearing also counts as sent.
		sendExists, _ := c.hasSendButton()
		if !sendExists {
			c.LogInfo("✓ send按钮已消失，消息发送成功")
			return nil
		}
		c.SleepMs(500)
	}

	c.LogInfo("⚠ 无法确认消息是否发送成功，但已尽力尝试")
	return nil
}
// hasSendButton reports whether the page currently contains a <button> or
// <div> whose class attribute contains "send" (case-insensitive).
func (c *WenxinCollector) hasSendButton() (bool, error) {
	// One selector query instead of fetching every element on the page and
	// reading its class and tag individually; this method runs inside polling
	// loops. The trailing "i" makes the attribute match case-insensitive.
	exists, _, err := c.Page.Has("button[class*='send' i], div[class*='send' i]")
	if err != nil {
		return false, err
	}
	return exists, nil
}
// hasPauseButton reports whether the page currently contains a <button> or
// <div> whose class attribute contains "pause" (case-insensitive).
func (c *WenxinCollector) hasPauseButton() (bool, error) {
	// One selector query instead of fetching every element on the page and
	// reading its class and tag individually; this method runs inside polling
	// loops. The trailing "i" makes the attribute match case-insensitive.
	exists, _, err := c.Page.Has("button[class*='pause' i], div[class*='pause' i]")
	if err != nil {
		return false, err
	}
	return exists, nil
}
// waitForAnswer waits for Wenxin Yiyan's streamed reply to finish and returns
// its plain text.
//
// Every 1.5 seconds it checks for the pause button and reads the
// #answer_text_id container. The reply is complete once the pause button is
// absent and the text (longer than 30 bytes) has been identical for 5
// consecutive polls. An error is returned after 180 seconds.
func (c *WenxinCollector) waitForAnswer() (string, error) {
	c.LogInfo("等待AI回答...")

	const requiredStableCount = 5 // identical polls required before finishing
	timeout := 180                // maximum wait in seconds; streaming can be slow
	startTime := time.Now()

	var lastAnswer string
	stableCount := 0     // consecutive polls with unchanged text
	isAnswering := false // whether a pause button has been seen

	for time.Since(startTime).Seconds() < float64(timeout) {
		// A pause button means the reply is still being generated.
		pauseExists, _ := c.hasPauseButton()
		if pauseExists {
			if !isAnswering {
				c.LogInfo("检测到pause按钮，AI正在生成回答...")
				isAnswering = true
			}
		} else if isAnswering {
			// The pause button is gone; finish once the text has been stable.
			c.LogInfo("pause按钮消失，检查回答是否完成...")
			if stableCount >= requiredStableCount && lastAnswer != "" {
				c.LogInfo(fmt.Sprintf("✓ AI回答完成，最终长度: %d 字符", len(lastAnswer)))
				return lastAnswer, nil
			}
		}

		// SafeElement does not wait for the element. Page.Element would keep
		// retrying until the container exists, which could stall this loop
		// beyond its timeout.
		var answerText string
		answerElem, err := c.SafeElement("#answer_text_id")
		if err == nil && answerElem != nil {
			htmlContent, err := answerElem.HTML()
			if err == nil && len(strings.TrimSpace(htmlContent)) > 30 {
				// Strip markup and keep the plain text.
				answerText = CleanHTMLTags(htmlContent)
				c.LogInfo(fmt.Sprintf("找到答案容器，清理后文本长度: %d", len(answerText)))
			} else {
				// HTML unavailable or too short: fall back to the element text.
				textContent, _ := answerElem.Text()
				answerText = strings.TrimSpace(textContent)
				c.LogInfo(fmt.Sprintf("找到答案容器，文本长度: %d", len(answerText)))
			}
		} else {
			c.LogInfo("未找到#answer_text_id元素")
		}

		if len(answerText) > 30 {
			if answerText == lastAnswer {
				stableCount++
				c.LogInfo(fmt.Sprintf("答案稳定中... (%d/%d), 长度: %d", stableCount, requiredStableCount, len(answerText)))
				// Stable text with no pause button: the reply is complete.
				if !pauseExists && stableCount >= requiredStableCount {
					c.LogInfo(fmt.Sprintf("✓ AI回答完成，最终长度: %d 字符", len(answerText)))
					return answerText, nil
				}
			} else {
				// Text is still changing: restart the stability count.
				stableCount = 0
				lastAnswer = answerText
				if pauseExists {
					c.LogInfo(fmt.Sprintf("检测到流式输出，当前长度: %d 字符", len(answerText)))
				}
			}
		}

		c.SleepMs(1500)

		// Progress message whenever the elapsed whole seconds hit a multiple of 10.
		elapsed := int(time.Since(startTime).Seconds())
		if elapsed > 0 && elapsed%10 == 0 {
			c.LogInfo(fmt.Sprintf("等待AI回答中... 已等待 %d 秒", elapsed))
		}
	}
	return "", fmt.Errorf("等待答案超时（%d秒）", timeout)
}
// SafeElement looks up selector without waiting for it to appear.
//
// It returns (nil, nil) when no element matches, so callers must check the
// element for nil as well as the error.
func (c *WenxinCollector) SafeElement(selector string) (*rod.Element, error) {
	// Page.Has already returns the matched element. Reusing it avoids a second
	// lookup through Page.Element, which retries until a match exists and could
	// block if the element disappeared between the two calls.
	exists, element, err := c.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, nil
	}
	return element, nil
}