geoGo/internal/collect/base.go

305 lines
7.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package collect
import (
"context"
"encoding/json"
"fmt"
"geo/internal/config"
"os"
"path/filepath"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
"github.com/gofiber/fiber/v2/log"
)
// BaseCollector is the basic collector structure. It holds the browser
// handles, request metadata, logger and configuration that the helper
// methods in this file operate on.
type BaseCollector struct {
	// ctx is the context handed to NewBaseCollector; SetupDriver attaches it
	// to the browser connection and the wait helpers attach it to the page.
	ctx context.Context
	// Headless selects headless browser mode in SetupDriver.
	Headless bool
	// RequestID is copied from CollectParams and forms part of the per-launch
	// browser user-data directory name.
	RequestID string
	// Platform is copied from CollectParams; it names the cookie directory,
	// the cookie file and one level of the browser user-data path.
	Platform string
	// KeyWords is copied from CollectParams; it is not read in the visible
	// part of this file.
	KeyWords []string
	// Browser is the connected rod browser, assigned by SetupDriver.
	Browser *rod.Browser
	// Page is the page opened by SetupDriver; most helpers act on it.
	Page *rod.Page
	// Logger receives all log output. NewBaseCollector substitutes the
	// default logger when the caller supplies none.
	Logger log.AllLogger
	// LoginURL is neither set nor read in the visible part of this file —
	// presumably assigned by the concrete collector; verify against callers.
	LoginURL string
	// ChatURL is the address InitPage navigates to after cookies load. It is
	// not assigned in the visible part of this file.
	ChatURL string
	// CookiesFile is the path of the JSON cookie file used by SaveCookies,
	// LoadCookies and DelCookies.
	CookiesFile string
	// config supplies the Sys.ChromePath, Sys.ChromeDataDir and
	// Sys.CookiesDir settings.
	config *config.Config
	// MaxRetries is initialised to 3 by NewBaseCollector; it is not read in
	// the visible part of this file.
	MaxRetries int
	// RetryDelay is initialised to 200 by NewBaseCollector. The unit is not
	// stated here — presumably milliseconds, TODO confirm with its users.
	RetryDelay int
}
// NewBaseCollector builds a BaseCollector from the request parameters.
//
// The request fields (Headless, RequestID, Platform, KeyWords) are copied
// from params, MaxRetries is set to 3 and RetryDelay to 200. When logger is
// nil the default logger is used. The cookie file is named after the platform
// (not a user index) and lives in the directory returned by cookiesDir.
//
// params and config must be non-nil; both are dereferenced here.
func NewBaseCollector(ctx context.Context, params *CollectParams, config *config.Config, logger log.AllLogger) *BaseCollector {
	// Fall back to the default logger when the caller passes none.
	activeLogger := logger
	if activeLogger == nil {
		activeLogger = log.DefaultLogger()
	}

	collector := new(BaseCollector)
	collector.ctx = ctx
	collector.Headless = params.Headless
	collector.RequestID = params.RequestID
	collector.Platform = params.Platform
	collector.KeyWords = params.KeyWords
	collector.Logger = activeLogger
	collector.config = config
	collector.MaxRetries = 3
	collector.RetryDelay = 200

	// The cookie file is keyed by platform rather than by user index.
	cookieName := params.Platform + ".json"
	collector.CookiesFile = filepath.Join(collector.cookiesDir(), cookieName)
	return collector
}
// SetupDriver launches a browser, connects to it and opens the first page,
// storing both handles on the collector.
//
// Each call creates a fresh user-data directory under
// ChromeDataDir/Platform named "RequestID___unix-nanoseconds", so launches do
// not share a profile.
//
// It returns an error when the data directory cannot be created, the browser
// cannot be launched or connected to, or the initial page cannot be opened.
// On a failure after launch the browser process is shut down again, because
// leakless mode is disabled and nothing else would reap it. b.Browser and
// b.Page are assigned only on full success.
func (b *BaseCollector) SetupDriver() error {
	profileName := fmt.Sprintf("%s___%d", b.RequestID, time.Now().UnixNano())
	userDataDir := filepath.Join(b.config.Sys.ChromeDataDir, b.Platform, profileName)
	if err := os.MkdirAll(userDataDir, 0755); err != nil {
		return fmt.Errorf("创建浏览器数据目录失败: %w", err)
	}

	l := launcher.New().
		Bin(b.config.Sys.ChromePath).
		UserDataDir(userDataDir).
		Headless(b.Headless).
		Leakless(false).
		Set("disable-blink-features", "AutomationControlled")
	if b.Headless {
		l.Set("headless", "new")
		l.Set("disable-gpu")
		l.Set("no-sandbox")
		l.Set("disable-dev-shm-usage")
	} else {
		l.Set("window-size", "1920,1080")
		l.Set("start-maximized")
		l.Delete("headless")
	}
	l.Set("lang", "zh-CN")
	l.Set("accept-lang", "zh-CN,zh;q=0.9,en;q=0.8")
	l.Set("force-device-scale-factor", "1")
	l.Set("timezone", "Asia/Shanghai")

	url, err := l.Launch()
	if err != nil {
		return fmt.Errorf("启动浏览器失败: %w", err)
	}

	// Use the error-returning calls instead of the Must* variants so that a
	// failed connection is reported to the caller rather than panicking.
	browser := rod.New().Context(b.ctx).ControlURL(url)
	if err := browser.Connect(); err != nil {
		l.Kill()
		return fmt.Errorf("连接浏览器失败: %w", err)
	}
	page, err := browser.Page(proto.TargetCreateTarget{})
	if err != nil {
		// Best effort: the page error is the one worth reporting.
		_ = browser.Close()
		return fmt.Errorf("创建页面失败: %w", err)
	}

	b.Browser = browser
	b.Page = page
	return nil
}
// Close shuts down the page and then the browser, skipping whichever handle
// is nil. Errors from either close call are discarded.
func (b *BaseCollector) Close() {
	if page := b.Page; page != nil {
		_ = page.Close()
	}
	if browser := b.Browser; browser != nil {
		_ = browser.Close()
	}
}
// SaveCookies reads the cookies of the current page, encodes them as JSON and
// writes them to b.CookiesFile.
//
// The file is created with mode 0600 because cookies are session credentials
// and should not be readable by other users. The mode only applies when the
// file is created; an existing file keeps its permissions.
//
// It returns an error when the cookies cannot be read from the page, cannot
// be encoded, or the file cannot be written. b.Page must be non-nil.
func (b *BaseCollector) SaveCookies() error {
	cookies, err := b.Page.Cookies(nil)
	if err != nil {
		return fmt.Errorf("read page cookies: %w", err)
	}
	data, err := json.Marshal(cookies)
	if err != nil {
		return fmt.Errorf("encode cookies: %w", err)
	}
	// Returned unwrapped so callers can still inspect the *os.PathError.
	return os.WriteFile(b.CookiesFile, data, 0600)
}
// LoadCookies reads b.CookiesFile, decodes it as a JSON list of
// proto.NetworkCookieParam and installs the cookies on the current page.
//
// The read, decode or set-cookies error is returned unchanged, so a missing
// cookie file surfaces as the original os.ReadFile error.
func (b *BaseCollector) LoadCookies() error {
	var stored []*proto.NetworkCookieParam

	raw, err := os.ReadFile(b.CookiesFile)
	if err == nil {
		err = json.Unmarshal(raw, &stored)
	}
	if err != nil {
		return err
	}
	return b.Page.SetCookies(stored)
}
// DelCookies removes the cookie file at b.CookiesFile and returns the
// os.Remove error as is, including when the file does not exist.
func (b *BaseCollector) DelCookies() error {
	target := b.CookiesFile
	return os.Remove(target)
}
// WaitForPageReady waits for the page to finish loading.
//
// timeout is the maximum wait in seconds, matching the other wait helpers of
// this type. A zero or negative timeout keeps the earlier behaviour: the wait
// is bounded only by the collector's context.
//
// It returns the error reported by the page's WaitLoad, which includes the
// timeout or context cancellation.
func (b *BaseCollector) WaitForPageReady(timeout int) error {
	page := b.Page.Context(b.ctx)
	if timeout > 0 {
		// Previously the parameter was accepted but never applied.
		page = page.Timeout(time.Duration(timeout) * time.Second)
	}
	return page.WaitLoad()
}
// WaitForElement looks up the element matching selector on the current page,
// using the collector's context and a limit of timeout seconds.
//
// It returns the element, or the error from the page's Element lookup.
func (b *BaseCollector) WaitForElement(selector string, timeout int) (*rod.Element, error) {
	limit := time.Duration(timeout) * time.Second
	scoped := b.Page.Context(b.ctx).Timeout(limit)
	return scoped.Element(selector)
}
// WaitForElementVisible finds the element via WaitForElement and then waits
// until it is visible.
//
// It returns nil and the error when either the lookup or the visibility wait
// fails; otherwise it returns the element.
// NOTE(review): the visibility wait presumably shares the lookup's timeout
// because the element derives from the timed page — confirm against rod.
func (b *BaseCollector) WaitForElementVisible(selector string, timeout int) (*rod.Element, error) {
	found, err := b.WaitForElement(selector, timeout)
	if err == nil {
		err = found.WaitVisible()
	}
	if err != nil {
		return nil, err
	}
	return found, nil
}
// WaitForElementClickable finds the element via WaitForElementVisible and
// then waits until it is enabled.
//
// It returns nil and the error when either step fails; otherwise it returns
// the element.
func (b *BaseCollector) WaitForElementClickable(selector string, timeout int) (*rod.Element, error) {
	found, err := b.WaitForElementVisible(selector, timeout)
	if err == nil {
		err = found.WaitEnabled()
	}
	if err != nil {
		return nil, err
	}
	return found, nil
}
// JSClick clicks the element once with the left mouse button.
//
// Despite its name, this goes through rod's Element.Click rather than running
// a JavaScript click. A nil element is logged as a warning and reported as an
// error; a failed click is logged and its error returned.
func (b *BaseCollector) JSClick(element *rod.Element) error {
	if element == nil {
		b.Logger.Warn("element is nil")
		return fmt.Errorf("element is nil")
	}
	if err := element.Click(proto.InputMouseButtonLeft, 1); err != nil {
		b.Logger.Errorf("click fail: %v", err)
		return err
	}
	return nil
}
// SetInputValue sets the value of an input element and dispatches bubbling
// "input" and "change" events so page scripts observe the change.
//
// rod binds the element as `this` when evaluating on an element and passes
// only JSArgs as function arguments. The script therefore reads the element
// from `this`; the earlier `(el, val)` form received the value string as `el`
// and failed when calling dispatchEvent on it.
//
// It returns an error when element is nil or the script fails.
func (b *BaseCollector) SetInputValue(element *rod.Element, value string) error {
	if element == nil {
		return fmt.Errorf("element is nil")
	}
	_, err := element.Evaluate(&rod.EvalOptions{
		JS:     `function (val) { this.value = val; this.dispatchEvent(new Event('input', {bubbles: true})); this.dispatchEvent(new Event('change', {bubbles: true})); }`,
		JSArgs: []interface{}{value},
	})
	return err
}
// ClearInput empties an input element and dispatches a bubbling "input" event.
//
// rod binds the element as `this` when evaluating on an element, and no
// arguments are passed here. The script therefore reads the element from
// `this`; the earlier `el => ...` form received undefined as `el` and threw.
//
// It returns an error when element is nil or the script fails.
func (b *BaseCollector) ClearInput(element *rod.Element) error {
	if element == nil {
		return fmt.Errorf("element is nil")
	}
	_, err := element.Evaluate(&rod.EvalOptions{
		JS: `function () { this.value = ''; this.dispatchEvent(new Event('input', {bubbles: true})); }`,
	})
	return err
}
// Sleep blocks the calling goroutine for the given number of seconds.
// The collector's context is not consulted, so cancellation does not cut the
// pause short.
func (b *BaseCollector) Sleep(seconds int) {
	pause := time.Second * time.Duration(seconds)
	time.Sleep(pause)
}
// SleepMs blocks the calling goroutine for the given number of milliseconds.
// The collector's context is not consulted, so cancellation does not cut the
// pause short.
func (b *BaseCollector) SleepMs(milliseconds int) {
	pause := time.Millisecond * time.Duration(milliseconds)
	time.Sleep(pause)
}
// LogInfo writes message to the collector's logger at info level, prefixed
// with a pin marker.
func (b *BaseCollector) LogInfo(message string) {
	const layout = "📌 %s"
	b.Logger.Infof(layout, message)
}
// LogInfof formats args according to format and writes the result at info
// level, prefixed with a pin marker.
func (b *BaseCollector) LogInfof(format string, args ...interface{}) {
	prefixed := "📌 " + format
	b.Logger.Infof(prefixed, args...)
}
// LogError writes message to the collector's logger at error level, prefixed
// with a cross marker.
func (b *BaseCollector) LogError(message string) {
	const layout = "❌ %s"
	b.Logger.Errorf(layout, message)
}
// LogStep records the outcome of a named step: a success line at info level
// when success is true, otherwise a failure line at error level. message is
// appended to the line in both cases.
func (b *BaseCollector) LogStep(stepName string, success bool, message string) {
	if !success {
		b.Logger.Errorf("❌ %s: 失败 %s", stepName, message)
		return
	}
	b.Logger.Infof("✅ %s: 成功 %s", stepName, message)
}
// GetCurrentURL returns the URL of the current page.
//
// It returns an empty string when no page is open or the page info cannot be
// fetched. A fetch failure is logged; the earlier MustInfo call panicked in
// that case.
func (b *BaseCollector) GetCurrentURL() string {
	if b.Page == nil {
		return ""
	}
	info, err := b.Page.Info()
	if err != nil {
		b.Logger.Errorf("get page info fail: %v", err)
		return ""
	}
	return info.URL
}
// Screenshot captures the current page and writes the image bytes to
// filename with mode 0644.
//
// The capture is requested with rod's full-page flag off and no explicit
// capture options. It returns the capture error or the file-write error.
func (b *BaseCollector) Screenshot(filename string) error {
	shot, err := b.Page.Screenshot(false, nil)
	if err == nil {
		err = os.WriteFile(filename, shot, 0644)
	}
	return err
}
// CheckLoginStatus reports whether the session is logged in. This base
// version is a placeholder that always returns false; per the original note,
// concrete collectors are expected to supply their own implementation.
func (b *BaseCollector) CheckLoginStatus() bool {
	const loggedIn = false
	return loggedIn
}
// WaitLogin waits for the login to complete. This base version is a
// placeholder that always returns false together with a "not implemented"
// message; per the original note, concrete collectors are expected to supply
// their own implementation.
func (b *BaseCollector) WaitLogin() (bool, string) {
	const (
		loggedIn = false
		reason   = "需要实现"
	)
	return loggedIn, reason
}
// AskQuestion submits a question and returns the collected answer. This base
// version is a placeholder that ignores question and always returns a nil
// result with a "not implemented" error; per the original note, concrete
// collectors are expected to supply their own implementation.
func (b *BaseCollector) AskQuestion(question string) (*CollectResult, error) {
	notImplemented := fmt.Errorf("需要实现")
	return nil, notImplemented
}
// InitPage prepares the page for use.
//
// It tries to load saved cookies. When that succeeds it navigates to
// b.ChatURL and waits up to 5 seconds for the page to be ready. Afterwards
// the current cookies are written back to the cookie file, whether or not
// loading succeeded.
//
// A cookie-load failure is not an error: it just skips the navigation, as
// happens when no cookie file exists yet. A navigation failure is returned
// to the caller; the earlier MustNavigate call panicked instead. Failures of
// the ready-wait and of the cookie save are best effort and only logged.
func (b *BaseCollector) InitPage() error {
	if loadErr := b.LoadCookies(); loadErr == nil {
		if err := b.Page.Navigate(b.ChatURL); err != nil {
			return fmt.Errorf("navigate to chat page: %w", err)
		}
		if err := b.WaitForPageReady(5); err != nil {
			b.Logger.Warnf("wait for page ready fail: %v", err)
		}
	}
	// NOTE(review): this also runs when loading failed, which rewrites the
	// cookie file from a page that was never navigated — confirm intended.
	if err := b.SaveCookies(); err != nil {
		b.Logger.Warnf("save cookies fail: %v", err)
	}
	return nil
}
// SafeElement returns the element matching selector if it is present on the
// page right now.
//
// It returns (nil, nil) when no element matches, and (nil, err) when the
// lookup itself fails. The element reported by Page.Has is returned directly;
// the earlier second query through Page.Element was redundant and could wait
// for the element if it disappeared between the two calls.
func (b *BaseCollector) SafeElement(selector string) (*rod.Element, error) {
	found, el, err := b.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !found {
		return nil, nil
	}
	return el, nil
}
// cookiesDir returns the per-platform cookie directory, Sys.CookiesDir joined
// with b.Platform, creating it with mode 0755 if needed.
//
// The path is returned even when the directory cannot be created. The
// creation error is logged, when a logger is set, so that a later cookie
// write failure can be traced back to it.
func (b *BaseCollector) cookiesDir() string {
	dir := filepath.Join(b.config.Sys.CookiesDir, b.Platform)
	if err := os.MkdirAll(dir, 0755); err != nil && b.Logger != nil {
		b.Logger.Errorf("create cookies dir %q fail: %v", dir, err)
	}
	return dir
}