geoGo/internal/collect/base.go

322 lines
7.7 KiB
Go

package collect
import (
"context"
"encoding/json"
"fmt"
"geo/internal/config"
"log"
"os"
"path/filepath"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
)
// BaseCollector carries the state shared by the collector helpers in this
// file: request identity, rod browser/page handles, logging, the cookie file
// location and retry settings.
type BaseCollector struct {
// ctx is the context passed to NewBaseCollector; SetupDriver and the wait
// helpers attach it to rod calls.
ctx context.Context
// Headless selects headless browser mode in SetupDriver.
Headless bool
// UserIndex names the per-user subdirectory under the cookie and Chrome
// data directories.
UserIndex string
// PlatIndex is the base name of the cookie file (<PlatIndex>.json).
PlatIndex string
// RequestID is used in the log file name and in the Chrome profile
// directory name.
RequestID string
// Platform is used in the log file name.
Platform string
// Browser is the rod browser connection created by SetupDriver.
Browser *rod.Browser
// Page is the rod page created by SetupDriver and used by the helpers.
Page *rod.Page
// Logger receives the output of the Log* helpers.
Logger *log.Logger
// LogFile is the file opened by NewBaseCollector when no logger was
// supplied; it stays nil when a logger is passed in.
LogFile *os.File
// LoginURL is not referenced in the visible part of this file; presumably
// filled in by concrete collectors — confirm.
LoginURL string
// ChatURL is the page InitPage navigates to after cookies were loaded.
ChatURL string
// CookiesFile is the path used by SaveCookies, LoadCookies and DelCookies.
CookiesFile string
// config is the application configuration; its Sys section supplies the
// directories and Chrome binary path used here.
config *config.Config
// MaxRetries is initialised to 3 by NewBaseCollector.
MaxRetries int
// RetryDelay is initialised to 200 by NewBaseCollector.
// NOTE(review): the unit is not visible here (presumably milliseconds) — confirm.
RetryDelay int
}
// NewBaseCollector builds a BaseCollector for one collection request.
//
// When logger is non-nil it is used as-is and no log file is opened. Otherwise
// a file named collect_<RequestID>_<Platform>.log is created under
// config.Sys.LogsDir ("./logs" when that is empty) and kept in LogFile so
// Close can release it. If the directory or the file cannot be created,
// logging falls back to os.Stderr and the failure is reported there; wrapping
// the nil file that os.Create returns on error would silently drop every
// message.
//
// MaxRetries and RetryDelay start at 3 and 200. CookiesFile is set to
// <cookiesDir>/<PlatIndex>.json.
func NewBaseCollector(ctx context.Context, params *CollectParams, config *config.Config, logger *log.Logger) *BaseCollector {
	baseLogger := logger
	var logFile *os.File
	if baseLogger == nil {
		logsDir := config.Sys.LogsDir
		if logsDir == "" {
			logsDir = "./logs"
		}
		logName := fmt.Sprintf("collect_%s_%s.log", params.RequestID, params.Platform)

		err := os.MkdirAll(logsDir, 0755)
		if err == nil {
			logFile, err = os.Create(filepath.Join(logsDir, logName))
		}
		if err != nil {
			// Keep LogFile nil so Close has nothing to release, and make the
			// problem visible instead of logging into the void.
			logFile = nil
			baseLogger = log.New(os.Stderr, "", log.LstdFlags)
			baseLogger.Printf("❌ 创建日志文件失败,改用标准错误输出: %v", err)
		} else {
			baseLogger = log.New(logFile, "", log.LstdFlags)
		}
	}

	base := &BaseCollector{
		ctx:        ctx,
		Headless:   params.Headless,
		UserIndex:  params.UserIndex,
		PlatIndex:  params.PlatIndex,
		RequestID:  params.RequestID,
		Platform:   params.Platform,
		Logger:     baseLogger,
		LogFile:    logFile,
		config:     config,
		MaxRetries: 3,
		RetryDelay: 200,
	}
	base.CookiesFile = filepath.Join(base.cookiesDir(), params.PlatIndex+".json")
	return base
}
// cookiesDir returns the per-user cookie directory
// <config.Sys.CookiesDir>/<UserIndex>, creating it when missing.
//
// A creation failure is logged (when a logger is set) instead of being
// dropped. The path is still returned, so the caller's later file operation
// on it reports the underlying problem.
func (b *BaseCollector) cookiesDir() string {
	dir := filepath.Join(b.config.Sys.CookiesDir, b.UserIndex)
	if err := os.MkdirAll(dir, 0755); err != nil && b.Logger != nil {
		b.Logger.Printf("❌ 创建cookie目录失败 %s: %v", dir, err)
	}
	return dir
}
// SetupDriver launches Chrome through the rod launcher, connects to it and
// opens a blank page, storing the handles in b.Browser and b.Page.
//
// The profile directory is
// <ChromeDataDir>/<UserIndex>/<RequestID>___<unix-nanoseconds>, so each call
// gets its own directory. Every failure (directory creation, launch, connect,
// page creation) is returned as an error; the Must* helpers used previously
// would panic instead. Because Leakless is disabled, the launched browser is
// killed explicitly when connecting or opening the page fails.
func (b *BaseCollector) SetupDriver() error {
	b.LogInfo("初始化浏览器...")

	profile := fmt.Sprintf("%s___%d", b.RequestID, time.Now().UnixNano())
	userDataDir := filepath.Join(b.config.Sys.ChromeDataDir, b.UserIndex, profile)
	if err := os.MkdirAll(userDataDir, 0755); err != nil {
		return fmt.Errorf("创建用户数据目录失败: %w", err)
	}

	// Flags shared by both modes. Headless(false) already removes the
	// "headless" flag, so the non-headless branch below does not delete it again.
	l := launcher.New().
		Bin(b.config.Sys.ChromePath).
		UserDataDir(userDataDir).
		Headless(b.Headless).
		Leakless(false).
		Set("disable-blink-features", "AutomationControlled").
		Set("window-size", "1920,1080").
		Set("lang", "zh-CN").
		Set("force-device-scale-factor", "1")
	if b.Headless {
		l.Set("headless", "new")
		l.Set("disable-gpu")
		l.Set("no-sandbox")
		l.Set("disable-dev-shm-usage")
	} else {
		l.Set("start-maximized")
	}

	controlURL, err := l.Launch()
	if err != nil {
		return fmt.Errorf("启动浏览器失败: %w", err)
	}

	browser := rod.New().Context(b.ctx).ControlURL(controlURL)
	if err := browser.Connect(); err != nil {
		l.Kill()
		return fmt.Errorf("连接浏览器失败: %w", err)
	}

	page, err := browser.Page(proto.TargetCreateTarget{})
	if err != nil {
		l.Kill()
		return fmt.Errorf("创建页面失败: %w", err)
	}

	b.Browser = browser
	b.Page = page
	return nil
}
// Close releases the page, the browser and the log file, in that order.
//
// Close errors from the page and browser are written to the logger (when one
// is set) instead of being discarded. Each handle is cleared after it is
// closed, so calling Close again is a no-op. The log file is closed last so
// that the earlier failures can still be written to it.
func (b *BaseCollector) Close() {
	report := func(what string, err error) {
		if err != nil && b.Logger != nil {
			b.Logger.Printf("❌ 关闭%s失败: %v", what, err)
		}
	}

	if b.Page != nil {
		report("页面", b.Page.Close())
		b.Page = nil
	}
	if b.Browser != nil {
		report("浏览器", b.Browser.Close())
		b.Browser = nil
	}
	if b.LogFile != nil {
		// Nothing is left to report to once the log file itself fails to close.
		_ = b.LogFile.Close()
		b.LogFile = nil
	}
}
// SaveCookies reads the cookies of the current page (nil URL list) and writes
// them as JSON to b.CookiesFile.
//
// The file is created with mode 0600 because it holds session credentials.
// The mode only applies when the file is created; an existing file keeps its
// current permissions. It returns an error when the page has not been set up,
// or when reading, encoding or writing the cookies fails.
func (b *BaseCollector) SaveCookies() error {
	if b.Page == nil {
		return fmt.Errorf("保存cookies失败: 页面未初始化")
	}
	cookies, err := b.Page.Cookies(nil)
	if err != nil {
		return fmt.Errorf("读取页面cookies失败: %w", err)
	}
	data, err := json.Marshal(cookies)
	if err != nil {
		return fmt.Errorf("序列化cookies失败: %w", err)
	}
	if err := os.WriteFile(b.CookiesFile, data, 0600); err != nil {
		return fmt.Errorf("写入cookies文件失败: %w", err)
	}
	return nil
}
// LoadCookies reads b.CookiesFile, decodes its JSON content into cookie
// parameters and installs them on the current page. The first error from
// reading, decoding or setting the cookies is returned unchanged.
func (b *BaseCollector) LoadCookies() error {
	raw, readErr := os.ReadFile(b.CookiesFile)
	if readErr != nil {
		return readErr
	}

	var params []*proto.NetworkCookieParam
	decodeErr := json.Unmarshal(raw, &params)
	if decodeErr != nil {
		return decodeErr
	}

	return b.Page.SetCookies(params)
}
// DelCookies removes b.CookiesFile.
//
// A file that is already absent is treated as success, so the call is
// idempotent; any other removal error is returned.
func (b *BaseCollector) DelCookies() error {
	if err := os.Remove(b.CookiesFile); err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}
// WaitForPageReady blocks until the page's load event, bounded by timeout
// seconds.
//
// The timeout argument used to be ignored, so the wait was limited only by
// b.ctx. A timeout of zero or less keeps that behaviour (no extra deadline).
// The error from rod's WaitLoad is returned, including a deadline error when
// the timeout elapses.
func (b *BaseCollector) WaitForPageReady(timeout int) error {
	page := b.Page.Context(b.ctx)
	if timeout > 0 {
		page = page.Timeout(time.Duration(timeout) * time.Second)
		// Release the timer as soon as the wait is over.
		defer page.CancelTimeout()
	}
	return page.WaitLoad()
}
// WaitForElement looks up selector on the page, giving up after timeout
// seconds or when b.ctx is done, and returns the element or the lookup error.
//
// NOTE(review): the element comes from a page clone that carries the timeout;
// presumably it keeps that deadline for later operations — confirm before
// holding on to it for long.
func (b *BaseCollector) WaitForElement(selector string, timeout int) (*rod.Element, error) {
	limit := time.Duration(timeout) * time.Second
	page := b.Page.Context(b.ctx).Timeout(limit)
	return page.Element(selector)
}
// WaitForElementVisible finds selector via WaitForElement and then waits for
// the element to become visible. It returns (nil, err) when either step fails.
func (b *BaseCollector) WaitForElementVisible(selector string, timeout int) (*rod.Element, error) {
	found, err := b.WaitForElement(selector, timeout)
	if err == nil {
		err = found.WaitVisible()
	}
	if err != nil {
		return nil, err
	}
	return found, nil
}
// WaitForElementClickable finds selector via WaitForElementVisible and then
// waits for the element to be enabled. It returns (nil, err) when either step
// fails.
func (b *BaseCollector) WaitForElementClickable(selector string, timeout int) (*rod.Element, error) {
	target, visibleErr := b.WaitForElementVisible(selector, timeout)
	if visibleErr != nil {
		return nil, visibleErr
	}

	enabledErr := target.WaitEnabled()
	if enabledErr != nil {
		return nil, enabledErr
	}
	return target, nil
}
// JSClick clicks element once with the left mouse button through rod's
// Element.Click. Despite the name, no JavaScript click() call is made here.
//
// A nil element yields an "element is nil" error. Failures are logged and
// returned. The error is passed to Printf as an argument rather than spliced
// into the format string, so a '%' in the error text cannot corrupt the log
// line.
func (b *BaseCollector) JSClick(element *rod.Element) error {
	if element == nil {
		b.Logger.Printf("element is nil")
		return fmt.Errorf("element is nil")
	}
	if err := element.Click(proto.InputMouseButtonLeft, 1); err != nil {
		b.Logger.Printf("click fail: %v", err)
		return err
	}
	return nil
}
// SetInputValue assigns value to the element's value property and dispatches
// bubbling "input" and "change" events.
//
// rod binds the element to `this` and passes JSArgs as the function's
// positional arguments. The previous script declared the element as its first
// parameter, so that parameter actually received the string and the call
// failed. A nil element yields an "element is nil" error, matching JSClick.
func (b *BaseCollector) SetInputValue(element *rod.Element, value string) error {
	if element == nil {
		return fmt.Errorf("element is nil")
	}
	_, err := element.Evaluate(&rod.EvalOptions{
		JS:     `function (val) { this.value = val; this.dispatchEvent(new Event('input', {bubbles: true})); this.dispatchEvent(new Event('change', {bubbles: true})); }`,
		JSArgs: []interface{}{value},
	})
	return err
}
// ClearInput sets the element's value property to the empty string and
// dispatches a bubbling "input" event.
//
// rod binds the element to `this`; the previous script expected it as a
// parameter, which was undefined because no arguments are passed, so the call
// failed. A nil element yields an "element is nil" error, matching JSClick.
func (b *BaseCollector) ClearInput(element *rod.Element) error {
	if element == nil {
		return fmt.Errorf("element is nil")
	}
	_, err := element.Evaluate(&rod.EvalOptions{
		JS: `function () { this.value = ''; this.dispatchEvent(new Event('input', {bubbles: true})); }`,
	})
	return err
}
// Sleep blocks the calling goroutine for the given number of seconds.
func (b *BaseCollector) Sleep(seconds int) {
	pause := time.Second * time.Duration(seconds)
	time.Sleep(pause)
}
// SleepMs blocks the calling goroutine for the given number of milliseconds.
func (b *BaseCollector) SleepMs(milliseconds int) {
	pause := time.Millisecond * time.Duration(milliseconds)
	time.Sleep(pause)
}
// LogInfo writes message to the logger, prefixed with the 📌 marker.
func (b *BaseCollector) LogInfo(message string) {
	line := "📌 " + message
	b.Logger.Print(line)
}
// LogInfof formats args according to format and writes the result to the
// logger, prefixed with the 📌 marker.
func (b *BaseCollector) LogInfof(format string, args ...interface{}) {
	prefixed := "📌 " + format
	b.Logger.Printf(prefixed, args...)
}
// LogError writes message to the logger, prefixed with the ❌ marker.
func (b *BaseCollector) LogError(message string) {
	line := "❌ " + message
	b.Logger.Print(line)
}
// LogStep writes the outcome of a named step to the logger: a ✅ line on
// success or a ❌ line on failure, each followed by message.
func (b *BaseCollector) LogStep(stepName string, success bool, message string) {
	layout := "❌ %s: 失败 %s"
	if success {
		layout = "✅ %s: 成功 %s"
	}
	b.Logger.Printf(layout, stepName, message)
}
// GetCurrentURL returns the URL reported by the page's target info.
//
// It returns "" when the page has not been set up or the info request fails;
// the failure is written to the logger when one is set. The previous version
// used MustInfo, which panics on failure.
func (b *BaseCollector) GetCurrentURL() string {
	if b.Page == nil {
		return ""
	}
	info, err := b.Page.Info()
	if err != nil {
		if b.Logger != nil {
			b.Logger.Printf("❌ 获取当前URL失败: %v", err)
		}
		return ""
	}
	return info.URL
}
// Screenshot captures the page (fullPage=false, nil capture options) and
// writes the image bytes to filename with mode 0644. The capture error or the
// write error is returned.
func (b *BaseCollector) Screenshot(filename string) error {
	image, err := b.Page.Screenshot(false, nil)
	if err == nil {
		err = os.WriteFile(filename, image, 0644)
	}
	return err
}
// CheckLoginStatus is a placeholder that always reports "not logged in";
// the original comment says concrete collectors are expected to implement it.
func (b *BaseCollector) CheckLoginStatus() bool {
	const loggedIn = false
	return loggedIn
}
// WaitLogin is a placeholder that always returns false together with a
// "needs implementing" message; the original comment says concrete collectors
// are expected to implement it.
func (b *BaseCollector) WaitLogin() (bool, string) {
	const reason = "需要实现"
	return false, reason
}
// AskQuestion is a placeholder that ignores question and always returns an
// empty answer with a "needs implementing" error; the original comment says
// concrete collectors are expected to implement it.
func (b *BaseCollector) AskQuestion(question string) (string, error) {
	notImplemented := fmt.Errorf("需要实现")
	return "", notImplemented
}
// InitPage restores a saved session when possible and then persists the
// page's current cookies.
//
// If LoadCookies succeeds, the page navigates to b.ChatURL, waits for the
// load event (5 second limit passed to WaitForPageReady) and pauses 2 more
// seconds. A LoadCookies failure is not an error here: navigation is simply
// skipped. SaveCookies runs in both cases.
//
// A navigation failure is returned as an error (MustNavigate used to panic).
// Failures of the load wait and of SaveCookies remain best-effort but are now
// logged instead of being dropped.
func (b *BaseCollector) InitPage() error {
	if err := b.LoadCookies(); err == nil {
		if err := b.Page.Navigate(b.ChatURL); err != nil {
			return fmt.Errorf("打开聊天页面失败: %w", err)
		}
		if err := b.WaitForPageReady(5); err != nil {
			b.Logger.Printf("❌ 等待页面加载失败: %v", err)
		}
		b.Sleep(2)
	}
	if err := b.SaveCookies(); err != nil {
		b.Logger.Printf("❌ 保存cookies失败: %v", err)
	}
	return nil
}
// SafeElement returns the element matching selector if it is currently
// present on the page.
//
// It returns (nil, nil) when no element matches and (nil, err) when the
// lookup fails. The element handed back by Page.Has is returned directly. The
// previous version discarded it and queried again with Page.Element, which
// retries until the element appears and could therefore block if the node
// vanished between the two calls.
func (b *BaseCollector) SafeElement(selector string) (*rod.Element, error) {
	found, el, err := b.Page.Has(selector)
	if err != nil {
		return nil, err
	}
	if !found {
		return nil, nil
	}
	return el, nil
}