307 lines
7.4 KiB
Go
307 lines
7.4 KiB
Go
package collect
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"geo/internal/config"
|
||
"os"
|
||
"path/filepath"
|
||
"time"
|
||
|
||
"github.com/go-rod/rod"
|
||
"github.com/go-rod/rod/lib/launcher"
|
||
"github.com/go-rod/rod/lib/proto"
|
||
"github.com/gofiber/fiber/v2/log"
|
||
)
|
||
|
||
// BaseCollector 基础收集器结构
|
||
type BaseCollector struct {
|
||
ctx context.Context
|
||
Headless bool
|
||
RequestID string
|
||
Platform string
|
||
KeyWords []string
|
||
|
||
Browser *rod.Browser
|
||
Page *rod.Page
|
||
|
||
Logger log.AllLogger
|
||
|
||
LoginURL string
|
||
ChatURL string
|
||
CookiesFile string
|
||
|
||
config *config.Config
|
||
|
||
MaxRetries int
|
||
RetryDelay int
|
||
}
|
||
|
||
// NewBaseCollector 构造函数
|
||
func NewBaseCollector(ctx context.Context, params *CollectParams, config *config.Config, logger log.AllLogger) *BaseCollector {
|
||
var baseLogger log.AllLogger
|
||
|
||
if logger != nil {
|
||
baseLogger = logger
|
||
} else {
|
||
baseLogger = log.DefaultLogger()
|
||
}
|
||
|
||
base := &BaseCollector{
|
||
ctx: ctx,
|
||
Headless: params.Headless,
|
||
RequestID: params.RequestID,
|
||
Platform: params.Platform,
|
||
KeyWords: params.KeyWords,
|
||
Logger: baseLogger,
|
||
config: config,
|
||
MaxRetries: 3,
|
||
RetryDelay: 200,
|
||
}
|
||
|
||
// Cookie文件按平台区分,而不是按用户索引
|
||
base.CookiesFile = filepath.Join(base.cookiesDir(), params.Platform+".json")
|
||
return base
|
||
}
|
||
|
||
// SetupDriver 初始化浏览器驱动
|
||
func (b *BaseCollector) SetupDriver() error {
|
||
userDataDir := filepath.Join(b.config.Sys.ChromeDataDir, b.Platform, b.RequestID+fmt.Sprintf("___%d", time.Now().UnixNano()))
|
||
os.MkdirAll(userDataDir, 0755)
|
||
|
||
l := launcher.New().
|
||
Bin(b.config.Sys.ChromePath).
|
||
UserDataDir(userDataDir).
|
||
Headless(b.Headless).
|
||
Leakless(false).
|
||
Set("disable-blink-features", "AutomationControlled")
|
||
|
||
if b.Headless {
|
||
l.Set("headless", "new")
|
||
l.Set("disable-gpu")
|
||
l.Set("no-sandbox")
|
||
l.Set("disable-dev-shm-usage")
|
||
} else {
|
||
l.Set("window-size", "1920,1080")
|
||
l.Set("start-maximized")
|
||
l.Delete("headless")
|
||
}
|
||
|
||
l.Set("lang", "zh-CN")
|
||
l.Set("accept-lang", "zh-CN,zh;q=0.9,en;q=0.8")
|
||
l.Set("force-device-scale-factor", "1")
|
||
l.Set("timezone", "Asia/Shanghai")
|
||
|
||
url, err := l.Launch()
|
||
if err != nil {
|
||
return fmt.Errorf("启动浏览器失败: %v", err)
|
||
}
|
||
|
||
b.Browser = rod.New().Context(b.ctx).ControlURL(url).MustConnect()
|
||
b.Page = b.Browser.MustPage()
|
||
|
||
return nil
|
||
}
|
||
|
||
// Close 关闭浏览器
|
||
func (b *BaseCollector) Close() {
|
||
if b.Page != nil {
|
||
b.Page.Close()
|
||
}
|
||
if b.Browser != nil {
|
||
b.Browser.Close()
|
||
}
|
||
}
|
||
|
||
// SaveCookies 保存cookies
|
||
func (b *BaseCollector) SaveCookies() error {
|
||
cookies, err := b.Page.Cookies(nil)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
data, err := json.Marshal(cookies)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
return os.WriteFile(b.CookiesFile, data, 0644)
|
||
}
|
||
|
||
// LoadCookies 加载cookies
|
||
func (b *BaseCollector) LoadCookies() error {
|
||
data, err := os.ReadFile(b.CookiesFile)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
var cookies []*proto.NetworkCookieParam
|
||
if err := json.Unmarshal(data, &cookies); err != nil {
|
||
return err
|
||
}
|
||
return b.Page.SetCookies(cookies)
|
||
}
|
||
|
||
// DelCookies 删除cookies
|
||
func (b *BaseCollector) DelCookies() error {
|
||
return os.Remove(b.CookiesFile)
|
||
}
|
||
|
||
// WaitForPageReady 等待页面加载完成
|
||
func (b *BaseCollector) WaitForPageReady(timeout int) error {
|
||
return b.Page.Context(b.ctx).WaitLoad()
|
||
}
|
||
|
||
// WaitForElement 等待元素出现
|
||
func (b *BaseCollector) WaitForElement(selector string, timeout int) (*rod.Element, error) {
|
||
return b.Page.Context(b.ctx).Timeout(time.Duration(timeout) * time.Second).Element(selector)
|
||
}
|
||
|
||
// WaitForElementVisible 等待元素可见
|
||
func (b *BaseCollector) WaitForElementVisible(selector string, timeout int) (*rod.Element, error) {
|
||
el, err := b.WaitForElement(selector, timeout)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if err := el.WaitVisible(); err != nil {
|
||
return nil, err
|
||
}
|
||
return el, nil
|
||
}
|
||
|
||
// WaitForElementClickable 等待元素可点击
|
||
func (b *BaseCollector) WaitForElementClickable(selector string, timeout int) (*rod.Element, error) {
|
||
el, err := b.WaitForElementVisible(selector, timeout)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if err := el.WaitEnabled(); err != nil {
|
||
return nil, err
|
||
}
|
||
return el, nil
|
||
}
|
||
|
||
// JSClick JavaScript点击元素
|
||
func (b *BaseCollector) JSClick(element *rod.Element) error {
|
||
if element == nil {
|
||
b.Logger.Warn("element is nil")
|
||
return fmt.Errorf("element is nil")
|
||
}
|
||
err := element.Click(proto.InputMouseButtonLeft, 1)
|
||
if err != nil {
|
||
b.Logger.Errorf("click fail: %v", err)
|
||
}
|
||
return err
|
||
}
|
||
|
||
// SetInputValue 设置输入框值
|
||
func (b *BaseCollector) SetInputValue(element *rod.Element, value string) error {
|
||
_, err := element.Evaluate(&rod.EvalOptions{
|
||
JS: `(el, val) => { el.value = val; el.dispatchEvent(new Event('input', {bubbles: true})); el.dispatchEvent(new Event('change', {bubbles: true})); }`,
|
||
JSArgs: []interface{}{value},
|
||
})
|
||
return err
|
||
}
|
||
|
||
// ClearInput 清空输入框
|
||
func (b *BaseCollector) ClearInput(element *rod.Element) error {
|
||
_, err := element.Evaluate(&rod.EvalOptions{
|
||
JS: `el => { el.value = ''; el.dispatchEvent(new Event('input', {bubbles: true})); }`,
|
||
})
|
||
return err
|
||
}
|
||
|
||
// Sleep 等待指定秒数
|
||
func (b *BaseCollector) Sleep(seconds int) {
|
||
time.Sleep(time.Duration(seconds) * time.Second)
|
||
}
|
||
|
||
// SleepMs 等待指定毫秒数
|
||
func (b *BaseCollector) SleepMs(milliseconds int) {
|
||
time.Sleep(time.Duration(milliseconds) * time.Millisecond)
|
||
}
|
||
|
||
// LogInfo 记录信息日志
|
||
func (b *BaseCollector) LogInfo(message string) {
|
||
b.Logger.Infof("📌 %s", message)
|
||
}
|
||
|
||
// LogInfof 格式化记录信息日志
|
||
func (b *BaseCollector) LogInfof(format string, args ...interface{}) {
|
||
b.Logger.Infof("📌 "+format, args...)
|
||
}
|
||
|
||
// LogError 记录错误日志
|
||
func (b *BaseCollector) LogError(message string) {
|
||
b.Logger.Errorf("❌ %s", message)
|
||
}
|
||
|
||
// LogStep 记录步骤日志
|
||
func (b *BaseCollector) LogStep(stepName string, success bool, message string) {
|
||
if success {
|
||
b.Logger.Infof("✅ %s: 成功 %s", stepName, message)
|
||
} else {
|
||
b.Logger.Errorf("❌ %s: 失败 %s", stepName, message)
|
||
}
|
||
}
|
||
|
||
// GetCurrentURL 获取当前URL
|
||
func (b *BaseCollector) GetCurrentURL() string {
|
||
info := b.Page.MustInfo()
|
||
return info.URL
|
||
}
|
||
|
||
// Screenshot 截图
|
||
func (b *BaseCollector) Screenshot(filename string) error {
|
||
data, err := b.Page.Screenshot(false, nil)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
return os.WriteFile(filename, data, 0644)
|
||
}
|
||
|
||
// CheckLoginStatus 检查登录状态(需要子类实现)
|
||
func (b *BaseCollector) CheckLoginStatus() bool {
|
||
return false
|
||
}
|
||
|
||
// WaitLogin 等待登录(需要子类实现)
|
||
func (b *BaseCollector) WaitLogin() (bool, string) {
|
||
return false, "需要实现"
|
||
}
|
||
|
||
// AskQuestion 提问并获取答案(需要子类实现)
|
||
func (b *BaseCollector) AskQuestion(question string) (*CollectResult, error) {
|
||
return nil, fmt.Errorf("需要实现")
|
||
}
|
||
|
||
// InitPage 初始化页面
|
||
func (b *BaseCollector) InitPage() error {
|
||
// 尝试加载cookies
|
||
if err := b.LoadCookies(); err == nil {
|
||
b.Page.MustNavigate(b.ChatURL)
|
||
b.WaitForPageReady(5)
|
||
b.Sleep(3)
|
||
}
|
||
b.SaveCookies()
|
||
return nil
|
||
}
|
||
|
||
// SafeElement 安全地获取元素
|
||
func (b *BaseCollector) SafeElement(selector string) (*rod.Element, error) {
|
||
exists, _, err := b.Page.Has(selector)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if !exists {
|
||
return nil, nil
|
||
}
|
||
return b.Page.Element(selector)
|
||
}
|
||
|
||
// cookiesDir 获取cookie目录 - 按平台区分
|
||
func (b *BaseCollector) cookiesDir() string {
|
||
// 将cookie存储在 cookies/platform/{Platform} 目录下
|
||
dir := filepath.Join(b.config.Sys.PlatformCookieDir, b.Platform)
|
||
os.MkdirAll(dir, 0755)
|
||
return dir
|
||
}
|