geoGo/internal/publisher/baijiahao.go

642 lines
16 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package publisher
import (
"context"
"fmt"
"geo/internal/config"
"log"
"path/filepath"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
)
// BaijiahaoPublisher automates the Baijiahao article editor through a browser
// page. It has no fields of its own: everything its methods touch (Page,
// LoginURL, EditorURL, Title, SourcePath, the logging helpers, ...) is
// promoted from the embedded *BasePublisher.
type BaijiahaoPublisher struct {
*BasePublisher
}
// NewBaijiahaoPublisher creates a Baijiahao publisher backed by a new
// BasePublisher built from the given context, task, config and logger, and
// returns it as a PublisherInerface.
func NewBaijiahaoPublisher(ctx context.Context, task *TaskParams, cfg *config.Config, logger *log.Logger) PublisherInerface {
	base := NewBasePublisher(ctx, task, cfg, logger)
	return &BaijiahaoPublisher{BasePublisher: base}
}
// CheckLoginStatus reports whether the session looks logged in. The only
// signal used is the current URL: if it contains the configured login URL the
// session is treated as logged out, otherwise as logged in.
func (p *BaijiahaoPublisher) CheckLoginStatus() bool {
	onLoginPage := strings.Contains(p.GetCurrentURL(), p.LoginURL)
	return !onLoginPage
}
// CheckLogin starts the browser, opens the editor URL and reports whether the
// session is logged in. When it is, the current cookies are saved.
//
// It returns (true, "已登录") when logged in and (false, reason) otherwise;
// the reason also covers browser start-up and navigation failures. Navigation
// errors are returned instead of panicking. The browser is not closed here.
func (p *BaijiahaoPublisher) CheckLogin() (bool, string) {
	if err := p.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	// Navigate (not MustNavigate) so that a network or target error is
	// reported to the caller rather than crashing the process.
	if err := p.Page.Navigate(p.EditorURL); err != nil {
		return false, fmt.Sprintf("打开编辑页失败: %v", err)
	}
	p.Sleep(3)
	p.WaitForPageReady(5)

	if !p.CheckLoginStatus() {
		return false, "未登录"
	}
	p.SaveCookies()
	return true, "已登录"
}
// WaitLogin starts the browser and waits for the user to log in.
//
// It first opens LoginedURL; if the session is already logged in it saves the
// cookies and returns (true, "already_logged_in"). Otherwise it opens the
// login page and polls the login status up to 120 times, pausing p.Sleep(1)
// between checks (presumably one second each — confirm against BasePublisher).
// On success it saves the cookies and returns (true, "login_success"); on
// exhaustion it returns (false, "登录超时"). Browser start-up and navigation
// failures are returned as (false, reason) instead of panicking.
func (p *BaijiahaoPublisher) WaitLogin() (bool, string) {
	if err := p.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}

	if err := p.Page.Navigate(p.LoginedURL); err != nil {
		return false, fmt.Sprintf("打开页面失败: %v", err)
	}
	p.Sleep(3)
	if p.CheckLoginStatus() {
		p.SaveCookies()
		return true, "already_logged_in"
	}

	if err := p.Page.Navigate(p.LoginURL); err != nil {
		return false, fmt.Sprintf("打开登录页失败: %v", err)
	}
	p.LogInfo("请扫描二维码登录...")

	const maxPolls = 120
	for i := 0; i < maxPolls; i++ {
		p.Sleep(1)
		if p.CheckLoginStatus() {
			p.SaveCookies()
			return true, "login_success"
		}
	}
	return false, "登录超时"
}
// checkElementExists reports whether WaitForElement succeeds for selector
// within the given timeout. The element itself is discarded.
func (p *BaijiahaoPublisher) checkElementExists(selector string, timeout int) bool {
	if _, err := p.WaitForElement(selector, timeout); err != nil {
		return false
	}
	return true
}
// PublishNote runs the whole publishing flow: it starts the browser, restores
// cookies, verifies the login state and then delegates to doPublish. The
// browser is always closed on return.
//
// It returns doPublish's result when logged in, and (false, reason) when the
// browser cannot be started, the editor page cannot be opened, or a login is
// required. Navigation errors are returned instead of panicking.
func (p *BaijiahaoPublisher) PublishNote() (bool, string) {
	if err := p.SetupDriver(); err != nil {
		return false, fmt.Sprintf("浏览器启动失败: %v", err)
	}
	defer p.Close()

	if p.LoadCookies() == nil {
		if err := p.Page.Navigate(p.EditorURL); err != nil {
			return false, fmt.Sprintf("打开编辑页失败: %v", err)
		}
		p.WaitForPageReady(5)
		if p.CheckLoginStatus() {
			return p.doPublish()
		}
	}

	// NOTE(review): if LoadCookies failed, this method has not navigated
	// anywhere yet, so the check below only looks at whatever URL SetupDriver
	// left the page on. Confirm that SetupDriver opens a meaningful page;
	// otherwise a blank page would be treated as "logged in".
	if p.CheckLoginStatus() {
		p.SaveCookies()
		return p.doPublish()
	}
	return false, "需要登录"
}
// doPublish executes the publishing steps in a fixed order and then waits for
// the publish result. The first failing step aborts the run: it is logged via
// LogStep and reported as (false, "<step>失败: <err>"). After every successful
// step the method logs it and pauses via p.SleepMs(500).
func (p *BaijiahaoPublisher) doPublish() (bool, string) {
	p.LogInfo("开始发布百家号文章...")

	type publishStep struct {
		label string
		run   func() error
	}
	pipeline := []publishStep{
		{label: "点击hover", run: p.clickHoverButton},
		{label: "输入内容", run: p.inputContent},
		{label: "输入标题", run: p.inputTitle},
		{label: "设置封面", run: p.uploadImage},
		{label: "点击发布按钮", run: p.clickPublish},
		// Disabled step: {"处理确认弹窗", p.handleConfirmModal}
	}

	// Example of the URL seen after a successful publish:
	// https://baijiahao.baidu.com/builder/rc/clue?aside=0&footer=true&from=news&firstPublish=undefined&word_bag_id=null
	for i := range pipeline {
		current := pipeline[i]
		err := current.run()
		if err != nil {
			p.LogStep(current.label, false, err.Error())
			return false, fmt.Sprintf("%s失败: %v", current.label, err)
		}
		p.LogStep(current.label, true, "")
		p.SleepMs(500)
	}
	return p.waitForPublishResult()
}
// clickHoverButton dismisses the editor's onboarding tour (".cheetah-tour")
// by repeatedly clicking its visible buttons until the tour disappears or the
// attempt budget is used up.
//
// The tour is treated as optional: if its content does not show up within the
// bounded wait, the step is skipped and nil is returned, so that an account
// which no longer sees the tour can still publish. The method currently never
// returns a non-nil error.
func (p *BaijiahaoPublisher) clickHoverButton() error {
	p.LogInfo("点击HoverButton...")
	// Let the page settle before looking for the tour.
	p.WaitForPageReady(5)

	// Bounded wait (timeout unit is whatever WaitForElement uses — presumably
	// seconds). Page.Element would keep retrying with no bound of its own.
	if _, err := p.WaitForElement("div.cheetah-tour-content", 5); err != nil {
		p.LogInfof("未检测到引导弹窗,跳过: %v", err)
		return nil
	}

	const (
		maxAttempts = 10
		pause       = 500 * time.Millisecond
	)
	for i := 0; i < maxAttempts; i++ {
		// Stop as soon as the tour is gone (or cannot be queried).
		exists, _, err := p.Page.Has(".cheetah-tour")
		if err != nil || !exists {
			p.LogInfo("弹窗已关闭")
			break
		}

		// Click the first rendered button of the tour and report its label.
		result, err := p.Page.Eval(`() => {
			const btns = document.querySelectorAll('.cheetah-tour .cheetah-btn, .cheetah-tour-next-btn, .cheetah-tour-close');
			for (const btn of btns) {
				if (btn.offsetParent !== null) {
					const text = btn.innerText || btn.textContent;
					btn.click();
					return text;
				}
			}
			return null;
		}`)
		if err != nil {
			p.LogInfof("执行点击出错: %v", err)
			// Back off before retrying instead of retrying immediately.
			time.Sleep(pause)
			continue
		}
		p.LogInfof("第%d次点击按钮: %v", i+1, result)

		// Give the tour time to react to the click.
		time.Sleep(pause)
	}
	p.LogInfo("引导弹窗处理完成")
	return nil
}
// inputTitle types p.Title into the article title field.
//
// The field is located by trying a list of selectors in order. Any existing
// text is cleared, the title is typed, input events are fired, and the result
// is read back. If the field does not contain the title afterwards, the title
// is set directly through JavaScript as a fallback.
//
// It returns an error only when no title field can be found; click, typing
// and fallback failures are logged and otherwise tolerated (best effort).
func (p *BaijiahaoPublisher) inputTitle() error {
	p.LogInfo("输入文章标题...")
	titleSelectors := []string{
		".client_pages_edit_components_titleInput ._9ddb7e475b559749-editor",
		".input-box ._9ddb7e475b559749-editor",
		"[contenteditable='true']",
		".bjh-news-drag-tip + div [contenteditable='true']",
	}

	var titleInput *rod.Element
	for _, selector := range titleSelectors {
		titleInput, _ = p.WaitForElementVisible(selector, 5)
		if titleInput != nil {
			p.LogInfo(fmt.Sprintf("找到标题输入框: %s", selector))
			break
		}
	}
	if titleInput == nil {
		return fmt.Errorf("未找到标题输入框")
	}

	if err := titleInput.Click(proto.InputMouseButtonLeft, 1); err != nil {
		p.LogInfof("点击标题输入框失败: %v", err)
	}
	p.SleepMs(500)

	currentTitle, _ := titleInput.Text()
	if currentTitle != "" {
		// Truncate by runes, not bytes, so a multi-byte character (e.g. CJK)
		// is never cut in half in the log line.
		preview := []rune(currentTitle)
		if len(preview) > 50 {
			preview = preview[:50]
		}
		p.LogInfo(fmt.Sprintf("清空当前标题: %s", string(preview)))
		p.ClearContentEditable(titleInput)
		p.SleepMs(200)
	}

	if err := titleInput.Input(p.Title); err != nil {
		p.LogInfof("键盘输入标题失败: %v", err)
	} else {
		p.LogInfo(fmt.Sprintf("新标题已输入: %s", p.Title))
	}
	p.triggerInputEvents(titleInput)
	p.SleepMs(500)

	finalTitle, _ := titleInput.Text()
	// Compare trimmed values so surrounding whitespace in the read-back text
	// does not trigger the fallback.
	if strings.TrimSpace(finalTitle) != strings.TrimSpace(p.Title) {
		// Run the script on the element itself (bound to `this`) and pass the
		// title as an argument instead of splicing it into the script source,
		// so quotes or markup in the title cannot break or inject code.
		if _, err := titleInput.Eval(`(title) => { this.textContent = title; }`, p.Title); err != nil {
			p.LogInfof("通过 JavaScript 设置标题失败: %v", err)
			return nil
		}
		p.triggerInputEvents(titleInput)
		p.LogInfo("已通过 JavaScript 重新设置标题")
	}
	return nil
}
// inputContent fills the article body by importing the document at
// p.SourcePath through the editor's "导入文档" (import document) feature.
//
// Steps: hover the toolbar item "#edui41", click the "导入文档" entry of the
// popover that appears, hand the file to the upload input of the dialog, then
// poll for a success or failure message. If neither message shows up within
// the polling budget, the method waits a little longer and returns nil, i.e.
// it assumes the import went through.
//
// It returns an error when SourcePath is empty (checked before touching the
// page), when a required element cannot be found, when a click/upload call
// fails, or when the page shows an import failure message.
//
// All polling uses the non-blocking Has/HasX lookups so that each loop really
// is bounded by its iteration count and its fallbacks are reachable.
func (p *BaijiahaoPublisher) inputContent() error {
	p.LogInfo("开始导入文档内容...")
	if p.SourcePath == "" {
		return fmt.Errorf("未提供文档路径")
	}

	// 1. Hover the toolbar item with id "edui41" to open its popover.
	edui41, err := p.WaitForElement("#edui41", 10)
	if err != nil {
		return fmt.Errorf("未找到编辑器工具栏: %w", err)
	}
	if err := edui41.Hover(); err != nil {
		return fmt.Errorf("hover 失败: %w", err)
	}
	p.LogInfo("已 hover 到编辑器工具栏")
	p.SleepMs(500)

	// 2. Find the "导入文档" entry. Its class carries a generated prefix, so it
	// is matched by the "-label" suffix plus its text.
	const importLabelXPath = "//div[contains(@class, '-label') and contains(text(), '导入文档')]"
	var importDocBtn *rod.Element
	for i := 0; i < 10; i++ {
		if found, popover, err := p.Page.Has("[class*='cheetah-popover']"); err == nil && found {
			if ok, el, err := popover.HasX(importLabelXPath); err == nil && ok {
				importDocBtn = el
				p.LogInfo("找到导入文档按钮")
				break
			}
			// Fallback: search the whole page.
			if ok, el, err := p.Page.HasX(importLabelXPath); err == nil && ok {
				importDocBtn = el
				p.LogInfo("通过 XPath 找到导入文档按钮")
				break
			}
		}
		p.SleepMs(500)
	}
	if importDocBtn == nil {
		return fmt.Errorf("未找到导入文档按钮")
	}
	if err := p.JSClick(importDocBtn); err != nil {
		return fmt.Errorf("点击导入文档按钮失败: %w", err)
	}
	p.LogInfo("已点击导入文档按钮")
	p.SleepMs(1000)

	// 3. Find the file input inside the dialog that opened.
	var fileInput *rod.Element
	for i := 0; i < 10; i++ {
		if found, dialog, err := p.Page.Has("[role='dialog']"); err == nil && found {
			if ok, el, err := dialog.Has("input[name='file']"); err == nil && ok {
				fileInput = el
				p.LogInfo("找到文件上传输入框")
				break
			}
			// Fallback: search the whole page.
			if ok, el, err := p.Page.Has("input[name='file']"); err == nil && ok {
				fileInput = el
				p.LogInfo("通过全局选择器找到文件上传输入框")
				break
			}
		}
		p.SleepMs(500)
	}
	if fileInput == nil {
		return fmt.Errorf("未找到文件上传输入框")
	}

	// 4. Upload the document.
	if err := fileInput.SetFiles([]string{p.SourcePath}); err != nil {
		return fmt.Errorf("上传文档失败: %w", err)
	}
	p.LogInfo(fmt.Sprintf("已上传文档: %s", p.SourcePath))

	// 5. Wait for the import result. The file name is embedded as a properly
	// quoted XPath literal so names containing quotes cannot break the query.
	fileName := filepath.Base(p.SourcePath)
	namedSuccessXPath := fmt.Sprintf(
		"//*[contains(text(), %s) and (contains(text(), '成功') or contains(text(), '导入'))]",
		baijiahaoXPathLiteral(fileName),
	)
	const (
		genericSuccessXPath = "//*[contains(text(), '导入成功')]"
		failureXPath        = "//*[contains(text(), '失败') or contains(text(), '错误')]"
	)
	for i := 0; i < 30; i++ {
		// Success message mentioning the file name.
		if ok, el, err := p.Page.HasX(namedSuccessXPath); err == nil && ok {
			text, _ := el.Text()
			p.LogInfo(fmt.Sprintf("文档导入成功: %s", text))
			p.SleepMs(2000) // let the imported content render
			return nil
		}
		// Generic success message.
		if ok, el, err := p.Page.HasX(genericSuccessXPath); err == nil && ok {
			text, _ := el.Text()
			p.LogInfo(fmt.Sprintf("文档导入成功: %s", text))
			p.SleepMs(2000)
			return nil
		}
		// Failure message; only counted when it refers to this import.
		if ok, el, err := p.Page.HasX(failureXPath); err == nil && ok {
			text, _ := el.Text()
			if strings.Contains(text, fileName) || strings.Contains(text, "导入") {
				return fmt.Errorf("文档导入失败: %s", text)
			}
		}
		p.SleepMs(500)
	}

	// No explicit confirmation was seen; give the content a few more seconds.
	p.LogInfo("等待内容加载完成...")
	p.SleepMs(3000)
	return nil
}

// baijiahaoXPathLiteral renders s as an XPath 1.0 string literal. XPath has no
// escape character, so a value containing both quote kinds is expressed as a
// concat() of single-quoted pieces joined by a double-quoted apostrophe.
func baijiahaoXPathLiteral(s string) string {
	if !strings.Contains(s, "'") {
		return "'" + s + "'"
	}
	if !strings.Contains(s, `"`) {
		return `"` + s + `"`
	}
	pieces := strings.Split(s, "'")
	return "concat('" + strings.Join(pieces, `', "'", '`) + "')"
}
// uploadImage sets the article cover.
//
// When p.ImagePath is empty the step is skipped. Otherwise the method clicks
// the first visible cover area (if any) and then clicks the visible "确定"
// confirm button of the cover picker. Note that the active code never hands
// p.ImagePath to the page: the manual file-upload flow is commented out
// below, so ImagePath currently only enables or disables this step.
//
// Only elements that were actually seen visible are clicked. It returns an
// error when no visible confirm button is found within p.MaxRetries rounds or
// when clicking it fails.
func (p *BaijiahaoPublisher) uploadImage() error {
	if p.ImagePath == "" {
		p.LogInfo("未提供封面图片路径,跳过封面设置")
		return nil
	}
	p.LogInfo("设置文章封面...")

	// Find and click the cover selection area.
	coverSelectors := []string{
		".cheetah-spin-container",
		"._73a3a52aab7e3a36-default",
		".cover-selector",
		"[class*='spin-container']",
	}
	var coverArea *rod.Element
	for _, selector := range coverSelectors {
		candidate, _ := p.WaitForElement(selector, 3)
		if candidate == nil {
			continue
		}
		// Keep the candidate only if it is visible; an invisible match must
		// not leak out of the loop and get clicked.
		if visible, _ := candidate.Visible(); visible {
			coverArea = candidate
			p.LogInfo(fmt.Sprintf("找到封面区域: %s", selector))
			break
		}
	}
	if coverArea != nil {
		p.ScrollToElement(coverArea)
		p.SleepMs(500)
		if err := p.JSClick(coverArea); err != nil {
			p.LogInfof("点击封面选择区域失败: %v", err)
		} else {
			p.LogInfo("已点击封面选择区域")
		}
		p.SleepMs(2000)
	}

	// Disabled: manual upload flow, kept for reference.
	//// Find and click the upload area
	//uploadSelectors := []string{
	//	"div[class*='cheetah-upload']",
	//	".cheetah-upload",
	//	"div[class*='upload']",
	//	".upload-area",
	//	"._73a3a52aab7e3a36-content",
	//	"._93c3fe2a3121c388-item",
	//}
	//var uploadArea *rod.Element
	//for _, selector := range uploadSelectors {
	//	elements, _ := p.Page.Elements(selector)
	//	for _, elem := range elements {
	//		visible, _ := elem.Visible()
	//		if visible {
	//			uploadArea = elem
	//			p.LogInfo(fmt.Sprintf("找到上传区域: %s", selector))
	//			break
	//		}
	//	}
	//	if uploadArea != nil {
	//		break
	//	}
	//}
	//if uploadArea != nil {
	//	p.ScrollToElement(uploadArea)
	//	p.SleepMs(500)
	//	p.JSClick(uploadArea)
	//	p.LogInfo("已点击图片上传区域")
	//	p.SleepMs(1000)
	//}
	//
	//// Find the cheetah-upload component
	//componentSelectors := []string{
	//	"div[class*='cheetah-upload']",
	//	".cheetah-upload",
	//	"div[class*='upload']",
	//}
	//var uploadComponent *rod.Element
	//for _, selector := range componentSelectors {
	//	elements, _ := p.Page.Elements(selector)
	//	for _, elem := range elements {
	//		visible, _ := elem.Visible()
	//		if visible {
	//			uploadComponent = elem
	//			p.LogInfo(fmt.Sprintf("找到cheetah-upload组件: %s", selector))
	//			break
	//		}
	//	}
	//	if uploadComponent != nil {
	//		break
	//	}
	//}
	//if uploadComponent != nil {
	//	p.ScrollToElement(uploadComponent)
	//	p.SleepMs(500)
	//	p.JSClick(uploadComponent)
	//	p.LogInfo("已点击cheetah-upload上传组件")
	//	p.SleepMs(2000)
	//}
	//
	//// Find the file upload input
	//var fileInput *rod.Element
	//for i := 0; i < 10; i++ {
	//	fileInput, _ = p.Page.Element("input[name='media'][type='file'][accept='image/*']")
	//	if fileInput != nil {
	//		p.LogInfo("找到文件上传输入框")
	//		break
	//	}
	//	fileInput, _ = p.Page.Element("input[type='file'][accept*='image']")
	//	if fileInput != nil {
	//		p.LogInfo("通过备用选择器找到文件上传输入框")
	//		break
	//	}
	//	p.SleepMs(500)
	//}
	//if fileInput != nil {
	//	fileInput.SetFiles([]string{p.ImagePath})
	//	p.LogInfo(fmt.Sprintf("图片上传成功: %s", p.ImagePath))
	//	p.Sleep(3)
	//}

	// Find and click the confirm button. Candidates go from most to least
	// specific; each is looked up without blocking so every round really
	// tries all of them.
	confirmCandidates := []struct {
		xpath    string
		foundMsg string
	}{
		// Exact: primary button whose span text is "确定 (1)".
		{"//button[contains(@class, 'cheetah-btn-primary')]//span[text()='确定 (1)']/..", "找到确认按钮"},
		// Fallback: match the span text only.
		{"//span[text()='确定 (1)']/..", "通过 span 文本找到确认按钮"},
		// Fallback: any primary button whose text contains "确定".
		{"//button[contains(@class, 'cheetah-btn-primary') and contains(., '确定')]", "通过文本内容找到确认按钮"},
	}
	var confirmBtn *rod.Element
	for i := 0; i < p.MaxRetries && confirmBtn == nil; i++ {
		p.LogInfo("正在查找确认按钮...")
		for _, candidate := range confirmCandidates {
			found, el, err := p.Page.HasX(candidate.xpath)
			if err != nil || !found {
				continue
			}
			if visible, _ := el.Visible(); visible {
				confirmBtn = el
				p.LogInfo(candidate.foundMsg)
				break
			}
		}
		if confirmBtn == nil {
			p.SleepMs(p.RetryDelay)
		}
	}
	if confirmBtn == nil {
		return fmt.Errorf("未找到确认按钮")
	}

	if err := p.JSClick(confirmBtn); err != nil {
		return fmt.Errorf("点击确认按钮失败: %w", err)
	}
	p.LogInfo("已点击确认按钮")
	p.SleepMs(2000)
	return nil
}
// clickPublish locates the visible publish button and clicks it.
//
// The button is searched for in up to 10 rounds (p.Sleep(1) between rounds),
// first by CSS selectors and then by an XPath text match. Only a button that
// was seen visible is accepted. The click is attempted up to three times,
// each time through JSClick first and a regular mouse click second.
//
// It returns an error when no visible button is found or when every click
// attempt fails; in the latter case the last click error is wrapped.
func (p *BaijiahaoPublisher) clickPublish() error {
	p.LogInfo("点击发布按钮...")
	publishSelectors := []string{
		"[data-testid='publish-btn']",
		".op-list-right .cheetah-btn-primary",
	}

	var publishBtn *rod.Element
	for i := 0; i < 10 && publishBtn == nil; i++ {
		for _, selector := range publishSelectors {
			// Non-blocking lookup so each round can try every selector.
			found, el, err := p.Page.Has(selector)
			if err != nil || !found {
				continue
			}
			if visible, _ := el.Visible(); visible {
				publishBtn = el
				p.LogInfo(fmt.Sprintf("找到发布按钮: %s", selector))
				break
			}
		}
		if publishBtn != nil {
			break
		}

		if found, el, err := p.Page.HasX("//button[contains(text(), '发布')]"); err == nil && found {
			if visible, _ := el.Visible(); visible {
				publishBtn = el
				p.LogInfo("通过XPath找到发布按钮")
				break
			}
		}
		p.Sleep(1)
	}
	if publishBtn == nil {
		return fmt.Errorf("未找到发布按钮")
	}

	p.ScrollToElement(publishBtn)
	p.Sleep(1)

	var lastErr error
	for attempt := 0; attempt < 3; attempt++ {
		if err := p.JSClick(publishBtn); err == nil {
			p.LogInfo(fmt.Sprintf("已通过JavaScript点击发布按钮 (尝试 %d)", attempt+1))
			p.Sleep(3)
			return nil
		}
		err := publishBtn.Click(proto.InputMouseButtonLeft, 1)
		if err == nil {
			p.LogInfo(fmt.Sprintf("已通过普通点击发布按钮 (尝试 %d)", attempt+1))
			p.Sleep(3)
			return nil
		}
		lastErr = err
		p.Sleep(1)
	}
	return fmt.Errorf("点击发布按钮失败: %w", lastErr)
}
// handleConfirmModal clicks the primary button of a ".cheetah-modal" dialog
// if WaitForElement returns one, and does nothing otherwise. It always
// returns nil; the result of the click itself is not checked.
func (p *BaijiahaoPublisher) handleConfirmModal() error {
	modalBtn, _ := p.WaitForElement(".cheetah-modal .cheetah-btn-primary", 3)
	if modalBtn == nil {
		return nil
	}
	p.JSClick(modalBtn)
	p.LogInfo("已点击确认弹窗")
	p.Sleep(2)
	return nil
}
// waitForPublishResult polls the page up to 60 times (p.Sleep(1) between
// rounds) to decide how the publish ended. In each round it checks, in order:
//
//  1. the current URL containing "clue"        -> (true, "发布成功")
//  2. a visible success/info toast whose text
//     contains "成功" or "发布"                -> (true, toast text)
//  3. a visible error/warning toast whose text
//     contains "失败" or "错误"                -> (false, "发布失败: <text>")
//
// If no round matches it returns (false, "发布结果未知").
func (p *BaijiahaoPublisher) waitForPublishResult() (bool, string) {
	p.LogInfo("等待发布结果...")

	// visibleMessage returns the text of the first visible element matching
	// selector whose text contains at least one of the keywords.
	visibleMessage := func(selector string, keywords ...string) (string, bool) {
		nodes, _ := p.Page.Elements(selector)
		for _, node := range nodes {
			if shown, _ := node.Visible(); !shown {
				continue
			}
			msg, _ := node.Text()
			for _, kw := range keywords {
				if strings.Contains(msg, kw) {
					return msg, true
				}
			}
		}
		return "", false
	}

	for round := 1; round <= 60; round++ {
		pageURL := p.GetCurrentURL()
		p.LogInfo(fmt.Sprintf("第 %d 次检查 - URL: %s", round, pageURL))
		if strings.Contains(pageURL, "clue") {
			p.LogInfo(fmt.Sprintf("发布成功URL: %s", pageURL))
			return true, "发布成功"
		}

		if msg, ok := visibleMessage(".cheetah-message-success, .cheetah-message-info", "成功", "发布"); ok {
			p.LogInfo(fmt.Sprintf("发布成功: %s", msg))
			return true, msg
		}

		if msg, ok := visibleMessage(".cheetah-message-error, .cheetah-message-warning", "失败", "错误"); ok {
			failure := fmt.Sprintf("发布失败: %s", msg)
			p.LogError(failure)
			return false, failure
		}

		p.Sleep(1)
	}
	return false, "发布结果未知"
}
// triggerInputEvents dispatches bubbling "input", "change" and "blur" events
// on el so that the page's own listeners notice a programmatic edit.
//
// The script addresses the element through `this`: Element.Eval binds the
// element as `this` and is called here without extra parameters, so an
// `arguments[0]` reference would be undefined. A nil element is ignored and
// an evaluation failure is logged rather than returned.
func (p *BaijiahaoPublisher) triggerInputEvents(el *rod.Element) {
	if el == nil {
		return
	}
	_, err := el.Eval(`() => {
		this.dispatchEvent(new Event('input', {bubbles: true}));
		this.dispatchEvent(new Event('change', {bubbles: true}));
		this.dispatchEvent(new Event('blur', {bubbles: true}));
	}`)
	if err != nil {
		p.LogInfof("触发输入事件失败: %v", err)
	}
}
// min returns the smaller of a and b. This package-level definition shadows
// the built-in of the same name for int arguments within the package.
func min(a, b int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	return smallest
}