scbank-sync/scbank_collector.py

196 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import time
import random
import os
from datetime import datetime
from playwright.sync_api import sync_playwright
class SCBankCollector:
    """Semi-automated exporter for the "pending shipment" order spreadsheet.

    The collector opens ``target_url`` in a visible (non-headless) browser,
    pre-fills the login form, waits for the operator to finish the login by
    hand (captcha + submit), then switches to the "待发货" tab and downloads
    the export offered inside the "批量发货" dialog into ``data_dir``.

    Attributes:
        target_url: Order-management page that is opened and exported from.
        ts_str: Timestamp taken at construction time; used in the name of
            the downloaded file (``shipping_order_<ts_str>.xls``).
        username / password: Credentials typed into the login form.
        data_dir: Directory the downloaded spreadsheet is saved into.
        browser / page: Playwright handles, ``None`` until ``start_browser``.
    """

    # NOTE(security): credentials committed to source control are a liability.
    # They are kept only as a last-resort fallback so existing deployments keep
    # working; prefer the constructor arguments or the SCBANK_USERNAME /
    # SCBANK_PASSWORD environment variables, and rotate this password.
    DEFAULT_USERNAME = "Lsxd01"
    DEFAULT_PASSWORD = "Lsxd@2026"

    # Launch flag shared by every browser launch attempt.
    _LAUNCH_ARGS = ("--disable-blink-features=AutomationControlled",)

    def __init__(self, username=None, password=None, data_dir="data"):
        """Prepare configuration and make sure the output directory exists.

        Args:
            username: Login name. Falls back to ``$SCBANK_USERNAME`` and then
                to ``DEFAULT_USERNAME`` when omitted.
            password: Login password. Falls back to ``$SCBANK_PASSWORD`` and
                then to ``DEFAULT_PASSWORD`` when omitted.
            data_dir: Directory for downloaded files; created if missing.
        """
        self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"
        # Timestamp embedded in the exported file name.
        self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

        if username is None:
            username = os.environ.get("SCBANK_USERNAME") or self.DEFAULT_USERNAME
        if password is None:
            password = os.environ.get("SCBANK_PASSWORD") or self.DEFAULT_PASSWORD
        self.username = username
        self.password = password

        # exist_ok avoids the check-then-create race of `if not exists: makedirs`.
        self.data_dir = data_dir
        os.makedirs(self.data_dir, exist_ok=True)

        self._playwright = None
        self.browser = None
        self.page = None

    def log(self, msg):
        """Print ``msg`` prefixed with the current wall-clock time."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    def start_browser(self):
        """Launch a visible browser and open a fresh page in ``self.page``.

        Tries the locally installed Chrome, then Edge, then Playwright's
        bundled Chromium.

        Raises:
            Exception: Re-raises the launch error when no browser can be
                started; the Playwright driver is stopped first.
        """
        self.log("启动浏览器...")
        # Keep the driver handle so it can be stopped later (see close()).
        playwright = sync_playwright().start()
        self._playwright = playwright
        launch_args = list(self._LAUNCH_ARGS)

        browser = None
        for channel in ("chrome", "msedge"):
            try:
                self.log(f"尝试启动本地 {channel}...")
                browser = playwright.chromium.launch(
                    channel=channel,
                    headless=False,
                    args=launch_args,
                )
                self.log(f"成功启动 {channel}")
                break
            except Exception:
                self.log(f"启动 {channel} 失败,尝试下一个...")

        # Neither local browser worked: fall back to the bundled Chromium.
        if not browser:
            self.log("未找到本地 Chrome 或 Edge尝试使用内置 Chromium...")
            try:
                browser = playwright.chromium.launch(
                    headless=False,
                    args=launch_args,
                )
            except Exception as e:
                self.log(f"[FATAL] 无法启动任何浏览器: {e}")
                self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。")
                # Do not leave the driver process behind when nothing launched.
                playwright.stop()
                self._playwright = None
                raise

        self.browser = browser
        context = self.browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            accept_downloads=True,  # downloads must be allowed for the export step
        )
        self.page = context.new_page()

    def close(self):
        """Close the browser and stop the Playwright driver if they are running.

        ``run`` deliberately does not call this (the browser is left open for
        the operator); it exists for callers that want explicit cleanup.
        Safe to call more than once.
        """
        if self.browser is not None:
            try:
                self.browser.close()
            except Exception as e:
                self.log(f"关闭浏览器失败: {e}")
            self.browser = None
            self.page = None
        if self._playwright is not None:
            try:
                self._playwright.stop()
            except Exception as e:
                self.log(f"停止 Playwright 失败: {e}")
            self._playwright = None

    def run(self):
        """Execute the whole flow: login, navigate, filter, export.

        All errors are logged rather than raised. The browser is intentionally
        not closed at the end.
        """
        try:
            self.start_browser()

            # 1. Login: open the page and pre-fill the form (best effort).
            self.log(f"正在打开页面: {self.target_url}")
            try:
                self.page.goto(self.target_url)
            except Exception as e:
                # Previously a bare `except: pass`; keep going (the operator can
                # still log in by hand) but make the failure visible.
                self.log(f"打开页面失败: {e}")
            else:
                self._prefill_credentials()

            self.log(">>> 请在浏览器中完成登录操作 (输入验证码并点击登录) <<<")
            # Login is considered done once the URL contains "homePage".
            # timeout=0 disables the timeout: wait as long as the operator needs.
            try:
                self.page.wait_for_url("**/homePage**", timeout=0)
                self.log("检测到登录成功!")
                self.page.wait_for_timeout(1000)
            except Exception as e:
                self.log(f"登录等待超时或失败: {e}")
                return

            # 2. Force navigation to the target page if we are not there yet.
            if "deliveryOrders" not in self.page.url:
                self.log(f"跳转至订单管理页面: {self.target_url}")
                self.page.goto(self.target_url)
                self.page.wait_for_load_state("domcontentloaded")
                self.page.wait_for_timeout(500)

            # 3. Select the status tab.
            self._filter_status()
            # 4. Trigger the bulk export and save the download.
            self._download_excel()
            self.log("采集任务完成。")
        except Exception as e:
            self.log(f"[FATAL] 脚本异常: {e}")
        finally:
            # The browser is deliberately left open after collection.
            # NOTE(review): a browser launched through Playwright may still be
            # torn down when this Python process exits -- confirm that the
            # message below matches what operators actually observe.
            self.log("浏览器保持开启状态,请手动关闭。")

    def _prefill_credentials(self):
        """Wait for the login form and type the configured credentials.

        Best effort: any failure (already logged in, changed markup, timeout)
        is logged and swallowed so the operator can continue manually.
        """
        try:
            self.log("等待登录页面加载...")
            self.page.wait_for_selector('input[name="username"]', timeout=10000)
            self.log(f"正在自动填入账号: {self.username}")
            self.page.fill('input[name="username"]', self.username)
            self.page.fill('input[name="password"]', self.password)
            self.log("账号密码已填入")
        except Exception as e:
            self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")

    def _filter_status(self):
        """Activate the "待发货" tab unless it is already selected.

        Failures are logged and swallowed.
        """
        self.log("正在点击“待发货”标签页")
        try:
            tabs = self.page.locator(".el-tabs__item:has-text('待发货')")
            if tabs.count() == 0:
                self.log(" [WARN] 未找到“待发货”Tab")
                return

            # `.first` avoids a strict-mode error when several tabs match, and
            # `or ""` guards against an element without a class attribute
            # (get_attribute returns None in that case).
            tab = tabs.first
            css_classes = tab.get_attribute("class") or ""
            if "is-active" in css_classes:
                self.log("“待发货”标签页已经是选中状态")
                return

            tab.click()
            self.log("已点击“待发货”标签页")
            self.page.wait_for_timeout(1000)
        except Exception as e:
            self.log(f"筛选操作失败: {e}")

    def _download_excel(self):
        """Open the bulk-shipping dialog and save the exported spreadsheet.

        The file is written to ``<data_dir>/shipping_order_<ts_str>.xls``.
        Missing buttons and any other failure are logged, never raised.
        """
        try:
            self.log("准备触发批量发货...")

            # 1. Open the bulk-shipping dialog.
            batch_ship_btn = self.page.locator("button:has-text('批量发货')")
            if batch_ship_btn.count() == 0:
                self.log("[WARN] 未找到 '批量发货' 按钮")
                return
            batch_ship_btn.first.click()
            self.log("已点击 '批量发货' 按钮,等待弹窗加载...")
            # Give the dialog and its buttons time to render.
            self.page.wait_for_timeout(2000)

            # 2. Find the export button inside the dialog (matched by text).
            self.log("尝试寻找并点击 '导出待发货订单' 按钮...")
            export_btn = self.page.locator("button:has-text('导出待发货订单')")
            if export_btn.count() == 0:
                self.log("[ERROR] 弹窗中未找到 '导出待发货订单' 按钮,可能是因为无待发货订单或者页面结构变更")
                # Dismiss the dialog so it does not block later interaction.
                self.page.keyboard.press("Escape")
                return

            # 3. Click while listening for the download the click triggers.
            self.log("开始监听文件下载...")
            with self.page.expect_download(timeout=60000) as download_info:
                export_btn.first.click()
                self.log("已点击 '导出待发货订单'")
            download = download_info.value

            # 4. Persist the download into the data directory.
            file_name = f"shipping_order_{self.ts_str}.xls"
            save_path = os.path.join(self.data_dir, file_name)
            self.log("正在保存文件...")
            download.save_as(save_path)
            self.log(f"✅ 文件下载成功: {save_path}")

            # 5. Dismiss the dialog.
            self.page.wait_for_timeout(1000)
            self.page.keyboard.press("Escape")
        except Exception as e:
            self.log(f"执行批量导出失败: {e}")
if __name__ == "__main__":
    # Script entry point: build a collector with default settings and run
    # one full collection pass.
    SCBankCollector().run()