196 lines
7.6 KiB
Python
196 lines
7.6 KiB
Python
import json
|
||
import time
|
||
import random
|
||
import os
|
||
from datetime import datetime
|
||
from playwright.sync_api import sync_playwright
|
||
|
||
class SCBankCollector:
    """Drive a visible browser to export the pending-shipment order sheet.

    The workflow implemented by :meth:`run` is: open the delivery-orders page,
    pre-fill the login form, wait for the operator to finish logging in by
    hand, switch to the "待发货" tab, open the "批量发货" dialog and save the
    file produced by the "导出待发货订单" button as
    ``data/shipping_order_<timestamp>.xls``.

    The class expects ``sync_playwright`` to be imported at module level.
    """

    # Launch flag shared by every browser channel that is tried.
    _LAUNCH_ARGS = ("--disable-blink-features=AutomationControlled",)

    # SECURITY NOTE(review): these built-in credentials are kept only so that
    # existing callers of ``SCBankCollector()`` keep working. Prefer passing
    # credentials to the constructor or setting SCBANK_USERNAME /
    # SCBANK_PASSWORD, and consider removing the literals from source control.
    _DEFAULT_USERNAME = "Lsxd01"
    _DEFAULT_PASSWORD = "Lsxd@2026"

    def __init__(self, username=None, password=None):
        """Prepare configuration and make sure the ``data`` directory exists.

        Args:
            username: Login name to pre-fill. When ``None``, the
                ``SCBANK_USERNAME`` environment variable is used, falling back
                to the built-in default.
            password: Password to pre-fill. When ``None``, the
                ``SCBANK_PASSWORD`` environment variable is used, falling back
                to the built-in default.
        """
        self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"
        # Captured once so the exported file name is stable for this run:
        # data/shipping_order_YYYYMMDD_HHMMSS.xls
        self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

        if username is None:
            username = os.environ.get("SCBANK_USERNAME", self._DEFAULT_USERNAME)
        if password is None:
            password = os.environ.get("SCBANK_PASSWORD", self._DEFAULT_PASSWORD)
        self.username = username
        self.password = password

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs("data", exist_ok=True)

        # Populated by start_browser(); released by close().
        self.playwright = None
        self.browser = None
        self.page = None

    def log(self, msg):
        """Print *msg* prefixed with the current wall-clock time (HH:MM:SS)."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    def close(self):
        """Close the browser and stop the Playwright driver if they were started.

        Not called automatically: :meth:`run` deliberately leaves the browser
        open when it finishes.
        """
        if self.browser is not None:
            self.browser.close()
            self.browser = None
        self.page = None
        if self.playwright is not None:
            self.playwright.stop()
            self.playwright = None

    def _pause(self, seconds):
        """Wait *seconds* through the page instead of ``time.sleep``.

        Playwright's documentation recommends ``page.wait_for_timeout`` over
        ``time.sleep`` when using the sync API.
        """
        self.page.wait_for_timeout(seconds * 1000)

    def _launch_browser(self, playwright):
        """Return a headed browser: local Chrome, then Edge, then bundled Chromium.

        Raises:
            Exception: whatever the final Chromium launch raised, after logging it.
        """
        for channel in ("chrome", "msedge"):
            try:
                self.log(f"尝试启动本地 {channel}...")
                browser = playwright.chromium.launch(
                    channel=channel,
                    headless=False,
                    args=list(self._LAUNCH_ARGS),
                )
            except Exception as e:
                # Report why the channel failed instead of discarding the error;
                # only the first line is kept to keep the log readable.
                reason = str(e).strip().split("\n", 1)[0]
                self.log(f"启动 {channel} 失败,尝试下一个... ({reason})")
            else:
                self.log(f"成功启动 {channel}")
                return browser

        # Neither local browser could be started: fall back to bundled Chromium.
        self.log("未找到本地 Chrome 或 Edge,尝试使用内置 Chromium...")
        try:
            return playwright.chromium.launch(
                headless=False,
                args=list(self._LAUNCH_ARGS),
            )
        except Exception as e:
            self.log(f"[FATAL] 无法启动任何浏览器: {e}")
            self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。")
            raise

    def start_browser(self):
        """Start Playwright, launch a browser and open a page in a new context.

        On success ``self.playwright``, ``self.browser`` and ``self.page`` are
        set. If no browser can be launched the Playwright driver is stopped
        again and the launch error is re-raised.
        """
        self.log("启动浏览器...")
        # Keep the handle so the driver can be stopped later (see close()).
        self.playwright = sync_playwright().start()
        try:
            self.browser = self._launch_browser(self.playwright)
        except Exception:
            self.playwright.stop()
            self.playwright = None
            raise

        context = self.browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            accept_downloads=True,  # downloads must be allowed for the export step
        )
        self.page = context.new_page()

    def _prefill_credentials(self):
        """Fill the username/password inputs; failures are logged, not raised."""
        try:
            self.log("等待登录页面加载...")
            self.page.wait_for_selector('input[name="username"]', timeout=10000)

            self.log(f"正在自动填入账号: {self.username}")
            self.page.fill('input[name="username"]', self.username)
            self.page.fill('input[name="password"]', self.password)
            self.log("账号密码已填入")
        except Exception as e:
            self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")

    def run(self):
        """Execute the whole export workflow; never raises.

        Any exception is logged with a ``[FATAL]`` prefix. The browser is left
        open when the method returns.
        """
        try:
            self.start_browser()

            # 1. Login: open the page and pre-fill the form.
            self.log(f"正在打开页面: {self.target_url}")
            try:
                self.page.goto(self.target_url)
            except Exception as e:
                # Best effort: the operator can still navigate by hand, but
                # the failure is no longer swallowed silently.
                self.log(f"打开页面失败: {e}")
            else:
                self._prefill_credentials()

            self.log(">>> 请在浏览器中完成登录操作 (输入验证码并点击登录) <<<")
            # The operator finishes the login manually, so wait without a
            # deadline (timeout=0) until the URL contains "homePage".
            try:
                self.page.wait_for_url("**/homePage**", timeout=0)
                self.log("检测到登录成功!")
                self._pause(1)
            except Exception as e:
                self.log(f"登录等待超时或失败: {e}")
                return

            # 2. Force navigation to the delivery-orders page.
            if "deliveryOrders" not in self.page.url:
                self.log(f"跳转至订单管理页面: {self.target_url}")
                self.page.goto(self.target_url)
                self.page.wait_for_load_state("domcontentloaded")
                self._pause(0.5)

            # 3. Select the pending-shipment tab.
            self._filter_status()

            # 4. Trigger the bulk export and save the download.
            self._download_excel()

            self.log("采集任务完成。")

        except Exception as e:
            self.log(f"[FATAL] 脚本异常: {e}")
        finally:
            # The browser is intentionally not closed here.
            # NOTE(review): a browser started through the sync API may still be
            # torn down when the Python process exits - confirm whether the
            # script needs to block here for the window to really stay open.
            if self.browser is not None:
                self.log("浏览器保持开启状态,请手动关闭。")

    def _filter_status(self):
        """Activate the "待发货" tab unless it is already active.

        All failures are logged and swallowed so the caller can continue.
        """
        self.log("正在点击“待发货”标签页")
        try:
            tabs = self.page.locator(".el-tabs__item:has-text('待发货')")
            if tabs.count() == 0:
                self.log(" [WARN] 未找到“待发货”Tab")
                return

            # `.first` avoids a strict-mode error when several tabs match the
            # text; `or ""` covers an element without a class attribute.
            tab = tabs.first
            css_classes = (tab.get_attribute("class") or "").split()
            if "is-active" in css_classes:
                self.log("“待发货”标签页已经是选中状态")
                return

            tab.click()
            self.log("已点击“待发货”标签页")
            self._pause(1)
        except Exception as e:
            self.log(f"筛选操作失败: {e}")

    def _download_excel(self):
        """Open the bulk-shipment dialog and save the exported order sheet.

        The file is written to ``data/shipping_order_<ts_str>.xls``. Missing
        buttons are logged and end the method early; any exception is logged
        and swallowed.
        """
        try:
            self.log("准备触发批量发货...")

            # 1. Click the bulk-shipment button to open the dialog.
            batch_ship_btn = self.page.locator("button:has-text('批量发货')")
            if batch_ship_btn.count() == 0:
                self.log("[WARN] 未找到 '批量发货' 按钮")
                return
            batch_ship_btn.first.click()
            self.log("已点击 '批量发货' 按钮,等待弹窗加载...")
            self._pause(2)  # give the dialog and its buttons time to render

            # 2. Click the export button and capture the resulting download.
            self.log("尝试寻找并点击 '导出待发货订单' 按钮...")
            export_btn = self.page.locator("button:has-text('导出待发货订单')")
            if export_btn.count() == 0:
                self.log("[ERROR] 弹窗中未找到 '导出待发货订单' 按钮,可能是因为无待发货订单或者页面结构变更")
                # Press ESC so the open dialog does not block later steps.
                self.page.keyboard.press("Escape")
                return

            self.log("开始监听文件下载...")
            with self.page.expect_download(timeout=60000) as download_info:
                export_btn.first.click()
                self.log("已点击 '导出待发货订单'")

            download = download_info.value

            # Save into the data directory under a timestamped name.
            save_path = os.path.join("data", f"shipping_order_{self.ts_str}.xls")

            self.log("正在保存文件...")
            download.save_as(save_path)
            self.log(f"✅ 文件下载成功: {save_path}")

            # Close the dialog with ESC.
            self._pause(1)
            self.page.keyboard.press("Escape")

        except Exception as e:
            self.log(f"执行批量导出失败: {e}")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: build one collector and run a single export pass.
    scbank_job = SCBankCollector()
    scbank_job.run()
|