import json
import os
import random
import time
from datetime import datetime

from playwright.sync_api import sync_playwright


class SCBankCollector:
    """Semi-automated exporter for pending-shipment orders.

    Workflow driven by :meth:`run`:

    1. launch a headed Chromium-family browser;
    2. open the order page, pre-fill the login form and wait for the
       operator to finish the login manually;
    3. select the "待发货" tab;
    4. open the batch-shipping dialog, click the export button and save
       the downloaded spreadsheet to ``data/shipping_order_<ts>.xls``.
    """

    def __init__(self):
        self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"

        # Timestamp embedded in the saved file name:
        # data/shipping_order_YYYYMMDD_HHMMSS.xls
        self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Login credentials.
        # SECURITY: the literal fallbacks are kept only so existing setups
        # keep working; prefer the SCBANK_USERNAME / SCBANK_PASSWORD
        # environment variables and remove the literals from source control.
        self.username = os.environ.get("SCBANK_USERNAME", "Lsxd01")
        self.password = os.environ.get("SCBANK_PASSWORD", "Lsxd@2026")

        # Make sure the output directory exists (no check-then-create race).
        os.makedirs("data", exist_ok=True)

        self.playwright = None  # Playwright driver handle, set by start_browser()
        self.browser = None
        self.page = None

    def log(self, msg):
        """Print *msg* prefixed with the current wall-clock time (HH:MM:SS)."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    def start_browser(self):
        """Launch a headed browser and open a fresh page on ``self.page``.

        Tries the locally installed Chrome, then Edge, and finally the
        Chromium bundled with Playwright.

        Raises:
            Exception: re-raised from Playwright when no browser can be
                launched; the Playwright driver is stopped first.
        """
        self.log("启动浏览器...")
        p = sync_playwright().start()
        # Keep a handle on the driver so it can be stopped explicitly.
        self.playwright = p

        launch_args = ["--disable-blink-features=AutomationControlled"]

        # Prefer a locally installed browser (Chrome or Edge).
        browser = None
        for channel in ["chrome", "msedge"]:
            try:
                self.log(f"尝试启动本地 {channel}...")
                browser = p.chromium.launch(
                    channel=channel,
                    headless=False,
                    args=launch_args,
                )
                self.log(f"成功启动 {channel}")
                break
            except Exception:
                self.log(f"启动 {channel} 失败,尝试下一个...")

        # Fall back to Playwright's bundled Chromium (if it is installed).
        if not browser:
            self.log("未找到本地 Chrome 或 Edge,尝试使用内置 Chromium...")
            try:
                browser = p.chromium.launch(
                    headless=False,
                    args=launch_args,
                )
            except Exception as e:
                self.log(f"[FATAL] 无法启动任何浏览器: {e}")
                self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。")
                # Do not leave the driver process running behind us.
                p.stop()
                self.playwright = None
                raise

        self.browser = browser
        context = self.browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            accept_downloads=True,  # explicitly allow downloads
        )
        self.page = context.new_page()

    def run(self):
        """Run the whole collection flow; never raises.

        Any exception is logged with a ``[FATAL]`` prefix. The browser is
        intentionally not closed by this method.
        """
        try:
            self.start_browser()

            # 1. Login
            self.log(f"正在打开页面: {self.target_url}")
            try:
                self.page.goto(self.target_url)
            except Exception as e:
                # Best effort: the operator can still navigate and log in
                # by hand, so report the failure instead of hiding it.
                self.log(f"打开页面失败 (将继续等待手动登录): {e}")
            else:
                # Wait for the redirect to the login page and its inputs.
                try:
                    self.log("等待登录页面加载...")
                    self.page.wait_for_selector('input[name="username"]', timeout=10000)

                    # Pre-fill the credentials.
                    self.log(f"正在自动填入账号: {self.username}")
                    self.page.fill('input[name="username"]', self.username)
                    self.page.fill('input[name="password"]', self.password)
                    self.log("账号密码已填入")
                except Exception as e:
                    self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")

            self.log(">>> 请在浏览器中完成登录操作 (输入验证码并点击登录) <<<")

            # Login is considered done once the URL contains "homePage".
            # timeout=0 disables the timeout: wait as long as the operator needs.
            try:
                self.page.wait_for_url("**/homePage**", timeout=0)
                self.log("检测到登录成功!")
                time.sleep(1)
            except Exception as e:
                self.log(f"登录等待超时或失败: {e}")
                return

            # 2. Force navigation to the target page.
            if "deliveryOrders" not in self.page.url:
                self.log(f"跳转至订单管理页面: {self.target_url}")
                self.page.goto(self.target_url)

            self.page.wait_for_load_state("domcontentloaded")
            time.sleep(0.5)

            # 3. Filter by status.
            self._filter_status()

            # 4. Trigger the bulk export download.
            self._download_excel()

            self.log("采集任务完成。")

        except Exception as e:
            self.log(f"[FATAL] 脚本异常: {e}")
        finally:
            # The browser is deliberately not closed here.
            self.log("浏览器保持开启状态,请手动关闭。")

    def _filter_status(self):
        """Activate the "待发货" tab if it is not already selected.

        Failures are logged and swallowed so the export step still runs.
        """
        self.log("正在点击“待发货”标签页")
        try:
            matches = self.page.locator(".el-tabs__item:has-text('待发货')")
            if matches.count() == 0:
                self.log(" [WARN] 未找到“待发货”Tab")
                return

            # has-text is a substring match and may hit several elements;
            # pin the first one so strict mode does not reject the call.
            tab = matches.first

            # get_attribute() returns None when the attribute is absent.
            css_classes = tab.get_attribute("class") or ""
            if "is-active" in css_classes:
                self.log("“待发货”标签页已经是选中状态")
                return

            tab.click()
            self.log("已点击“待发货”标签页")
            time.sleep(1)
        except Exception as e:
            self.log(f"筛选操作失败: {e}")

    def _download_excel(self):
        """Open the batch-shipping dialog and save the exported spreadsheet.

        The file is written to ``data/shipping_order_<ts>.xls``. Failures are
        logged and swallowed.
        """
        try:
            self.log("准备触发批量发货...")

            # 1. Click the batch-shipping button (located by its text).
            batch_ship_btn = self.page.locator("button:has-text('批量发货')")
            if batch_ship_btn.count() == 0:
                self.log("[WARN] 未找到 '批量发货' 按钮")
                return

            batch_ship_btn.first.click()
            self.log("已点击 '批量发货' 按钮,等待弹窗加载...")
            time.sleep(2)  # give the dialog and its buttons time to render

            # 2. Click the export button and capture the download.
            self.log("尝试寻找并点击 '导出待发货订单' 按钮...")

            # Text-based lookup works even inside a deeply nested structure.
            export_btn = self.page.locator("button:has-text('导出待发货订单')")
            if export_btn.count() == 0:
                self.log("[ERROR] 弹窗中未找到 '导出待发货订单' 按钮,可能是因为无待发货订单或者页面结构变更")
                # Press ESC to dismiss the dialog so it does not block the page.
                self.page.keyboard.press("Escape")
                return

            # Start listening for the download before clicking.
            self.log("开始监听文件下载...")
            with self.page.expect_download(timeout=60000) as download_info:
                export_btn.first.click()
                self.log("已点击 '导出待发货订单'")

            download = download_info.value

            # Save the file into the data directory.
            file_name = f"shipping_order_{self.ts_str}.xls"
            save_path = os.path.join("data", file_name)

            self.log("正在保存文件...")
            download.save_as(save_path)
            self.log(f"✅ 文件下载成功: {save_path}")

            # Press ESC to close the dialog.
            time.sleep(1)
            self.page.keyboard.press("Escape")

        except Exception as e:
            self.log(f"执行批量导出失败: {e}")


if __name__ == "__main__":
    collector = SCBankCollector()
    collector.run()