"""Browser automation for the Sichuan Bank benefits-mall back office.

Logs in (optionally solving the captcha through the Chaojiying service),
opens the delivery-orders page, selects the pending-shipment tab and
downloads the exported order spreadsheet into the ``data`` directory.
"""

import json
import time
import random
import os
import sys
from datetime import datetime

from playwright.sync_api import sync_playwright

from chaojiying import ChaojiyingClient


class SCBankCollector:
    """Drive a browser session that exports pending-shipment orders."""

    # Number of automated captcha/login attempts before giving up.
    MAX_LOGIN_RETRIES = 3
    # Selector of the captcha image on the login page; clicking it
    # requests a fresh captcha.
    CAPTCHA_IMG_SELECTOR = 'img.code-image'
    # Directory that receives the downloaded spreadsheet.
    DATA_DIR = "data"

    def __init__(self):
        """Load the configuration and prepare (but do not start) the browser.

        Exits the process via ``_load_config`` when the configuration
        file is missing or unreadable.
        """
        self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"

        # Timestamp used to build the downloaded file name
        # (data/shipping_order_YYYYMMDD_HHMMSS.xls).
        self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Read the external configuration file.
        self.config = self._load_config()
        self.username = self.config.get("scbank_username", "")
        self.password = self.config.get("scbank_password", "")

        # The captcha client is optional: without credentials the user
        # has to complete the login by hand.
        cjy_user = self.config.get("chaojiying_username", "")
        cjy_pass = self.config.get("chaojiying_password", "")
        cjy_softid = self.config.get("chaojiying_softid", "96001")
        if cjy_user and cjy_pass:
            self.cjy_client = ChaojiyingClient(cjy_user, cjy_pass, cjy_softid)
        else:
            self.cjy_client = None

        # Make sure the output directory exists.
        os.makedirs(self.DATA_DIR, exist_ok=True)

        self._playwright = None
        self.browser = None
        self.page = None

    def _load_config(self) -> dict:
        """Read ``config.txt`` and return the settings under internal keys.

        The file consists of ``key = value`` lines; blank lines and
        lines starting with ``#`` are ignored. If the file does not
        exist, a template is written and the process exits with status
        1 so the user can fill it in. A read failure also exits with
        status 1.

        Returns:
            A dict with the keys ``scbank_username``,
            ``scbank_password``, ``chaojiying_username``,
            ``chaojiying_password`` and ``chaojiying_softid``.
        """
        config_file = "config.txt"
        config_data = {
            "商城账号": "",
            "商城密码": "",
            "超级鹰账号": "",
            "超级鹰密码": "",
            "超级鹰软件ID": "96001",
        }

        if not os.path.exists(config_file):
            print(f"[WARN] 未找到配置文件 {config_file},将创建一个默认模板,请填写后重新运行。")
            template = (
                "# ==========================================\n"
                "# 四川银行权益商城自动化工具 - 配置文件\n"
                "# 请在等号 (=) 后面填写您的账号和密码\n"
                "# 注意:等号前后可以有空格,但不要删除等号\n"
                "# ==========================================\n\n"
                "商城账号 = \n"
                "商城密码 = \n\n"
                "# --- 以下为验证码自动识别配置 (选填) ---\n"
                "# 如果不填写,程序运行时将暂停并等待您手动输入验证码\n"
                "超级鹰账号 = \n"
                "超级鹰密码 = \n"
                "超级鹰软件ID = 96001\n"
            )
            with open(config_file, "w", encoding="utf-8") as f:
                f.write(template)
            sys.exit(1)

        try:
            # "utf-8-sig" also accepts plain UTF-8 and strips a leading
            # BOM, which would otherwise corrupt the first line.
            with open(config_file, "r", encoding="utf-8-sig") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    # Skip comments and blank lines.
                    if not line or line.startswith("#"):
                        continue
                    if "=" in line:
                        # Split on the first "=" only, so values may
                        # themselves contain "=".
                        key, val = line.split("=", 1)
                        config_data[key.strip()] = val.strip()
        except Exception as e:
            print(f"[ERROR] 读取配置文件失败: {e}")
            sys.exit(1)

        # Map the Chinese keys used in the file to the internal keys.
        return {
            "scbank_username": config_data.get("商城账号", ""),
            "scbank_password": config_data.get("商城密码", ""),
            "chaojiying_username": config_data.get("超级鹰账号", ""),
            "chaojiying_password": config_data.get("超级鹰密码", ""),
            "chaojiying_softid": config_data.get("超级鹰软件ID", "96001"),
        }

    def log(self, msg) -> None:
        """Print ``msg`` prefixed with the current HH:MM:SS time."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    def start_browser(self) -> None:
        """Launch a headed browser and open a blank page in ``self.page``.

        Tries the locally installed Chrome, then Edge, then the
        Chromium bundled with Playwright.

        Raises:
            Exception: re-raised from Playwright when no browser could
                be launched; the Playwright driver is stopped first.
        """
        self.log("启动浏览器...")
        p = sync_playwright().start()
        # Keep a reference so the driver can be stopped explicitly.
        self._playwright = p
        launch_args = ["--disable-blink-features=AutomationControlled"]

        # Try the locally installed browsers first (Chrome, then Edge).
        browser = None
        for channel in ["chrome", "msedge"]:
            try:
                self.log(f"尝试启动本地 {channel}...")
                browser = p.chromium.launch(
                    channel=channel,
                    headless=False,
                    args=launch_args,
                )
                self.log(f"成功启动 {channel}")
                break
            except Exception:
                self.log(f"启动 {channel} 失败,尝试下一个...")

        # Fall back to the bundled Chromium, if it is installed.
        if not browser:
            self.log("未找到本地 Chrome 或 Edge,尝试使用内置 Chromium...")
            try:
                browser = p.chromium.launch(
                    headless=False,
                    args=launch_args,
                )
            except Exception as e:
                self.log(f"[FATAL] 无法启动任何浏览器: {e}")
                self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。")
                # Nothing was launched, so do not leave the driver
                # process behind.
                p.stop()
                self._playwright = None
                raise

        self.browser = browser
        context = self.browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            accept_downloads=True,  # explicitly allow downloads
        )
        self.page = context.new_page()

    def _auto_solve_captcha(self) -> bool:
        """Recognise the captcha, type it in and click the login button.

        Returns:
            True when a code was recognised, filled in and the login
            button was clicked; False when no captcha client is
            configured, recognition failed, or any step raised.
        """
        if not self.cjy_client:
            self.log("[WARN] 未配置超级鹰账号,跳过自动识别验证码。请手动输入验证码。")
            return False

        try:
            self.log("尝试获取验证码图片...")
            # Wait until the captcha image is present.
            self.page.wait_for_selector(self.CAPTCHA_IMG_SELECTOR, timeout=5000)

            # Screenshot the element to obtain the image bytes.
            image_bytes = self.page.locator(self.CAPTCHA_IMG_SELECTOR).screenshot()

            self.log("正在调用超级鹰进行识别...")
            # Code type 1902 is passed through to the solver as-is.
            result = self.cjy_client.solve_captcha(image_bytes, codetype=1902)

            code = result.get('pic_str') if result else None
            # A "successful" response without a code is treated as a
            # failure instead of being passed to page.fill().
            if result and result.get('err_no') == 0 and code:
                self.log(f"✅ 验证码识别成功: {code}")
                self.page.fill('input[name="code"]', code)

                self.log("尝试自动点击登录...")
                self.page.locator('button.login-btn').click()
                return True

            self.log(f"超级鹰返回结果: {result}")
            err_str = result.get('err_str', '未知错误') if result else '返回为空'
            self.log(f"❌ 验证码识别失败: {err_str}")
            return False
        except Exception as e:
            self.log(f"自动处理验证码发生异常: {e}")
            return False

    def _refresh_captcha(self) -> None:
        """Best-effort click on the captcha image to request a new one."""
        try:
            self.page.locator(self.CAPTCHA_IMG_SELECTOR).click()
            time.sleep(1)
        except Exception as e:
            self.log(f"刷新验证码图片失败: {e}")

    def _log_page_error(self) -> None:
        """Log the text of the page's error toast, if one is visible."""
        try:
            error_msg = self.page.locator('.el-message__content').inner_text(timeout=1000)
        except Exception:
            # No toast within the timeout; nothing to report.
            return
        self.log(f"页面提示: {error_msg}")

    def _open_login_page(self) -> None:
        """Open the target URL and pre-fill the credentials when possible.

        Failures are logged and swallowed: the page may already be
        logged in, or the user can still complete the form by hand.
        """
        self.log(f"正在打开页面: {self.target_url}")
        try:
            self.page.goto(self.target_url)
        except Exception as e:
            self.log(f"打开页面失败: {e}")
            return

        # Wait for the redirect to the login page and its input boxes.
        try:
            self.log("等待登录页面加载...")
            self.page.wait_for_selector('input[name="username"]', timeout=10000)

            if self.username and self.password:
                self.log(f"正在自动填入账号: {self.username}")
                self.page.fill('input[name="username"]', self.username)
                self.page.fill('input[name="password"]', self.password)
                self.log("账号密码已填入")
            else:
                self.log("[WARN] 配置文件中未提供商城账号密码,请手动输入")
        except Exception as e:
            self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")

    def _wait_for_manual_login(self) -> bool:
        """Block until the user has logged in by hand.

        Returns:
            True once the URL matches the home page, False if waiting
            raised.
        """
        self.log(">>> 未配置超级鹰,请在浏览器中手动完成登录操作 (输入验证码并点击登录) <<<")
        try:
            # timeout=0 disables the timeout: wait as long as needed.
            self.page.wait_for_url("**/homePage**", timeout=0)
        except Exception as e:
            self.log(f"登录等待超时或失败: {e}")
            return False
        self.log("✅ 检测到登录成功!")
        time.sleep(1)
        return True

    def _login(self) -> bool:
        """Log in, automatically when a captcha client is configured.

        Returns:
            True when the home page URL was reached, False otherwise.
        """
        if not self.cjy_client:
            return self._wait_for_manual_login()

        for attempt in range(1, self.MAX_LOGIN_RETRIES + 1):
            self.log(f"--- 第 {attempt} 次尝试登录 ---")

            if not self._auto_solve_captcha():
                self.log("验证码识别失败,等待后重试...")
                time.sleep(2)
                self._refresh_captcha()
                continue

            try:
                # Short wait: a wrong captcha should lead to a retry.
                self.page.wait_for_url("**/homePage**", timeout=10000)
            except Exception:
                self.log("登录等待超时或失败,可能验证码错误。")
                self._log_page_error()
                # Refresh regardless of whether an error toast was
                # found, so the next attempt gets a new image.
                self._refresh_captcha()
                continue

            self.log("✅ 检测到登录成功!")
            time.sleep(1)
            return True

        self.log("[ERROR] 达到最大登录重试次数,放弃自动登录。请手动干预。")
        return False

    def run(self) -> None:
        """Execute the whole flow: login, navigate, filter, export.

        Exceptions are logged rather than propagated, and the browser
        is deliberately not closed at the end.
        """
        try:
            self.start_browser()

            # 1. Log in.
            self._open_login_page()
            if not self._login():
                return

            # 2. Force navigation to the target page.
            if "deliveryOrders" not in self.page.url:
                self.log(f"跳转至订单管理页面: {self.target_url}")
                self.page.goto(self.target_url)
                self.page.wait_for_load_state("domcontentloaded")
            time.sleep(0.5)

            # 3. Select the status tab.
            self._filter_status()

            # 4. Run the batch export download.
            self._download_excel()

            self.log("采集任务完成。")
        except Exception as e:
            self.log(f"[FATAL] 脚本异常: {e}")
        finally:
            # The browser is intentionally not closed here.
            # NOTE(review): a Playwright-launched browser may still be
            # terminated when this process exits — confirm whether a
            # blocking wait is needed for the message below to hold.
            self.log("浏览器保持开启状态,请手动关闭。")

    def _filter_status(self) -> None:
        """Activate the pending-shipment tab unless it is already active.

        Failures are logged and swallowed.
        """
        self.log("正在点击“待发货”标签页")
        try:
            tab_selector = ".el-tabs__item:has-text('待发货')"
            tabs = self.page.locator(tab_selector)

            if tabs.count() == 0:
                self.log(" [WARN] 未找到“待发货”Tab")
                return

            # Use the first match so more than one matching element
            # does not raise, and tolerate a missing class attribute.
            tab = tabs.first
            css_classes = tab.get_attribute("class") or ""
            if "is-active" in css_classes:
                self.log("“待发货”标签页已经是选中状态")
                return

            tab.click()
            self.log("已点击“待发货”标签页")
            time.sleep(1)
        except Exception as e:
            self.log(f"筛选操作失败: {e}")

    def _download_excel(self) -> None:
        """Open the batch-shipping dialog and download the order export.

        The file is saved as ``data/shipping_order_<timestamp>.xls``.
        Failures are logged and swallowed.
        """
        try:
            self.log("准备触发批量发货...")

            # 1. Click the batch-shipping button (located by its text).
            batch_ship_btn = self.page.locator("button:has-text('批量发货')")
            if batch_ship_btn.count() == 0:
                self.log("[WARN] 未找到 '批量发货' 按钮")
                return

            batch_ship_btn.first.click()
            self.log("已点击 '批量发货' 按钮,等待弹窗加载...")
            time.sleep(2)  # give the dialog and its buttons time to render

            # 2. Click the export button and capture the download.
            self.log("尝试寻找并点击 '导出待发货订单' 按钮...")
            export_btn = self.page.locator("button:has-text('导出待发货订单')")

            if export_btn.count() == 0:
                self.log("[ERROR] 弹窗中未找到 '导出待发货订单' 按钮,可能是因为无待发货订单或者页面结构变更")
                # Press ESC to close the dialog so it does not block.
                self.page.keyboard.press("Escape")
                return

            # The click must happen inside the context manager so the
            # download event is observed.
            self.log("开始监听文件下载...")
            with self.page.expect_download(timeout=60000) as download_info:
                export_btn.first.click()
                self.log("已点击 '导出待发货订单'")

            download = download_info.value

            # Save the file into the data directory.
            file_name = f"shipping_order_{self.ts_str}.xls"
            save_path = os.path.join(self.DATA_DIR, file_name)

            self.log("正在保存文件...")
            download.save_as(save_path)
            self.log(f"✅ 文件下载成功: {save_path}")

            # Press ESC to close the dialog.
            time.sleep(1)
            self.page.keyboard.press("Escape")
        except Exception as e:
            self.log(f"执行批量导出失败: {e}")


if __name__ == "__main__":
    collector = SCBankCollector()
    collector.run()