# scbank-sync/scbank_collector.py

import json
import time
import random
import os
import sys
from datetime import datetime
from playwright.sync_api import sync_playwright
from chaojiying import ChaojiyingClient

class SCBankCollector:
    """Log in to the SCBank mall back office and export the pending-shipment orders.

    The collector drives a visible (non-headless) browser through Playwright:
    it opens the delivery-orders page, fills in the credentials read from
    ``config.txt``, optionally solves the login captcha through the Chaojiying
    service, switches to the "待发货" tab and saves the exported spreadsheet
    under ``data/``.

    Attributes:
        target_url: URL of the delivery-orders page.
        ts_str: Timestamp (``YYYYMMDD_HHMMSS``) used in the export file name.
        config: Settings returned by ``_load_config``.
        username / password: Mall credentials (may be empty strings).
        cjy_client: ``ChaojiyingClient`` instance, or ``None`` when the
            Chaojiying account is not configured.
        playwright: Playwright driver handle, set by ``start_browser``.
        browser / page: Playwright browser and page, set by ``start_browser``.
    """

    CONFIG_FILE = "config.txt"
    DATA_DIR = "data"
    HOME_URL_GLOB = "**/homePage**"
    MAX_LOGIN_RETRIES = 3
    # Probability of asking Chaojiying for a refund after a rejected captcha.
    # NOTE(review): an earlier comment described this as 50%, but the value in
    # effect has been 0.8 - confirm the intended rate.
    REPORT_ERROR_PROBABILITY = 0.8
    BROWSER_ARGS = ("--disable-blink-features=AutomationControlled",)

    def __init__(self):
        """Read the configuration, build the captcha client and prepare ``data/``.

        Exits the process (via ``_load_config``) when ``config.txt`` is missing
        or unreadable.
        """
        self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"
        # Timestamp used to name the exported file: data/shipping_order_<ts>.xls
        self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Settings come from the external config file.
        self.config = self._load_config()

        self.username = self.config.get("scbank_username", "")
        self.password = self.config.get("scbank_password", "")

        # The captcha client is only created when both credentials are present.
        cjy_user = self.config.get("chaojiying_username", "")
        cjy_pass = self.config.get("chaojiying_password", "")
        cjy_softid = self.config.get("chaojiying_softid", "96001")
        self.cjy_client = ChaojiyingClient(cjy_user, cjy_pass, cjy_softid) if cjy_user and cjy_pass else None

        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(self.DATA_DIR, exist_ok=True)

        self.playwright = None
        self.browser = None
        self.page = None

    def _load_config(self):
        """Parse ``config.txt`` (``key = value`` lines) into the internal settings dict.

        Blank lines and lines starting with ``#`` are ignored; only the first
        ``=`` on a line separates key and value. The file is decoded as
        ``utf-8-sig`` so that a byte-order mark written by some Windows editors
        does not become part of the first key.

        Returns:
            dict with the keys ``scbank_username``, ``scbank_password``,
            ``chaojiying_username``, ``chaojiying_password`` and
            ``chaojiying_softid``.

        Raises:
            SystemExit: when the file is missing or cannot be read/decoded.
        """
        config_file = self.CONFIG_FILE
        config_data = {
            "商城账号": "",
            "商城密码": "",
            "超级鹰账号": "",
            "超级鹰密码": "",
            "超级鹰软件ID": "96001"
        }

        if not os.path.exists(config_file):
            print(f"[ERROR] 未找到配置文件 {config_file}，请确保该文件与程序在同一目录下。")
            sys.exit(1)

        try:
            with open(config_file, "r", encoding="utf-8-sig") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    # Skip comments and empty lines.
                    if not line or line.startswith("#"):
                        continue

                    key, sep, val = line.partition("=")
                    if sep:
                        config_data[key.strip()] = val.strip()
        except (OSError, UnicodeDecodeError) as e:
            print(f"[ERROR] 读取配置文件失败: {e}")
            sys.exit(1)

        # Map the Chinese keys used in the file to the internal key names.
        return {
            "scbank_username": config_data.get("商城账号", ""),
            "scbank_password": config_data.get("商城密码", ""),
            "chaojiying_username": config_data.get("超级鹰账号", ""),
            "chaojiying_password": config_data.get("超级鹰密码", ""),
            "chaojiying_softid": config_data.get("超级鹰软件ID", "96001")
        }

    def log(self, msg):
        """Print ``msg`` prefixed with the current wall-clock time (HH:MM:SS)."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    @staticmethod
    def _brief(exc):
        """Return the first line of an exception message (or its type name if empty)."""
        lines = str(exc).splitlines()
        return lines[0] if lines else type(exc).__name__

    def _pause(self, seconds):
        """Wait ``seconds`` without stalling Playwright's event processing.

        Uses ``page.wait_for_timeout`` once a page exists, because the sync API
        relies on asynchronous operations that ``time.sleep`` would block.
        """
        if self.page is not None:
            self.page.wait_for_timeout(seconds * 1000)
        else:
            time.sleep(seconds)

    def start_browser(self):
        """Launch a visible browser and open a fresh page.

        Tries the locally installed Chrome, then Edge, then Playwright's bundled
        Chromium. On success ``self.playwright``, ``self.browser`` and
        ``self.page`` are set.

        Raises:
            Exception: re-raises the launch error when no browser can be
                started (the Playwright driver is stopped first).
        """
        self.log("启动浏览器...")
        # Keep the driver handle so it can be stopped later instead of leaking.
        self.playwright = sync_playwright().start()
        chromium = self.playwright.chromium
        launch_args = list(self.BROWSER_ARGS)

        # Prefer a locally installed browser (Chrome or Edge).
        browser = None
        for channel in ("chrome", "msedge"):
            try:
                self.log(f"尝试启动本地 {channel}...")
                browser = chromium.launch(
                    channel=channel,
                    headless=False,
                    args=launch_args
                )
                self.log(f"成功启动 {channel}")
                break
            except Exception as e:
                self.log(f"启动 {channel} 失败，尝试下一个... ({self._brief(e)})")

        # Fall back to the bundled Chromium (if installed).
        if not browser:
            self.log("未找到本地 Chrome 或 Edge，尝试使用内置 Chromium...")
            try:
                browser = chromium.launch(
                    headless=False,
                    args=launch_args
                )
            except Exception as e:
                self.log(f"[FATAL] 无法启动任何浏览器: {e}")
                self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。")
                # Nothing was launched, so release the driver before giving up.
                self.playwright.stop()
                self.playwright = None
                raise

        self.browser = browser
        context = self.browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            accept_downloads=True  # downloads are needed for the export step
        )
        self.page = context.new_page()

    def _auto_solve_captcha(self):
        """Recognise the login captcha via Chaojiying, fill it in and click login.

        Returns:
            ``(True, pic_id)`` when a code was recognised and submitted, where
            ``pic_id`` is the identifier returned by Chaojiying for this image;
            ``(False, None)`` when the client is not configured, recognition
            failed, or any step raised.
        """
        if not self.cjy_client:
            self.log("[WARN] 未配置超级鹰账号，跳过自动识别验证码。请手动输入验证码。")
            return False, None

        try:
            self.log("尝试获取验证码图片...")
            # Selector of the captcha image on the login page.
            captcha_img_selector = 'img.code-image'

            # Wait until the image element is present.
            self.page.wait_for_selector(captcha_img_selector, timeout=5000)

            # An element screenshot yields the captcha as raw image bytes.
            image_bytes = self.page.locator(captcha_img_selector).screenshot()

            self.log("正在调用超级鹰进行识别...")
            # Code type 1902 (per the original note: 4-6 mixed letters/digits).
            result = self.cjy_client.solve_captcha(image_bytes, codetype=1902)

            if result and result.get('err_no') == 0:
                code = result.get('pic_str')
                pic_id = result.get('pic_id')
                self.log(f"✅ 验证码识别成功: {code}")

                # Fill in the recognised code and submit the form.
                self.page.fill('input[name="code"]', code)

                self.log("尝试自动点击登录...")
                self.page.locator('button.login-btn').click()
                return True, pic_id

            # Log the raw response through the regular logger for diagnosis.
            self.log(f"超级鹰返回: {result}")
            err_str = result.get('err_str', '未知错误') if result else '返回为空'
            self.log(f"❌ 验证码识别失败: {err_str}")
            return False, None

        except Exception as e:
            self.log(f"自动处理验证码发生异常: {e}")
            return False, None

    def _open_login_page(self):
        """Navigate to the target URL and pre-fill the credentials when configured.

        Failures are logged and otherwise ignored so that the login loop (or the
        user) can still proceed.
        """
        self.log(f"正在打开页面: {self.target_url}")
        try:
            self.page.goto(self.target_url)
        except Exception as e:
            self.log(f"[WARN] 打开页面失败: {e}")
            return

        # The site is expected to redirect to the login form.
        try:
            self.log("等待登录页面加载...")
            self.page.wait_for_selector('input[name="username"]', timeout=10000)

            if self.username and self.password:
                self.log(f"正在自动填入账号: {self.username}")
                self.page.fill('input[name="username"]', self.username)
                self.page.fill('input[name="password"]', self.password)
                self.log("账号密码已填入")
            else:
                self.log("[WARN] 配置文件中未提供商城账号密码，请手动输入")
        except Exception as e:
            self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")

    def _report_captcha_error(self, pic_id):
        """Ask Chaojiying for a refund on ``pic_id`` with ``REPORT_ERROR_PROBABILITY``."""
        if not pic_id or random.random() >= self.REPORT_ERROR_PROBABILITY:
            return
        self.log(f"触发报错返分机制 (pic_id: {pic_id})...")
        try:
            self.cjy_client.report_error(pic_id)
            self.log("报错返分请求已发送")
        except Exception as report_exc:
            self.log(f"报错返分请求异常: {report_exc}")

    def _log_page_error(self):
        """Log the text of the page's error toast (``.el-message__content``), if any."""
        try:
            error_msg = self.page.locator('.el-message__content').first.inner_text(timeout=1000)
        except Exception:
            # No toast within the timeout: nothing to report.
            return
        self.log(f"页面提示: {error_msg}")

    def _login(self):
        """Complete the login, automatically when possible, otherwise manually.

        With a captcha client, up to ``MAX_LOGIN_RETRIES`` automatic attempts
        are made; afterwards the method waits without a time limit for the user
        to log in by hand. Without a client it waits for a manual login straight
        away.

        Returns:
            True once the URL matches ``HOME_URL_GLOB``; False when waiting for
            the login failed.
        """
        auto = bool(self.cjy_client)
        attempts = 0

        while attempts < self.MAX_LOGIN_RETRIES:
            pic_id = None
            if auto:
                self.log(f"--- 第 {attempts + 1} 次尝试登录 ---")
                solved, pic_id = self._auto_solve_captcha()
                if not solved:
                    self.log("验证码识别请求失败，等待后重试...")
                    self._pause(1)
                    attempts += 1
                    continue
            else:
                self.log(">>> 未配置超级鹰，请在浏览器中手动完成登录操作 (输入验证码并点击登录) <<<")

            try:
                # Automatic mode uses a short timeout because a rejection shows
                # up quickly; manual mode waits indefinitely (timeout=0).
                self.page.wait_for_url(self.HOME_URL_GLOB, timeout=3000 if auto else 0)
            except Exception as e:
                if not auto:
                    self.log(f"登录等待超时或失败: {e}")
                    return False

                self.log("登录等待超时或失败，可能验证码错误。")
                self._report_captcha_error(pic_id)
                self._log_page_error()

                # The captcha refreshes by itself after a failed login; just
                # give the new image a moment to load.
                self._pause(1)
                attempts += 1
            else:
                self.log("✅ 检测到登录成功！")
                self._pause(1)
                return True

        self.log(f"[WARN] 达到最大自动登录重试次数 ({self.MAX_LOGIN_RETRIES}次)。请在浏览器中手动输入验证码并完成登录！")
        try:
            # Manual fallback: wait without a time limit until login succeeds.
            self.page.wait_for_url(self.HOME_URL_GLOB, timeout=0)
        except Exception as e:
            self.log(f"手动登录等待失败: {e}")
            return False
        self.log("✅ 检测到手动登录成功！")
        return True

    def run(self):
        """Run one collection pass: start browser, log in, filter, export.

        All exceptions are logged rather than propagated. The browser is
        deliberately not closed by this method.
        """
        try:
            self.start_browser()

            # 1. Log in.
            self._open_login_page()
            if not self._login():
                return

            # 2. Make sure we are on the delivery-orders page.
            if "deliveryOrders" not in self.page.url:
                self.log(f"跳转至订单管理页面: {self.target_url}")
                self.page.goto(self.target_url)
                self.page.wait_for_load_state("domcontentloaded")
                self._pause(0.5)

            # 3. Select the pending-shipment tab.
            self._filter_status()

            # 4. Trigger the batch export and save the download.
            self._download_excel()

            self.log("采集任务完成。")

        except Exception as e:
            self.log(f"[FATAL] 脚本异常: {e}")
        finally:
            # The browser is intentionally left untouched here.
            self.log("浏览器保持开启状态，请手动关闭。")

    def _filter_status(self):
        """Activate the "待发货" tab unless it is already selected.

        Errors are logged and swallowed so the export step can still be tried.
        """
        self.log("正在点击“待发货”标签页")
        try:
            tabs = self.page.locator(".el-tabs__item:has-text('待发货')")

            if tabs.count() == 0:
                self.log("  [WARN] 未找到“待发货”Tab")
                return

            # .first keeps single-element calls valid if several tabs match.
            tab = tabs.first
            # get_attribute returns None when the element has no class attribute.
            css_classes = tab.get_attribute("class") or ""
            if "is-active" in css_classes:
                self.log("“待发货”标签页已经是选中状态")
                return

            tab.click()
            self.log("已点击“待发货”标签页")
            self._pause(1)
        except Exception as e:
            self.log(f"筛选操作失败: {e}")

    def _download_excel(self):
        """Open the batch-shipping dialog, export pending orders and save the file.

        The download is stored as ``data/shipping_order_<ts_str>.xls``. Errors
        are logged and swallowed.
        """
        try:
            self.log("准备触发批量发货...")

            # 1. Open the batch-shipping dialog.
            batch_ship_btn = self.page.locator("button:has-text('批量发货')")
            if batch_ship_btn.count() == 0:
                self.log("[WARN] 未找到 '批量发货' 按钮")
                return
            batch_ship_btn.first.click()
            self.log("已点击 '批量发货' 按钮，等待弹窗加载...")
            self._pause(2)  # let the dialog and its buttons render

            # 2. Click the export button and capture the resulting download.
            self.log("尝试寻找并点击 '导出待发货订单' 按钮...")

            export_btn = self.page.locator("button:has-text('导出待发货订单')")
            if export_btn.count() == 0:
                self.log("[ERROR] 弹窗中未找到 '导出待发货订单' 按钮，可能是因为无待发货订单或者页面结构变更")
                # Close the dialog so it does not block later interaction.
                self.page.keyboard.press("Escape")
                return

            self.log("开始监听文件下载...")
            with self.page.expect_download(timeout=60000) as download_info:
                export_btn.first.click()
                self.log("已点击 '导出待发货订单'")

            download = download_info.value

            # NOTE(review): the extension is fixed to .xls regardless of what
            # the server sends - confirm this matches the real export format.
            file_name = f"shipping_order_{self.ts_str}.xls"
            save_path = os.path.join(self.DATA_DIR, file_name)

            self.log("正在保存文件...")
            download.save_as(save_path)
            self.log(f"✅ 文件下载成功: {save_path}")

            # Close the dialog.
            self._pause(1)
            self.page.keyboard.press("Escape")

        except Exception as e:
            self.log(f"执行批量导出失败: {e}")

# Script entry point: construct the collector and run a single collection pass.
if __name__ == "__main__":
    SCBankCollector().run()