344 lines
15 KiB
Python
344 lines
15 KiB
Python
import json
|
||
import time
|
||
import random
|
||
import os
|
||
import sys
|
||
from datetime import datetime
|
||
from playwright.sync_api import sync_playwright
|
||
from chaojiying import ChaojiyingClient
|
||
|
||
class SCBankCollector:
    """Drive a browser through the SC Bank mall back office and export orders.

    The flow is: read ``config.txt``, launch a local browser via Playwright,
    log in (optionally solving the captcha through the Chaojiying client),
    open the delivery-orders page, select the "待发货" tab and download the
    exported spreadsheet into the ``data`` directory.
    """

    # Number of automatic captcha/login attempts before falling back to
    # waiting for a manual login.
    MAX_LOGIN_RETRIES = 3

    # Probability of calling the Chaojiying "report error" endpoint after a
    # login attempt that did not reach the home page.
    # NOTE(review): an earlier inline comment spoke of 50% while the code used
    # 0.8; the code's value is kept here -- confirm the intended rate.
    REPORT_ERROR_PROBABILITY = 0.8

    def __init__(self):
        """Load the configuration and prepare (but do not start) the browser."""
        self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"
        # Timestamp embedded in the exported file name:
        # data/shipping_order_YYYYMMDD_HHMMSS.xls
        self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Settings come from the external config file.
        self.config = self._load_config()

        self.username = self.config.get("scbank_username", "")
        self.password = self.config.get("scbank_password", "")

        # The captcha-solving client is only created when credentials exist;
        # otherwise the user has to type the captcha by hand.
        cjy_user = self.config.get("chaojiying_username", "")
        cjy_pass = self.config.get("chaojiying_password", "")
        cjy_softid = self.config.get("chaojiying_softid", "96001")
        self.cjy_client = (
            ChaojiyingClient(cjy_user, cjy_pass, cjy_softid)
            if cjy_user and cjy_pass
            else None
        )

        # Make sure the download directory exists (no error if it already does).
        os.makedirs("data", exist_ok=True)

        self.playwright = None
        self.browser = None
        self.page = None

    def _load_config(self):
        """Parse ``config.txt`` from the working directory.

        The file holds ``key = value`` lines with Chinese keys; blank lines and
        lines starting with ``#`` are ignored, and only the first ``=`` splits
        a line. The process exits with status 1 when the file is missing or
        cannot be read.

        Returns:
            dict: values keyed by the internal names ``scbank_username``,
            ``scbank_password``, ``chaojiying_username``,
            ``chaojiying_password`` and ``chaojiying_softid``.
        """
        config_file = "config.txt"
        parsed = {}

        try:
            # utf-8-sig strips a leading BOM (and reads plain UTF-8 unchanged);
            # without it a BOM would become part of the first key and that
            # setting would silently be treated as missing.
            with open(config_file, "r", encoding="utf-8-sig") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    # Skip comments and blank lines.
                    if not line or line.startswith("#"):
                        continue
                    key, sep, val = line.partition("=")
                    if sep:
                        parsed[key.strip()] = val.strip()
        except FileNotFoundError:
            print(f"[ERROR] 未找到配置文件 {config_file},请确保该文件与程序在同一目录下。")
            sys.exit(1)
        except (OSError, UnicodeError) as e:
            print(f"[ERROR] 读取配置文件失败: {e}")
            sys.exit(1)

        # Map the Chinese keys of the file onto the names used internally.
        return {
            "scbank_username": parsed.get("商城账号", ""),
            "scbank_password": parsed.get("商城密码", ""),
            "chaojiying_username": parsed.get("超级鹰账号", ""),
            "chaojiying_password": parsed.get("超级鹰密码", ""),
            "chaojiying_softid": parsed.get("超级鹰软件ID", "96001"),
        }

    def log(self, msg):
        """Print ``msg`` prefixed with the current wall-clock time."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    def start_browser(self):
        """Launch a headed browser and open a fresh page in ``self.page``.

        Local Chrome is tried first, then Edge, then the Chromium build
        bundled with Playwright.

        Raises:
            Exception: whatever Playwright raised when even the bundled
            Chromium could not be launched.
        """
        self.log("启动浏览器...")
        playwright = sync_playwright().start()
        # Keep the driver handle so it can be stopped later instead of being
        # dropped as an unreachable local.
        self.playwright = playwright
        launch_args = ["--disable-blink-features=AutomationControlled"]

        # Prefer a locally installed browser (Chrome or Edge).
        browser = None
        for channel in ("chrome", "msedge"):
            self.log(f"尝试启动本地 {channel}...")
            try:
                browser = playwright.chromium.launch(
                    channel=channel,
                    headless=False,
                    args=launch_args,
                )
            except Exception:
                self.log(f"启动 {channel} 失败,尝试下一个...")
            else:
                self.log(f"成功启动 {channel}")
                break

        # Fall back to the bundled Chromium (if it has been installed).
        if not browser:
            self.log("未找到本地 Chrome 或 Edge,尝试使用内置 Chromium...")
            try:
                browser = playwright.chromium.launch(
                    headless=False,
                    args=launch_args,
                )
            except Exception as e:
                self.log(f"[FATAL] 无法启动任何浏览器: {e}")
                self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。")
                # Nothing was launched, so release the driver before failing.
                playwright.stop()
                self.playwright = None
                raise

        self.browser = browser
        context = self.browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            accept_downloads=True,  # downloads are needed for the export step
        )
        self.page = context.new_page()

    def _auto_solve_captcha(self):
        """Recognise the login captcha, type it in and click the login button.

        Returns:
            tuple: ``(True, pic_id)`` when a code was recognised, filled in and
            the login button clicked; ``(False, None)`` when no captcha client
            is configured, recognition failed, or any step raised.
        """
        if not self.cjy_client:
            self.log("[WARN] 未配置超级鹰账号,跳过自动识别验证码。请手动输入验证码。")
            return False, None

        try:
            self.log("尝试获取验证码图片...")
            # Selector of the captcha image on the login page.
            captcha_img_selector = 'img.code-image'

            # Wait until the image is present.
            self.page.wait_for_selector(captcha_img_selector, timeout=5000)

            # An element screenshot yields the image bytes directly.
            image_bytes = self.page.locator(captcha_img_selector).screenshot()

            self.log("正在调用超级鹰进行识别...")
            # Captcha type 1902 (per the original author: 4-6 mixed letters/digits).
            result = self.cjy_client.solve_captcha(image_bytes, codetype=1902)

            if not (result and result.get('err_no') == 0):
                # Keep the raw response in the log for troubleshooting.
                self.log(f"超级鹰返回: {result}")
                err_str = result.get('err_str', '未知错误') if result else '返回为空'
                self.log(f"❌ 验证码识别失败: {err_str}")
                return False, None

            code = result.get('pic_str')
            pic_id = result.get('pic_id')
            self.log(f"✅ 验证码识别成功: {code}")

            # Type the recognised code.
            self.page.fill('input[name="code"]', code)

            # Submit the login form.
            self.log("尝试自动点击登录...")
            self.page.locator('button.login-btn').click()
            return True, pic_id

        except Exception as e:
            self.log(f"自动处理验证码发生异常: {e}")
            return False, None

    def _open_login_page(self):
        """Open the target URL and pre-fill the credentials on the login form.

        Best effort: failures are logged and the flow continues so the user
        can still complete the login by hand.
        """
        self.log(f"正在打开页面: {self.target_url}")
        try:
            self.page.goto(self.target_url)
        except Exception as e:
            # Previously swallowed silently; surface the reason instead.
            self.log(f"打开页面失败: {e}")
            return

        # The site redirects to the login page; wait for its input boxes.
        try:
            self.log("等待登录页面加载...")
            self.page.wait_for_selector('input[name="username"]', timeout=10000)

            if self.username and self.password:
                self.log(f"正在自动填入账号: {self.username}")
                self.page.fill('input[name="username"]', self.username)
                self.page.fill('input[name="password"]', self.password)
                self.log("账号密码已填入")
            else:
                self.log("[WARN] 配置文件中未提供商城账号密码,请手动输入")
        except Exception as e:
            self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")

    def _report_captcha_error(self, pic_id):
        """Randomly report a presumably wrong captcha result for a refund."""
        if not pic_id or random.random() >= self.REPORT_ERROR_PROBABILITY:
            return
        self.log(f"触发报错返分机制 (pic_id: {pic_id})...")
        try:
            self.cjy_client.report_error(pic_id)
            self.log("报错返分请求已发送")
        except Exception as err:
            self.log(f"报错返分请求异常: {err}")

    def _log_page_message(self):
        """Log the text of an on-page message toast, if one is showing."""
        try:
            error_msg = self.page.locator('.el-message__content').inner_text(timeout=1000)
        except Exception:
            # No toast within the timeout; purely informational, so ignore.
            return
        self.log(f"页面提示: {error_msg}")

    def _login(self):
        """Wait until the browser reaches the home page.

        With a captcha client configured, up to ``MAX_LOGIN_RETRIES`` automatic
        attempts are made before waiting (without a time limit) for a manual
        login. Without one, the method waits for a manual login straight away.

        Returns:
            bool: True once the URL matches ``**/homePage**``, False when the
            wait itself failed and the run should stop.
        """
        home_url = "**/homePage**"

        if not self.cjy_client:
            self.log(">>> 未配置超级鹰,请在浏览器中手动完成登录操作 (输入验证码并点击登录) <<<")
            try:
                # timeout=0 disables the timeout: wait as long as it takes.
                self.page.wait_for_url(home_url, timeout=0)
            except Exception as e:
                self.log(f"登录等待超时或失败: {e}")
                return False
            self.log("✅ 检测到登录成功!")
            time.sleep(1)
            return True

        for attempt in range(1, self.MAX_LOGIN_RETRIES + 1):
            self.log(f"--- 第 {attempt} 次尝试登录 ---")
            success, pic_id = self._auto_solve_captcha()
            if not success:
                self.log("验证码识别请求失败,等待后重试...")
                time.sleep(1)
                continue

            try:
                # Short wait: a wrong captcha is rejected quickly by the page.
                self.page.wait_for_url(home_url, timeout=3000)
            except Exception:
                self.log("登录等待超时或失败,可能验证码错误。")
                self._report_captcha_error(pic_id)
                self._log_page_message()
                # Per the original author the captcha refreshes by itself after
                # a failed login; just give the new image a moment to load.
                time.sleep(1)
                continue

            self.log("✅ 检测到登录成功!")
            time.sleep(1)
            return True

        self.log(f"[WARN] 达到最大自动登录重试次数 ({self.MAX_LOGIN_RETRIES}次)。请在浏览器中手动输入验证码并完成登录!")
        try:
            # Fall back to manual mode and wait without a time limit.
            self.page.wait_for_url(home_url, timeout=0)
        except Exception as e:
            self.log(f"手动登录等待失败: {e}")
            return False
        self.log("✅ 检测到手动登录成功!")
        return True

    def run(self):
        """Execute the whole flow: browser, login, tab filter, export.

        Exceptions are logged rather than propagated.
        """
        try:
            self.start_browser()

            # 1. Log in.
            self._open_login_page()
            if not self._login():
                return

            # 2. Make sure we are on the delivery-orders page.
            if "deliveryOrders" not in self.page.url:
                self.log(f"跳转至订单管理页面: {self.target_url}")
                self.page.goto(self.target_url)
                self.page.wait_for_load_state("domcontentloaded")
                time.sleep(0.5)

            # 3. Select the pending-shipment tab.
            self._filter_status()

            # 4. Trigger the batch export and save the download.
            self._download_excel()

            self.log("采集任务完成。")

        except Exception as e:
            self.log(f"[FATAL] 脚本异常: {e}")
        finally:
            # The browser is deliberately not closed here.
            # NOTE(review): a browser launched through Playwright may still be
            # shut down when this process exits -- confirm the message below
            # matches what users actually observe.
            self.log("浏览器保持开启状态,请手动关闭。")

    def _filter_status(self):
        """Activate the "待发货" tab unless it is already selected."""
        self.log("正在点击“待发货”标签页")
        try:
            tab_selector = ".el-tabs__item:has-text('待发货')"
            tabs = self.page.locator(tab_selector)

            if tabs.count() == 0:
                self.log(" [WARN] 未找到“待发货”Tab")
                return

            # Act on the first match so several matching tabs do not trip
            # Playwright's strictness check on single-element actions.
            tab = tabs.first
            # get_attribute returns None when the attribute is absent.
            css_classes = tab.get_attribute("class") or ""
            if "is-active" in css_classes:
                self.log("“待发货”标签页已经是选中状态")
                return

            tab.click()
            self.log("已点击“待发货”标签页")
            time.sleep(1)
        except Exception as e:
            self.log(f"筛选操作失败: {e}")

    def _download_excel(self):
        """Open the batch-shipping dialog, export the orders and save the file.

        The download is written to ``data/shipping_order_<timestamp>.xls``.
        Failures are logged, not raised.
        """
        try:
            self.log("准备触发批量发货...")

            # 1. Open the batch-shipping dialog.
            batch_ship_btn = self.page.locator("button:has-text('批量发货')")
            if batch_ship_btn.count() == 0:
                self.log("[WARN] 未找到 '批量发货' 按钮")
                return
            batch_ship_btn.first.click()
            self.log("已点击 '批量发货' 按钮,等待弹窗加载...")
            time.sleep(2)  # give the dialog and its buttons time to render

            # 2. Click the export button and capture the resulting download.
            self.log("尝试寻找并点击 '导出待发货订单' 按钮...")

            # Match by text so the button is found regardless of nesting.
            export_btn = self.page.locator("button:has-text('导出待发货订单')")
            if export_btn.count() == 0:
                self.log("[ERROR] 弹窗中未找到 '导出待发货订单' 按钮,可能是因为无待发货订单或者页面结构变更")
                # Close the dialog with ESC so it does not block the page.
                self.page.keyboard.press("Escape")
                return

            self.log("开始监听文件下载...")
            with self.page.expect_download(timeout=60000) as download_info:
                export_btn.first.click()
                self.log("已点击 '导出待发货订单'")

            download = download_info.value

            # Save the file into the data directory.
            file_name = f"shipping_order_{self.ts_str}.xls"
            save_path = os.path.join("data", file_name)

            self.log("正在保存文件...")
            download.save_as(save_path)
            self.log(f"✅ 文件下载成功: {save_path}")

            # Close the dialog with ESC.
            time.sleep(1)
            self.page.keyboard.press("Escape")

        except Exception as e:
            self.log(f"执行批量导出失败: {e}")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: build the collector (which reads config.txt) and
    # run the login + export flow once.
    SCBankCollector().run()
|