scbank-sync/scbank_hook.py

215 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import time
import random
from playwright.sync_api import sync_playwright
# Page the browser is pointed at: the delivery-orders view of the
# order-management site.
TARGET_URL: str = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"
# File that captured payloads are written to, one JSON document per line.
OUTPUT_FILE: str = "scbank_data.jsonl"
def process_details(
    page,
    *,
    detail_selector: str = "button.el-button--text:has-text('详情')",
    wait_timeout: float = 0,
    settle_range: tuple = (1.5, 2.5),
    close_delay: float = 0.5,
) -> int:
    """Open and dismiss every visible "详情" (detail) dialog on the current page.

    Each matching button is clicked, the function pauses for a random
    interval so the page can load the detail data, and the dialog is then
    dismissed with the Escape key. Nothing is read from the dialog here; the
    click only makes the page request the detail data (presumably picked up
    by a hook the caller installed -- confirm against the caller).

    Args:
        page: Playwright sync ``Page`` showing the order list.
        detail_selector: Selector for the detail buttons. The default targets
            an ElementUI text button whose label contains "详情".
        wait_timeout: Milliseconds to wait for the first button to appear.
            The default ``0`` disables the timeout, so the call blocks until
            a button shows up; pass a positive value to give up on pages
            that have no detail buttons.
        settle_range: ``(low, high)`` bounds, in seconds, of the random pause
            after each click.
        close_delay: Seconds to pause after pressing Escape.

    Returns:
        The number of buttons that were clicked and dismissed without an
        error. ``0`` when no button could be located.
    """
    try:
        page.wait_for_selector(detail_selector, timeout=wait_timeout)
        # Per the original author's note, an ElementUI table with fixed
        # columns is rendered twice, so hidden duplicates of each button
        # exist in the DOM and must be filtered out.
        visible_buttons = [
            btn for btn in page.locator(detail_selector).all() if btn.is_visible()
        ]
    except Exception as e:
        print(f"[提示] 当前页无详情按钮或加载超时: {e}")
        return 0

    print(f"[当前页] 发现 {len(visible_buttons)} 个可见详情按钮")

    opened = 0
    for btn in visible_buttons:
        try:
            btn.click()
            # Randomized pause while the detail data loads.
            time.sleep(random.uniform(*settle_range))
            # ElementUI dialogs can usually be dismissed with Escape.
            page.keyboard.press("Escape")
            time.sleep(close_delay)
        except Exception as e:
            # Best effort: one failing row must not stop the remaining rows.
            print(f" [警告] 详情操作失败: {e}")
        else:
            opened += 1
    return opened
# Marker the injected hook puts in front of every payload it logs.
_INTERCEPT_PREFIX = "__INTERCEPTED__:"

# JavaScript installed as an init script. It wraps JSON.parse and, for every
# parsed object, serializes it again and logs it to the console behind the
# "__INTERCEPTED__:" marker when either
#   * `rows` or `list` is a non-empty array (treated as a list response), or
#   * the serialized text contains "orderNo" and is longer than 300
#     characters (treated as a detail response).
# Payloads whose serialized text contains "IsQX" are skipped as noise.
# Errors inside the hook are swallowed and the parsed value is always
# returned to the page.
_JSON_PARSE_HOOK = """
const originalParse = JSON.parse;
JSON.parse = function(text, reviver) {
const data = originalParse(text, reviver);
try {
if (data && typeof data === 'object') {
const str = JSON.stringify(data);
// 【过滤噪音】排除包含 IsQX 的干扰接口 (通常用于权限检查)
if (str.includes('IsQX') || (data.body && data.body.IsQX)) {
return data;
}
// 1. 列表接口 /selectOrderlist -> 通常返回 { rows: [...], total: ... }
const isList = (data.rows && Array.isArray(data.rows) && data.rows.length > 0) ||
(data.list && Array.isArray(data.list) && data.list.length > 0);
// 2. 详情接口 /getorderdetail -> 通常返回 { orderNo: '...', ... }
// 为了防止漏掉,只要包含 orderNo 且长度足够大(排除简单的状态返回),都抓取
// 一个完整的订单详情通常包含收货人、地址、商品列表等,长度肯定超过 300 字符
const isDetail = str.includes('orderNo') && str.length > 300;
if (isList || isDetail) {
console.log('__INTERCEPTED__:' + str);
}
}
} catch (e) {}
return data;
}
"""


def _record_intercepted(msg):
    """Append one payload logged by the JSON.parse hook to ``OUTPUT_FILE``.

    Console messages that do not start with the intercept marker are ignored.
    Parse and write failures are reported on stdout instead of being
    silently dropped, and never propagate into the Playwright event loop.

    Args:
        msg: Playwright ``ConsoleMessage`` delivered by ``page.on("console")``.
    """
    text = msg.text
    if not text.startswith(_INTERCEPT_PREFIX):
        return

    # Strip only the leading marker; str.replace would also delete any copy
    # of the marker that happens to appear inside the payload.
    payload = text[len(_INTERCEPT_PREFIX):]
    try:
        data = json.loads(payload)
    except ValueError as e:
        print(f"[警告] 捕获数据解析失败: {e}")
        return

    rendered = str(data)
    desc = "数据"
    if isinstance(data, dict) and ("rows" in data or "list" in data):
        desc = "【列表】"
    elif "orderNo" in rendered:
        desc = "【详情】"

    try:
        with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(data, ensure_ascii=False) + "\n")
    except OSError as e:
        print(f"[错误] 写入 {OUTPUT_FILE} 失败: {e}")
        return
    print(f"[捕获]{desc} 长度: {len(rendered)}")


def _select_pending_tab(page):
    """Activate the "待发货" tab unless it is missing or already active.

    Any failure is reported and swallowed: filtering is treated as
    non-fatal and scraping continues on whatever tab is showing.
    """
    print("[操作] 正在点击“待发货”标签页")
    try:
        tab = page.locator(".el-tabs__item:has-text('待发货')")
        if tab.count() == 0:
            print(" [警告] 未找到“待发货”标签页,跳过筛选")
            return
        # get_attribute returns None when the element has no class attribute.
        if "is-active" in (tab.get_attribute("class") or ""):
            print(" [状态] “待发货”标签页已经是选中状态")
            return
        tab.click()
        print("[状态] 已点击“待发货”标签页")
        time.sleep(1)  # let the tab-switch animation finish
    except Exception as e:
        print(f" [错误] 筛选操作失败 (非致命,继续尝试抓取): {e}")


def _crawl_all_pages(page):
    """Process every result page, following the ".btn-next" pager button.

    Stops when the button is missing, disabled, or any pager interaction
    fails (for example because the page was closed).
    """
    page_num = 1
    while True:
        print(f"\n========== 正在处理第 {page_num} 页 ==========")
        process_details(page)

        print("[翻页] 检查下一页...")
        next_btn = page.locator(".btn-next")
        try:
            if next_btn.count() == 0:
                print("[结束] 未找到下一页按钮 (.btn-next)")
                break
            if next_btn.is_disabled():
                print("[结束] 下一页按钮已禁用,采集完成。")
                break
            next_btn.click()
        except Exception as e:
            print(f"[错误] 翻页失败: {e}")
            break

        page_num += 1
        print(f"[操作] 翻页 -> 第 {page_num}")
        time.sleep(3)  # give the next page's list time to load


def run():
    """Drive a headed Chromium session and capture order data to a file.

    Steps:
      1. Launch Chromium (not headless) and install the JSON.parse hook.
      2. Open ``TARGET_URL`` and wait, without a timeout, until the user has
         logged in manually (detected by a URL matching ``**/homePage``).
      3. Navigate to the target page if needed and select the "待发货" tab.
      4. Walk all result pages, opening each row's detail dialog so the hook
         logs the payloads, which are appended to ``OUTPUT_FILE``.

    If the login wait itself fails, the function returns without scraping;
    leaving the ``with`` block shuts Playwright down.
    """
    print("[系统] 启动精准采集脚本...")
    print(f"[系统] 数据将保存至: {OUTPUT_FILE}")

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,
            args=["--disable-blink-features=AutomationControlled"],
        )
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        )
        page = context.new_page()

        # The hook must be registered before navigation so it is in place
        # when the page's own scripts start parsing responses.
        page.add_init_script(_JSON_PARSE_HOOK)
        page.on("console", _record_intercepted)

        print("[操作] 正在打开页面,请手动登录...")
        try:
            page.goto(TARGET_URL)
        except Exception as e:
            # Non-fatal: the user can still navigate and log in by hand.
            print(f"[警告] 打开页面失败: {e}")
        print(">>> 请在浏览器中完成登录操作 <<<")

        try:
            # Reaching the home page is taken as proof of a successful login.
            page.wait_for_url("**/homePage", timeout=0)
        except Exception as e:
            # Without a logged-in page there is nothing to scrape.
            print(f"[错误] 等待登录或初始化失败: {e}")
            return
        print("[状态] 检测到登录成功!")

        try:
            if "deliveryOrders" not in page.url:
                print(f"[跳转] 正在前往目标页面: {TARGET_URL}")
                page.goto(TARGET_URL)
                page.wait_for_load_state("domcontentloaded")
                time.sleep(0.5)  # let the page components render
            else:
                print("[状态] 已在目标页面")
        except Exception as e:
            # Best effort: report and still try to scrape what is showing.
            print(f"[错误] 等待登录或初始化失败: {e}")
        else:
            _select_pending_tab(page)

        _crawl_all_pages(page)

        print(f"\n[系统] 任务完成。数据已保存至 {OUTPUT_FILE}")
        time.sleep(5)
        browser.close()
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    run()