feat: 增加验证码自动验证
This commit is contained in:
parent
00dcb45882
commit
7b2d476b81
|
|
@ -0,0 +1,54 @@
|
||||||
|
import requests
|
||||||
|
from hashlib import md5
|
||||||
|
|
||||||
|
class ChaojiyingClient:
    """Client for the Chaojiying captcha-recognition HTTP API.

    Public methods always return a dict. Replies from the service are
    returned as decoded JSON; failures detected on the client side (invalid
    input, network errors, replies that are not JSON) are reported as
    ``{'err_no': -1, 'err_str': <message>}`` instead of raising.
    """

    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'
    # NOTE(review): both endpoints are plain HTTP, so the account password
    # travels unencrypted — confirm whether the service offers HTTPS.

    def __init__(self, username, password, soft_id):
        """Store the credentials and prepare the fields shared by all calls.

        :param username: Chaojiying account name (sent as ``user``).
        :param password: Chaojiying account password (sent as ``pass``).
        :param soft_id: software ID registered with Chaojiying (``softid``).
        """
        self.username = username
        # The password is forwarded unchanged in the ``pass`` field.
        # NOTE(review): an earlier revision MD5-hashed it first and was then
        # disabled; presumably the hashed form belongs in a different request
        # field — confirm against the Chaojiying API docs before restoring it.
        self.password = password
        self.soft_id = soft_id
        self.base_params = {
            'user': self.username,
            'pass': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post_form(self, url, fields, files=None, timeout=30, error_prefix=''):
        """POST ``fields`` plus the account fields and decode the JSON reply.

        ``fields`` is copied before merging, so neither it nor
        ``self.base_params`` is mutated. Any exception raised while sending
        the request or decoding the reply is converted into an error dict
        whose message is ``error_prefix`` followed by the exception text.
        """
        payload = dict(fields)
        payload.update(self.base_params)
        try:
            response = requests.post(
                url,
                data=payload,
                files=files,
                headers=self.headers,
                timeout=timeout,
            )
            return response.json()
        except Exception as e:
            # Deliberately broad: callers rely on always getting a dict back.
            return {'err_no': -1, 'err_str': f"{error_prefix}{e}"}

    def solve_captcha(self, image_bytes, codetype=1902):
        """Upload a captcha image and return the recognition result.

        :param image_bytes: raw binary content of the captcha image.
        :param codetype: Chaojiying question type (1902 = 4-6 mixed letters
            and digits).
        :return: dict decoded from the service reply, or an error dict with
            ``err_no == -1`` when the image is empty or the request failed.
        """
        if not image_bytes:
            # Reject locally instead of spending an upload on empty data.
            return {'err_no': -1, 'err_str': '验证码图片数据为空'}

        return self._post_form(
            self.UPLOAD_URL,
            {'codetype': codetype},
            files={'userfile': ('captcha.jpg', image_bytes)},
            timeout=30,
            error_prefix='网络请求异常: ',
        )

    def report_error(self, pic_id):
        """Report a wrong recognition so the service can refund the credit.

        :param pic_id: ID of the picture whose recognition was wrong.
        :return: dict decoded from the service reply, or an error dict with
            ``err_no == -1`` when ``pic_id`` is missing or the request failed.
        """
        if not pic_id:
            return {'err_no': -1, 'err_str': '缺少图片 ID (pic_id)'}

        return self._post_form(self.REPORT_URL, {'id': pic_id}, timeout=10)
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
# ==========================================
|
||||||
|
# 四川银行权益商城自动化工具 - 配置文件
|
||||||
|
# 请在等号 (=) 后面填写您的账号和密码
|
||||||
|
# 注意:等号前后可以有空格,但不要删除等号
|
||||||
|
# ==========================================
|
||||||
|
|
||||||
|
商城账号 = Lsxd01
|
||||||
|
商城密码 = Lsxd@2026
|
||||||
|
|
||||||
|
# --- 以下为验证码自动识别配置 (选填) ---
|
||||||
|
# 如果不填写,程序运行时将暂停并等待您手动输入验证码
|
||||||
|
超级鹰账号 = fuxiaochao
|
||||||
|
超级鹰密码 = um0e01no
|
||||||
|
超级鹰软件ID = 51d1ea8ea340b3d229f87c96c47f50b6
|
||||||
|
|
@ -2,8 +2,10 @@ import json
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
|
from chaojiying import ChaojiyingClient
|
||||||
|
|
||||||
class SCBankCollector:
|
class SCBankCollector:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
@ -11,9 +13,17 @@ class SCBankCollector:
|
||||||
# 动态生成文件名: data/raw_YYYYMMDD_HHMMSS.jsonl
|
# 动态生成文件名: data/raw_YYYYMMDD_HHMMSS.jsonl
|
||||||
self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")
|
self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
|
||||||
# 配置账号密码
|
# 读取外置配置文件
|
||||||
self.username = "Lsxd01"
|
self.config = self._load_config()
|
||||||
self.password = "Lsxd@2026"
|
|
||||||
|
self.username = self.config.get("scbank_username", "")
|
||||||
|
self.password = self.config.get("scbank_password", "")
|
||||||
|
|
||||||
|
# 初始化超级鹰客户端
|
||||||
|
cjy_user = self.config.get("chaojiying_username", "")
|
||||||
|
cjy_pass = self.config.get("chaojiying_password", "")
|
||||||
|
cjy_softid = self.config.get("chaojiying_softid", "96001")
|
||||||
|
self.cjy_client = ChaojiyingClient(cjy_user, cjy_pass, cjy_softid) if cjy_user and cjy_pass else None
|
||||||
|
|
||||||
# 确保数据目录存在
|
# 确保数据目录存在
|
||||||
if not os.path.exists("data"):
|
if not os.path.exists("data"):
|
||||||
|
|
@ -22,6 +32,61 @@ class SCBankCollector:
|
||||||
self.browser = None
|
self.browser = None
|
||||||
self.page = None
|
self.page = None
|
||||||
|
|
||||||
|
def _load_config(self):
    """Load account settings from ``config.txt`` in the working directory.

    The file uses ``key = value`` lines with Chinese keys; blank lines and
    lines starting with ``#`` are ignored. Only the first ``=`` splits a
    line, so values may themselves contain ``=`` or ``#``.

    If the file does not exist, a blank template is written and the process
    exits with status 1 so the user can fill it in. If the file cannot be
    read or decoded, an error is printed and the process exits with status 1.

    Returns:
        dict with the keys ``scbank_username``, ``scbank_password``,
        ``chaojiying_username``, ``chaojiying_password`` and
        ``chaojiying_softid``. Missing entries are empty strings, except the
        software ID, which falls back to ``"96001"`` when missing or blank.
    """
    config_file = "config.txt"
    default_softid = "96001"
    config_data = {
        "商城账号": "",
        "商城密码": "",
        "超级鹰账号": "",
        "超级鹰密码": "",
        "超级鹰软件ID": default_softid,
    }

    if not os.path.exists(config_file):
        print(f"[WARN] 未找到配置文件 {config_file},将创建一个默认模板,请填写后重新运行。")
        template = (
            "# ==========================================\n"
            "# 四川银行权益商城自动化工具 - 配置文件\n"
            "# 请在等号 (=) 后面填写您的账号和密码\n"
            "# 注意:等号前后可以有空格,但不要删除等号\n"
            "# ==========================================\n\n"
            "商城账号 = \n"
            "商城密码 = \n\n"
            "# --- 以下为验证码自动识别配置 (选填) ---\n"
            "# 如果不填写,程序运行时将暂停并等待您手动输入验证码\n"
            "超级鹰账号 = \n"
            "超级鹰密码 = \n"
            "超级鹰软件ID = 96001\n"
        )
        with open(config_file, "w", encoding="utf-8") as f:
            f.write(template)
        sys.exit(1)

    try:
        # utf-8-sig drops the byte-order mark that some editors (e.g. Windows
        # Notepad) prepend; with plain utf-8 the BOM would stick to the first
        # key and that entry would silently be lost.
        with open(config_file, "r", encoding="utf-8-sig") as f:
            for raw_line in f:
                line = raw_line.strip()
                # Skip blank lines and comments.
                if not line or line.startswith("#"):
                    continue

                key, sep, val = line.partition("=")
                if sep:
                    config_data[key.strip()] = val.strip()
    except (OSError, UnicodeError) as e:
        print(f"[ERROR] 读取配置文件失败: {e}")
        sys.exit(1)

    # Map the user-facing Chinese keys to the internal key names.
    return {
        "scbank_username": config_data.get("商城账号", ""),
        "scbank_password": config_data.get("商城密码", ""),
        "chaojiying_username": config_data.get("超级鹰账号", ""),
        "chaojiying_password": config_data.get("超级鹰密码", ""),
        # A blank value must not override the default software ID.
        "chaojiying_softid": config_data.get("超级鹰软件ID") or default_softid,
    }
|
||||||
|
|
||||||
def log(self, msg):
|
def log(self, msg):
|
||||||
print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
|
print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
|
||||||
|
|
||||||
|
|
@ -65,6 +130,49 @@ class SCBankCollector:
|
||||||
)
|
)
|
||||||
self.page = context.new_page()
|
self.page = context.new_page()
|
||||||
|
|
||||||
|
def _auto_solve_captcha(self):
    """Recognise the login captcha, type it in and click the login button.

    Takes a screenshot of the captcha image on ``self.page``, sends it to
    ``self.cjy_client`` and, when a non-empty code comes back, fills the
    captcha input and clicks the login button.

    Returns:
        True when a code was filled in and the login button was clicked.
        False when no captcha client is configured, the service reported an
        error, the recognised code was empty, or any step raised.
    """
    if not self.cjy_client:
        self.log("[WARN] 未配置超级鹰账号,跳过自动识别验证码。请手动输入验证码。")
        return False

    try:
        self.log("尝试获取验证码图片...")
        # Selector of the captcha image on the login page.
        captcha_img_selector = 'img.code-image'

        # Wait until the captcha image is present before capturing it.
        self.page.wait_for_selector(captcha_img_selector, timeout=5000)

        # An element screenshot yields the image bytes directly.
        image_bytes = self.page.locator(captcha_img_selector).screenshot()

        self.log("正在调用超级鹰进行识别...")
        # 1902: 4-6 mixed letters and digits.
        result = self.cjy_client.solve_captcha(image_bytes, codetype=1902)

        if not result or result.get('err_no') != 0:
            err_str = result.get('err_str', '未知错误') if result else '返回为空'
            self.log(f"❌ 验证码识别失败: {err_str}")
            return False

        # A "successful" reply without a usable code must not be submitted:
        # it would either make fill() raise or waste a login attempt.
        code = str(result.get('pic_str') or '').strip()
        if not code:
            self.log("❌ 验证码识别失败: 识别结果为空")
            return False

        self.log(f"✅ 验证码识别成功: {code}")

        # Type the recognised code into the captcha field.
        self.page.fill('input[name="code"]', code)

        # Submit the login form.
        self.log("尝试自动点击登录...")
        self.page.locator('button.login-btn').click()
        return True

    except Exception as e:
        # Deliberately broad: any browser or recognition failure falls back
        # to the caller's retry / manual-login handling.
        self.log(f"自动处理验证码发生异常: {e}")
        return False
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
try:
|
try:
|
||||||
self.start_browser()
|
self.start_browser()
|
||||||
|
|
@ -79,23 +187,66 @@ class SCBankCollector:
|
||||||
self.page.wait_for_selector('input[name="username"]', timeout=10000)
|
self.page.wait_for_selector('input[name="username"]', timeout=10000)
|
||||||
|
|
||||||
# 自动填入账号密码
|
# 自动填入账号密码
|
||||||
self.log(f"正在自动填入账号: {self.username}")
|
if self.username and self.password:
|
||||||
self.page.fill('input[name="username"]', self.username)
|
self.log(f"正在自动填入账号: {self.username}")
|
||||||
self.page.fill('input[name="password"]', self.password)
|
self.page.fill('input[name="username"]', self.username)
|
||||||
self.log("账号密码已填入")
|
self.page.fill('input[name="password"]', self.password)
|
||||||
|
self.log("账号密码已填入")
|
||||||
|
else:
|
||||||
|
self.log("[WARN] 配置文件中未提供商城账号密码,请手动输入")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")
|
self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")
|
||||||
|
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
self.log(">>> 请在浏览器中完成登录操作 (输入验证码并点击登录) <<<")
|
# 尝试自动识别验证码并重试
|
||||||
# 等待 URL 包含 homePage (用户指定)
|
max_retries = 3
|
||||||
try:
|
retry_count = 0
|
||||||
self.page.wait_for_url("**/homePage**", timeout=0)
|
login_success = False
|
||||||
self.log("检测到登录成功!")
|
|
||||||
time.sleep(1)
|
while retry_count < max_retries and not login_success:
|
||||||
except Exception as e:
|
if self.cjy_client:
|
||||||
self.log(f"登录等待超时或失败: {e}")
|
self.log(f"--- 第 {retry_count + 1} 次尝试登录 ---")
|
||||||
|
success = self._auto_solve_captcha()
|
||||||
|
if not success:
|
||||||
|
self.log("验证码识别失败,等待后重试...")
|
||||||
|
time.sleep(2)
|
||||||
|
# 点击验证码图片刷新
|
||||||
|
try:
|
||||||
|
self.page.locator('img.code-image').click()
|
||||||
|
time.sleep(1)
|
||||||
|
except: pass
|
||||||
|
retry_count += 1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
self.log(">>> 未配置超级鹰,请在浏览器中手动完成登录操作 (输入验证码并点击登录) <<<")
|
||||||
|
|
||||||
|
# 等待 URL 包含 homePage (用户指定) 或者 错误提示出现
|
||||||
|
try:
|
||||||
|
# 使用 wait_for_url 或者检查是否有错误弹窗
|
||||||
|
# 这里等待较短时间,如果失败则重试
|
||||||
|
self.page.wait_for_url("**/homePage**", timeout=10000 if self.cjy_client else 0)
|
||||||
|
self.log("✅ 检测到登录成功!")
|
||||||
|
login_success = True
|
||||||
|
time.sleep(1)
|
||||||
|
except Exception as e:
|
||||||
|
if self.cjy_client:
|
||||||
|
self.log(f"登录等待超时或失败,可能验证码错误。")
|
||||||
|
# 检查是否有错误提示 (比如 el-message)
|
||||||
|
try:
|
||||||
|
error_msg = self.page.locator('.el-message__content').inner_text(timeout=1000)
|
||||||
|
self.log(f"页面提示: {error_msg}")
|
||||||
|
# 刷新验证码图片
|
||||||
|
self.page.locator('img.code-image').click()
|
||||||
|
time.sleep(1)
|
||||||
|
except: pass
|
||||||
|
retry_count += 1
|
||||||
|
else:
|
||||||
|
self.log(f"登录等待超时或失败: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if not login_success:
|
||||||
|
self.log("[ERROR] 达到最大登录重试次数,放弃自动登录。请手动干预。")
|
||||||
return
|
return
|
||||||
|
|
||||||
# 2. 强制跳转目标页
|
# 2. 强制跳转目标页
|
||||||
|
|
|
||||||
4
使用说明.txt
4
使用说明.txt
|
|
@ -8,14 +8,14 @@
|
||||||
2. 确保您的电脑已安装 Google Chrome 或 Microsoft Edge 浏览器。
|
2. 确保您的电脑已安装 Google Chrome 或 Microsoft Edge 浏览器。
|
||||||
3. 双击运行文件夹中的 "scbank_tool.exe"。
|
3. 双击运行文件夹中的 "scbank_tool.exe"。
|
||||||
4. 按照屏幕提示操作:
|
4. 按照屏幕提示操作:
|
||||||
- 输入 "1" 并回车:启动采集。程序会自动打开浏览器,请登录后进入订单页面,选择下单时间后点击查询,程序会自动抓取数据。
|
- 输入 "1" 并回车:启动采集。程序会自动打开浏览器并填入账号密码;若已配置超级鹰则自动识别验证码并登录,否则请手动输入验证码完成登录。登录成功后,程序会自动进入订单页面并下载待发货订单。
|
||||||
- 输入 "2" 并回车:启动处理。程序会自动读取抓取到的数据,上传到货易通系统,并生成 Excel 报表。
|
- 输入 "2" 并回车:启动处理。程序会自动读取抓取到的数据,上传到货易通系统,并生成 Excel 报表。
|
||||||
|
|
||||||
二、 文件结构说明
|
二、 文件结构说明
|
||||||
|
|
||||||
- scbank_tool.exe : 主程序,双击运行。
|
- scbank_tool.exe : 主程序,双击运行。
|
||||||
- product_map.txt : 商品名称映射配置文件 (文本文件,可用记事本编辑)。
|
- product_map.txt : 商品名称映射配置文件 (文本文件,可用记事本编辑)。
|
||||||
- data/ : [自动生成] 存放采集到的原始数据 (.jsonl)。
|
- data/ : [自动生成] 存放采集到的原始数据 (.jsonl 或 .xls)。
|
||||||
- data/archive/ : [自动生成] 存放处理完成并归档的数据。
|
- data/archive/ : [自动生成] 存放处理完成并归档的数据。
|
||||||
- output/ : [自动生成] 存放处理结果和 Excel 报表。
|
- output/ : [自动生成] 存放处理结果和 Excel 报表。
|
||||||
- error.log : [自动生成] 如果程序闪退或出错,错误信息会保存在这里。
|
- error.log : [自动生成] 如果程序闪退或出错,错误信息会保存在这里。
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue