feat: 增加验证码自动验证
This commit is contained in:
parent
00dcb45882
commit
7b2d476b81
|
|
@ -0,0 +1,54 @@
|
|||
import requests
|
||||
from hashlib import md5
|
||||
|
||||
class ChaojiyingClient:
    """Client for the Chaojiying captcha-recognition HTTP API.

    The account name, password and software id are sent as form fields with
    every request. Both public methods return a dict; on any failure they
    return ``{'err_no': -1, 'err_str': <message>}`` instead of raising.
    """

    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id):
        """Store the credentials and build the form fields shared by all requests.

        :param username: Chaojiying account name
        :param password: Chaojiying account password
        :param soft_id: software id registered with Chaojiying
        """
        self.username = username
        # The password is submitted unchanged in the 'pass' field (no hashing).
        # NOTE(review): the endpoints are plain HTTP, so the password travels in
        # clear text - confirm whether an HTTPS endpoint or a hashed-password
        # field is available before relying on this in production.
        self.password = password
        self.soft_id = soft_id
        self.base_params = {
            'user': self.username,
            'pass': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, params, timeout, files=None, err_prefix=''):
        """POST a form to ``url`` and return the decoded JSON object.

        Any failure (connection error, HTTP error status, invalid JSON) is
        converted into an ``err_no == -1`` dict whose ``err_str`` is
        ``err_prefix`` followed by the exception text.
        """
        try:
            response = requests.post(
                url, data=params, files=files, headers=self.headers, timeout=timeout
            )
            # Fail with a readable HTTP error instead of a JSON decode error
            # when the server answers with an error page.
            response.raise_for_status()
            result = response.json()
        except Exception as e:
            return {'err_no': -1, 'err_str': f"{err_prefix}{e}"}

        # Callers use dict access on the result, so reject any other JSON shape.
        if not isinstance(result, dict):
            return {'err_no': -1, 'err_str': f"{err_prefix}返回格式异常: {result!r}"}
        return result

    def solve_captcha(self, image_bytes, codetype=1902):
        """Upload a captcha image and return the recognition result.

        :param image_bytes: raw bytes of the captcha image
        :param codetype: Chaojiying question type (1902 = 4-6 mixed letters/digits)
        :return: result dict; ``err_no == -1`` when the image is empty or the
                 request failed
        """
        if not image_bytes:
            # Nothing to recognize - do not spend a request on an empty upload.
            return {'err_no': -1, 'err_str': '验证码图片数据为空'}

        # base_params is applied last, matching the original update() order.
        params = {'codetype': codetype, **self.base_params}
        files = {'userfile': ('captcha.jpg', image_bytes)}
        return self._post(
            self.UPLOAD_URL, params, timeout=30, files=files, err_prefix='网络请求异常: '
        )

    def report_error(self, pic_id):
        """Report a wrong recognition for ``pic_id`` so the points can be refunded.

        :param pic_id: picture id of the recognition being reported
        :return: result dict; ``err_no == -1`` when the request failed
        """
        params = {'id': pic_id, **self.base_params}
        return self._post(self.REPORT_URL, params, timeout=10)
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
# ==========================================
|
||||
# 四川银行权益商城自动化工具 - 配置文件
|
||||
# 请在等号 (=) 后面填写您的账号和密码
|
||||
# 注意:等号前后可以有空格,但不要删除等号
|
||||
# ==========================================
|
||||
|
||||
商城账号 = Lsxd01
|
||||
商城密码 = Lsxd@2026
|
||||
|
||||
# --- 以下为验证码自动识别配置 (选填) ---
|
||||
# 如果不填写,程序运行时将暂停并等待您手动输入验证码
|
||||
超级鹰账号 = fuxiaochao
|
||||
超级鹰密码 = um0e01no
|
||||
超级鹰软件ID = 51d1ea8ea340b3d229f87c96c47f50b6
|
||||
|
|
@ -2,8 +2,10 @@ import json
|
|||
import time
|
||||
import random
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from playwright.sync_api import sync_playwright
|
||||
from chaojiying import ChaojiyingClient
|
||||
|
||||
class SCBankCollector:
|
||||
def __init__(self):
|
||||
|
|
@ -11,9 +13,17 @@ class SCBankCollector:
|
|||
# 动态生成文件名: data/raw_YYYYMMDD_HHMMSS.jsonl
|
||||
self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
# 配置账号密码
|
||||
self.username = "Lsxd01"
|
||||
self.password = "Lsxd@2026"
|
||||
# 读取外置配置文件
|
||||
self.config = self._load_config()
|
||||
|
||||
self.username = self.config.get("scbank_username", "")
|
||||
self.password = self.config.get("scbank_password", "")
|
||||
|
||||
# 初始化超级鹰客户端
|
||||
cjy_user = self.config.get("chaojiying_username", "")
|
||||
cjy_pass = self.config.get("chaojiying_password", "")
|
||||
cjy_softid = self.config.get("chaojiying_softid", "96001")
|
||||
self.cjy_client = ChaojiyingClient(cjy_user, cjy_pass, cjy_softid) if cjy_user and cjy_pass else None
|
||||
|
||||
# 确保数据目录存在
|
||||
if not os.path.exists("data"):
|
||||
|
|
@ -22,6 +32,61 @@ class SCBankCollector:
|
|||
self.browser = None
|
||||
self.page = None
|
||||
|
||||
def _load_config(self):
|
||||
config_file = "config.txt"
|
||||
config_data = {
|
||||
"商城账号": "",
|
||||
"商城密码": "",
|
||||
"超级鹰账号": "",
|
||||
"超级鹰密码": "",
|
||||
"超级鹰软件ID": "96001"
|
||||
}
|
||||
|
||||
if not os.path.exists(config_file):
|
||||
print(f"[WARN] 未找到配置文件 {config_file},将创建一个默认模板,请填写后重新运行。")
|
||||
template = (
|
||||
"# ==========================================\n"
|
||||
"# 四川银行权益商城自动化工具 - 配置文件\n"
|
||||
"# 请在等号 (=) 后面填写您的账号和密码\n"
|
||||
"# 注意:等号前后可以有空格,但不要删除等号\n"
|
||||
"# ==========================================\n\n"
|
||||
"商城账号 = \n"
|
||||
"商城密码 = \n\n"
|
||||
"# --- 以下为验证码自动识别配置 (选填) ---\n"
|
||||
"# 如果不填写,程序运行时将暂停并等待您手动输入验证码\n"
|
||||
"超级鹰账号 = \n"
|
||||
"超级鹰密码 = \n"
|
||||
"超级鹰软件ID = 96001\n"
|
||||
)
|
||||
with open(config_file, "w", encoding="utf-8") as f:
|
||||
f.write(template)
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
with open(config_file, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
# 跳过注释和空行
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
|
||||
if "=" in line:
|
||||
key, val = line.split("=", 1)
|
||||
config_data[key.strip()] = val.strip()
|
||||
|
||||
# 将中文 key 映射回内部使用的 key
|
||||
return {
|
||||
"scbank_username": config_data.get("商城账号", ""),
|
||||
"scbank_password": config_data.get("商城密码", ""),
|
||||
"chaojiying_username": config_data.get("超级鹰账号", ""),
|
||||
"chaojiying_password": config_data.get("超级鹰密码", ""),
|
||||
"chaojiying_softid": config_data.get("超级鹰软件ID", "96001")
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ERROR] 读取配置文件失败: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def log(self, msg):
|
||||
print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
|
||||
|
||||
|
|
@ -65,6 +130,49 @@ class SCBankCollector:
|
|||
)
|
||||
self.page = context.new_page()
|
||||
|
||||
def _auto_solve_captcha(self):
|
||||
"""自动识别并填写验证码"""
|
||||
if not self.cjy_client:
|
||||
self.log("[WARN] 未配置超级鹰账号,跳过自动识别验证码。请手动输入验证码。")
|
||||
return False
|
||||
|
||||
try:
|
||||
self.log("尝试获取验证码图片...")
|
||||
# 四川银行登录页的验证码图片选择器
|
||||
captcha_img_selector = 'img.code-image'
|
||||
|
||||
# 等待图片加载完成
|
||||
self.page.wait_for_selector(captcha_img_selector, timeout=5000)
|
||||
|
||||
# 获取图片的 base64 数据或截图
|
||||
# 因为是图片验证码,我们可以直接用 playwright 的 screenshot 功能获取二进制
|
||||
image_bytes = self.page.locator(captcha_img_selector).screenshot()
|
||||
|
||||
self.log("正在调用超级鹰进行识别...")
|
||||
# 1902: 4-6位英文数字混合
|
||||
result = self.cjy_client.solve_captcha(image_bytes, codetype=1902)
|
||||
|
||||
if result and result.get('err_no') == 0:
|
||||
code = result.get('pic_str')
|
||||
self.log(f"✅ 验证码识别成功: {code}")
|
||||
|
||||
# 填入验证码
|
||||
self.page.fill('input[name="code"]', code)
|
||||
|
||||
# 点击登录
|
||||
self.log("尝试自动点击登录...")
|
||||
self.page.locator('button.login-btn').click()
|
||||
return True
|
||||
else:
|
||||
print(result)
|
||||
err_str = result.get('err_str', '未知错误') if result else '返回为空'
|
||||
self.log(f"❌ 验证码识别失败: {err_str}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"自动处理验证码发生异常: {e}")
|
||||
return False
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
self.start_browser()
|
||||
|
|
@ -79,23 +187,66 @@ class SCBankCollector:
|
|||
self.page.wait_for_selector('input[name="username"]', timeout=10000)
|
||||
|
||||
# 自动填入账号密码
|
||||
self.log(f"正在自动填入账号: {self.username}")
|
||||
self.page.fill('input[name="username"]', self.username)
|
||||
self.page.fill('input[name="password"]', self.password)
|
||||
self.log("账号密码已填入")
|
||||
if self.username and self.password:
|
||||
self.log(f"正在自动填入账号: {self.username}")
|
||||
self.page.fill('input[name="username"]', self.username)
|
||||
self.page.fill('input[name="password"]', self.password)
|
||||
self.log("账号密码已填入")
|
||||
else:
|
||||
self.log("[WARN] 配置文件中未提供商城账号密码,请手动输入")
|
||||
except Exception as e:
|
||||
self.log(f"自动填入账号密码失败 (可能已登录或页面结构变化): {e}")
|
||||
|
||||
except: pass
|
||||
|
||||
self.log(">>> 请在浏览器中完成登录操作 (输入验证码并点击登录) <<<")
|
||||
# 等待 URL 包含 homePage (用户指定)
|
||||
try:
|
||||
self.page.wait_for_url("**/homePage**", timeout=0)
|
||||
self.log("检测到登录成功!")
|
||||
time.sleep(1)
|
||||
except Exception as e:
|
||||
self.log(f"登录等待超时或失败: {e}")
|
||||
# 尝试自动识别验证码并重试
|
||||
max_retries = 3
|
||||
retry_count = 0
|
||||
login_success = False
|
||||
|
||||
while retry_count < max_retries and not login_success:
|
||||
if self.cjy_client:
|
||||
self.log(f"--- 第 {retry_count + 1} 次尝试登录 ---")
|
||||
success = self._auto_solve_captcha()
|
||||
if not success:
|
||||
self.log("验证码识别失败,等待后重试...")
|
||||
time.sleep(2)
|
||||
# 点击验证码图片刷新
|
||||
try:
|
||||
self.page.locator('img.code-image').click()
|
||||
time.sleep(1)
|
||||
except: pass
|
||||
retry_count += 1
|
||||
continue
|
||||
else:
|
||||
self.log(">>> 未配置超级鹰,请在浏览器中手动完成登录操作 (输入验证码并点击登录) <<<")
|
||||
|
||||
# 等待 URL 包含 homePage (用户指定) 或者 错误提示出现
|
||||
try:
|
||||
# 使用 wait_for_url 或者检查是否有错误弹窗
|
||||
# 这里等待较短时间,如果失败则重试
|
||||
self.page.wait_for_url("**/homePage**", timeout=10000 if self.cjy_client else 0)
|
||||
self.log("✅ 检测到登录成功!")
|
||||
login_success = True
|
||||
time.sleep(1)
|
||||
except Exception as e:
|
||||
if self.cjy_client:
|
||||
self.log(f"登录等待超时或失败,可能验证码错误。")
|
||||
# 检查是否有错误提示 (比如 el-message)
|
||||
try:
|
||||
error_msg = self.page.locator('.el-message__content').inner_text(timeout=1000)
|
||||
self.log(f"页面提示: {error_msg}")
|
||||
# 刷新验证码图片
|
||||
self.page.locator('img.code-image').click()
|
||||
time.sleep(1)
|
||||
except: pass
|
||||
retry_count += 1
|
||||
else:
|
||||
self.log(f"登录等待超时或失败: {e}")
|
||||
return
|
||||
|
||||
if not login_success:
|
||||
self.log("[ERROR] 达到最大登录重试次数,放弃自动登录。请手动干预。")
|
||||
return
|
||||
|
||||
# 2. 强制跳转目标页
|
||||
|
|
|
|||
4
使用说明.txt
4
使用说明.txt
|
|
@ -8,14 +8,14 @@
|
|||
2. 确保您的电脑已安装 Google Chrome 或 Microsoft Edge 浏览器。
|
||||
3. 双击运行文件夹中的 "scbank_tool.exe"。
|
||||
4. 按照屏幕提示操作:
|
||||
- 输入 "1" 并回车:启动采集。程序会自动打开浏览器,请登录后进入订单页面,选择下单时间后点击查询,程序会自动抓取数据。
|
||||
- 输入 "1" 并回车:启动采集。程序会自动打开浏览器,请先完成登录;登录成功后,程序会自动进入订单页面并下载待发货订单。
|
||||
- 输入 "2" 并回车:启动处理。程序会自动读取抓取到的数据,上传到货易通系统,并生成 Excel 报表。
|
||||
|
||||
二、 文件结构说明
|
||||
|
||||
- scbank_tool.exe : 主程序,双击运行。
|
||||
- product_map.txt : 商品名称映射配置文件 (文本文件,可用记事本编辑)。
|
||||
- data/ : [自动生成] 存放采集到的原始数据 (.jsonl)。
|
||||
- data/ : [自动生成] 存放采集到的原始数据 (.jsonl 或 .xls)。
|
||||
- data/archive/ : [自动生成] 存放处理完成并归档的数据。
|
||||
- output/ : [自动生成] 存放处理结果和 Excel 报表。
|
||||
- error.log : [自动生成] 如果程序闪退或出错,错误信息会保存在这里。
|
||||
|
|
|
|||
Loading…
Reference in New Issue