整体基本完成
This commit is contained in:
parent
219cb8ae37
commit
3bab25f983
|
|
@ -4,4 +4,8 @@ output/
|
|||
.trae/
|
||||
实物系统API文档.pdf
|
||||
*.pyc
|
||||
__pycache__/
|
||||
__pycache__/
|
||||
build/
|
||||
dist/
|
||||
.temp*
|
||||
*.spec
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
# 四川银行权益商城自动化工具 (SCBank Tool)
|
||||
|
||||
本项目用于自动化处理四川银行权益商城的订单数据,包含数据采集(Collector)和数据处理与上传(Processor)两个主要功能。
|
||||
|
||||
## 核心功能
|
||||
|
||||
1. **数据采集 (Collector)**:
|
||||
- 自动启动本机已安装的 Chrome 或 Edge 浏览器(均不可用时回退到 Playwright 内置 Chromium),监听网络请求。
|
||||
- 自动识别并捕获“列表”和“详情”数据包,保存为 `.jsonl` 格式。
|
||||
- 自动翻页和点击详情,无需人工干预。
|
||||
- 数据保存在 `data/` 目录下。
|
||||
|
||||
2. **数据处理 (Processor)**:
|
||||
- 扫描 `data/` 目录下的原始数据。
|
||||
- 智能合并列表数据与详情数据。
|
||||
- 对接内部实物系统 API,自动上传订单。
|
||||
- 导出处理结果为 Excel 文件至 `output/` 目录。
|
||||
- 自动归档已处理文件至 `data/archive/` 目录。
|
||||
|
||||
## 文件说明
|
||||
|
||||
- `main.py`: 程序入口,提供交互式菜单。
|
||||
- `scbank_collector.py`: 采集模块,负责浏览器自动化和数据抓取。
|
||||
- `scbank_processor.py`: 处理模块,负责数据清洗、API 上传和 Excel 导出。
|
||||
- `requirements.txt`: Python 依赖列表。
|
||||
- `build_portable.ps1`: Windows 打包脚本。
|
||||
|
||||
## 运行方式 (源码运行)
|
||||
|
||||
### 前置要求
|
||||
1. 安装 Python 3.8 或以上版本。
|
||||
2. 确保电脑上已安装 **Google Chrome** 或 **Microsoft Edge** 浏览器。
|
||||
3. 安装依赖包:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 启动程序
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
按提示选择功能:
|
||||
- 输入 `1` 启动采集。程序会自动寻找本机安装的 Chrome 或 Edge 浏览器。
|
||||
- 输入 `2` 启动处理。
|
||||
|
||||
## Windows 打包指南 (便携式打包 - 无需安装Python)
|
||||
|
||||
如果你不想在 Windows 系统中安装 Python,可以使用以下脚本进行一次性打包。
|
||||
脚本会自动下载一个临时的 Python 环境,打包完成后自动删除,不会污染你的系统。
|
||||
|
||||
### 1. 执行打包脚本
|
||||
在 Windows 中右键单击项目根目录下的 `build_portable.ps1` 文件,选择“使用 PowerShell 运行”(默认情况下双击 `.ps1` 文件只会用编辑器打开,而不会执行)。
|
||||
或者在 PowerShell 中运行:
|
||||
```powershell
|
||||
.\build_portable.ps1
|
||||
```
|
||||
|
||||
该脚本会自动执行以下操作:
|
||||
1. 下载 Python 3.10 便携版 (Embeddable Package)。
|
||||
2. 配置临时的 pip 环境。
|
||||
3. 安装依赖库 (pandas, playwright, pyinstaller 等)。
|
||||
4. 执行打包命令。
|
||||
5. 清理临时文件。
|
||||
|
||||
### 2. 获取结果
|
||||
打包完成后,可执行文件位于:
|
||||
`dist\scbank_tool\scbank_tool.exe`
|
||||
|
||||
### 3. 运行注意事项
|
||||
- **网络连接**: 脚本需要下载 Python (约 20MB) 和依赖包,请确保网络畅通。
|
||||
- **权限**: 脚本需要在当前目录创建临时文件夹,请确保有写入权限。
|
||||
|
||||
## 常见问题
|
||||
|
||||
- **Q: 运行时提示 `KeyError: 'pageDataList'` 或类似错误?**
|
||||
- A: 可能是网页结构发生变化。请检查 `scbank_collector.py` 中的 Hook 逻辑。
|
||||
|
||||
- **Q: 上传订单失败?**
|
||||
- A: 请检查 `scbank_processor.py` 中的 `InternalApiClient` 类,确认 `app_id` 和 API 地址是否正确。
|
||||
|
||||
- **Q: 打包后运行闪退?**
|
||||
- A: 请在命令行中运行 exe 文件,查看具体报错信息。常见原因是缺少依赖或路径问题。
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
<#
.SYNOPSIS
    Automated build script using Portable Python (No installation required)
.DESCRIPTION
    This script downloads a temporary Python environment, installs dependencies,
    builds the executable, and then cleans up the Python environment.
    Your Windows system remains clean.

    Steps performed, relative to the current directory:
      1. Create a fresh .temp_python working folder.
      2. Download and unpack the embeddable Python distribution.
      3. Bootstrap pip with get-pip.py.
      4. Install PyInstaller, greenlet and everything in requirements.txt.
      5. Run PyInstaller on main.py (output: dist\scbank_tool\scbank_tool.exe).
      6. Remove the temporary Python environment (also on failure).

    The script exits with code 0 on success and 1 if any step failed.
.PARAMETER KeepTemp
    Keep the temporary Python environment (.temp_python) after the build
    instead of deleting it. Useful when debugging a failed build.
#>
param(
    [switch]$KeepTemp
)

$ErrorActionPreference = "Stop"

# Older Windows PowerShell 5.1 setups may not offer TLS 1.2 by default, which
# HTTPS downloads can require. Enabling it is harmless where it is already on.
[Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12

# Configuration
$PythonVer = "3.10.11"
$PythonUrl = "https://www.python.org/ftp/python/$PythonVer/python-$PythonVer-embed-amd64.zip"
$GetPipUrl = "https://bootstrap.pypa.io/get-pip.py"
$WorkDir   = Get-Location
$TempDir   = Join-Path $WorkDir ".temp_python"
$PythonExe = Join-Path $TempDir "python.exe"
$ZipPath   = Join-Path $WorkDir "python-embed.zip"

# Runs the temporary python.exe with the given arguments and aborts the build
# if it fails. $ErrorActionPreference = "Stop" only affects cmdlets, so the exit
# code of a native executable has to be checked explicitly via $LASTEXITCODE.
function Invoke-Python {
    param(
        [Parameter(Mandatory = $true)]
        [string[]]$Arguments
    )

    & $PythonExe @Arguments
    if ($LASTEXITCODE -ne 0) {
        throw "python $($Arguments -join ' ') failed with exit code $LASTEXITCODE"
    }
}

$ExitCode = 0

try {
    Write-Host "[Step 1/6] Setting up temporary workspace..." -ForegroundColor Cyan
    # Always start from an empty folder so a previous aborted run cannot leak in.
    if (Test-Path $TempDir) { Remove-Item $TempDir -Recurse -Force }
    New-Item -ItemType Directory -Path $TempDir | Out-Null

    Write-Host "[Step 2/6] Downloading Portable Python $PythonVer..." -ForegroundColor Cyan
    Invoke-WebRequest -Uri $PythonUrl -OutFile $ZipPath
    Expand-Archive -Path $ZipPath -DestinationPath $TempDir
    Remove-Item $ZipPath

    # Fix python3xx._pth to allow 'import site' (Crucial for pip)
    $PthFile = Get-ChildItem $TempDir -Filter "python*._pth" | Select-Object -First 1
    if ($PthFile) {
        $Content = Get-Content $PthFile.FullName
        $Content = $Content -replace "#import site", "import site"
        $Content | Set-Content $PthFile.FullName
        Write-Host "  -> Enabled 'import site' in $($PthFile.Name)" -ForegroundColor Gray
    }

    Write-Host "[Step 3/6] Installing pip..." -ForegroundColor Cyan
    $GetPipPath = Join-Path $TempDir "get-pip.py"
    Invoke-WebRequest -Uri $GetPipUrl -OutFile $GetPipPath
    Invoke-Python -Arguments @($GetPipPath, "--no-warn-script-location")
    Remove-Item $GetPipPath

    Write-Host "[Step 4/6] Installing dependencies..." -ForegroundColor Cyan
    # Install PyInstaller first
    Invoke-Python -Arguments @("-m", "pip", "install", "pyinstaller", "--no-warn-script-location")

    # Install greenlet binary (Critical Fix)
    # Playwright depends on greenlet, which needs C++ runtime.
    # Pre-built wheel usually works, but sometimes fails in embeddable python.
    # Explicitly installing it helps.
    Invoke-Python -Arguments @("-m", "pip", "install", "greenlet", "--no-warn-script-location")

    # Install project requirements
    Invoke-Python -Arguments @(
        "-m", "pip", "install", "-r", "requirements.txt",
        "--no-warn-script-location",
        "-i", "https://pypi.tuna.tsinghua.edu.cn/simple"
    )

    Write-Host "[Step 5/6] Building executable..." -ForegroundColor Cyan
    # Run PyInstaller
    Invoke-Python -Arguments @(
        "-m", "PyInstaller",
        "--noconfirm", "--onedir", "--console", "--clean",
        "--name", "scbank_tool",
        "main.py"
    )

    Write-Host "[Success] Build completed successfully!" -ForegroundColor Green
    Write-Host "Executable location: dist\scbank_tool\scbank_tool.exe"
}
catch {
    # Report the failing step but keep going, so the cleanup below still runs
    # and the window stays open long enough for the message to be read.
    $ExitCode = 1
    Write-Host "[Error] Build failed." -ForegroundColor Red
    Write-Host "  -> $($_.Exception.Message)" -ForegroundColor Red
}
finally {
    Write-Host "[Step 6/6] Cleaning up temporary Python environment..." -ForegroundColor Cyan
    if ($KeepTemp) {
        Write-Host "  -> -KeepTemp specified, leaving $TempDir in place" -ForegroundColor Gray
    }
    else {
        # Best-effort removal: a locked file must not hide the real build result.
        foreach ($Leftover in @($TempDir, $ZipPath)) {
            if (Test-Path $Leftover) {
                Remove-Item $Leftover -Recurse -Force -ErrorAction SilentlyContinue
            }
        }
    }
}

Write-Host "Done."
Read-Host "Press Enter to exit..." | Out-Null
exit $ExitCode
|
||||
21
main.py
21
main.py
|
|
@ -23,6 +23,9 @@ def main():
|
|||
sys.exit(0)
|
||||
else:
|
||||
print("[错误] 无效选项,请重新输入。")
|
||||
|
||||
# 暂停一下,避免刷屏太快
|
||||
# input("\n按回车键继续...")
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
|
|
@ -30,3 +33,21 @@ if __name__ == "__main__":
|
|||
except KeyboardInterrupt:
|
||||
print("\n[系统] 用户中断,程序已退出。")
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
# 直接把错误写到当前目录的 error.log 文件
|
||||
with open("error.log", "w", encoding="utf-8") as f:
|
||||
import traceback
|
||||
f.write(f"Error: {str(e)}\n")
|
||||
f.write(traceback.format_exc())
|
||||
|
||||
# 尝试打印
|
||||
print(f"\n[致命错误] {e}")
|
||||
print("错误日志已保存至 error.log")
|
||||
|
||||
# 死循环等待用户输入,防止窗口关闭
|
||||
while True:
|
||||
try:
|
||||
input("程序发生错误,请查看 error.log。按回车键尝试退出...")
|
||||
except:
|
||||
break
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
playwright
|
||||
pandas
|
||||
openpyxl
|
||||
requests
|
||||
pyinstaller
|
||||
|
|
@ -10,6 +10,11 @@ class SCBankCollector:
|
|||
self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders"
|
||||
# 动态生成文件名: data/raw_YYYYMMDD_HHMMSS.jsonl
|
||||
self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
# 确保数据目录存在
|
||||
if not os.path.exists("data"):
|
||||
os.makedirs("data")
|
||||
|
||||
self.output_file = f"data/raw_{self.ts_str}.jsonl"
|
||||
self.browser = None
|
||||
self.page = None
|
||||
|
|
@ -37,10 +42,36 @@ class SCBankCollector:
|
|||
def start_browser(self):
|
||||
self.log("启动浏览器...")
|
||||
p = sync_playwright().start()
|
||||
self.browser = p.chromium.launch(
|
||||
headless=False,
|
||||
args=["--disable-blink-features=AutomationControlled"]
|
||||
)
|
||||
|
||||
# 尝试使用本地浏览器 (Chrome 或 Edge)
|
||||
browser = None
|
||||
for channel in ["chrome", "msedge"]:
|
||||
try:
|
||||
self.log(f"尝试启动本地 {channel}...")
|
||||
browser = p.chromium.launch(
|
||||
channel=channel,
|
||||
headless=False,
|
||||
args=["--disable-blink-features=AutomationControlled"]
|
||||
)
|
||||
self.log(f"成功启动 {channel}")
|
||||
break
|
||||
except Exception as e:
|
||||
self.log(f"启动 {channel} 失败,尝试下一个...")
|
||||
|
||||
# 如果本地浏览器都失败,尝试使用内置 Chromium (如果已安装)
|
||||
if not browser:
|
||||
self.log("未找到本地 Chrome 或 Edge,尝试使用内置 Chromium...")
|
||||
try:
|
||||
browser = p.chromium.launch(
|
||||
headless=False,
|
||||
args=["--disable-blink-features=AutomationControlled"]
|
||||
)
|
||||
except Exception as e:
|
||||
self.log(f"[FATAL] 无法启动任何浏览器: {e}")
|
||||
self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。")
|
||||
raise e
|
||||
|
||||
self.browser = browser
|
||||
context = self.browser.new_context(
|
||||
viewport={'width': 1920, 'height': 1080},
|
||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
|
|
@ -175,7 +206,7 @@ class SCBankCollector:
|
|||
"""点击当前页所有详情"""
|
||||
try:
|
||||
# 必须等待行出现
|
||||
self.page.wait_for_selector(".el-table__row", timeout=5000)
|
||||
self.page.wait_for_selector(".el-table__row", timeout=0)
|
||||
except:
|
||||
self.log("当前页无数据或加载超时")
|
||||
return
|
||||
|
|
|
|||
|
|
@ -13,13 +13,14 @@ class InternalApiClient:
|
|||
"""
|
||||
def __init__(self):
|
||||
# 生产环境
|
||||
# self.api_base_url = "https://hyt.86698.cn/open"
|
||||
self.api_base_url = "https://hyt.86698.cn/open"
|
||||
# 测试环境 (默认)
|
||||
self.api_base_url = "http://120.55.12.245:8109"
|
||||
# self.api_base_url = "http://120.55.12.245:8109"
|
||||
|
||||
# 占位符 AppId,实际需替换
|
||||
# self.app_id = "8ce4d435fa77492f84d2fafa241c2804"
|
||||
self.app_id = "e699e6ef74504f4d86776b3d244ce602"
|
||||
# 生产环境
|
||||
self.app_id = "8ce4d435fa77492f84d2fafa241c2804"
|
||||
# 测试环境 (默认)
|
||||
# self.app_id = "e699e6ef74504f4d86776b3d244ce602"
|
||||
|
||||
def _post(self, path, data):
|
||||
"""发送 POST 请求"""
|
||||
|
|
@ -136,6 +137,12 @@ class SCBankProcessor:
|
|||
self.data_dir = "data"
|
||||
self.archive_dir = "data/archive"
|
||||
self.output_dir = "output"
|
||||
|
||||
# 确保目录结构存在
|
||||
for d in [self.data_dir, self.archive_dir, self.output_dir]:
|
||||
if not os.path.exists(d):
|
||||
os.makedirs(d)
|
||||
|
||||
self.client = InternalApiClient()
|
||||
|
||||
def log(self, msg):
|
||||
|
|
@ -253,12 +260,17 @@ class SCBankProcessor:
|
|||
except:
|
||||
pass
|
||||
|
||||
# 收件人信息
|
||||
receive_info = detail.get("mallOrderReceiveInfo", {})
|
||||
|
||||
results.append({
|
||||
"处理状态": "成功" if success else "失败",
|
||||
"失败原因": "" if success else msg,
|
||||
"下单时间": order_time,
|
||||
"订单编号": order_no,
|
||||
"下单用户手机号码": detail.get("orderMobile"),
|
||||
"收货人": receive_info.get("receiverName"),
|
||||
"收货地址": receive_info.get("fullAddress"),
|
||||
"下单用户手机号码": receive_info.get("receiverMobile", "00000000000"),
|
||||
"商户名称": detail.get("exMerchant"),
|
||||
"订单金额": detail.get("orderAmt"),
|
||||
"商品名称": goods_name,
|
||||
|
|
|
|||
Loading…
Reference in New Issue