diff --git a/.gitignore b/.gitignore index 7fe09a6..554892e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,8 @@ output/ .trae/ 实物系统API文档.pdf *.pyc -__pycache__/ \ No newline at end of file +__pycache__/ +build/ +dist/ +.temp* +*.spec \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..f344218 --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# 四川银行权益商城自动化工具 (SCBank Tool) + +本项目用于自动化处理四川银行权益商城的订单数据,包含数据采集(Collector)和数据处理与上传(Processor)两个主要功能。 + +## 核心功能 + +1. **数据采集 (Collector)**: + - 自动启动 Chromium 浏览器,监听网络请求。 + - 自动识别并捕获“列表”和“详情”数据包,保存为 `.jsonl` 格式。 + - 自动翻页和点击详情,无需人工干预。 + - 数据保存在 `data/` 目录下。 + +2. **数据处理 (Processor)**: + - 扫描 `data/` 目录下的原始数据。 + - 智能合并列表数据与详情数据。 + - 对接内部实物系统 API,自动上传订单。 + - 导出处理结果为 Excel 文件至 `output/` 目录。 + - 自动归档已处理文件至 `data/archive/` 目录。 + +## 文件说明 + +- `main.py`: 程序入口,提供交互式菜单。 +- `scbank_collector.py`: 采集模块,负责浏览器自动化和数据抓取。 +- `scbank_processor.py`: 处理模块,负责数据清洗、API 上传和 Excel 导出。 +- `requirements.txt`: Python 依赖列表。 +- `build_portable.ps1`: Windows 打包脚本。 + +## 运行方式 (源码运行) + +### 前置要求 +1. 安装 Python 3.8 或以上版本。 +2. 确保电脑上已安装 **Google Chrome** 或 **Microsoft Edge** 浏览器。 +3. 安装依赖包: + ```bash + pip install -r requirements.txt + ``` + +### 启动程序 +```bash +python main.py +``` +按提示选择功能: +- 输入 `1` 启动采集。程序会自动寻找本机安装的 Chrome 或 Edge 浏览器。 +- 输入 `2` 启动处理。 + +## Windows 打包指南 (便携式打包 - 无需安装Python) + +如果你不想在 Windows 系统中安装 Python,可以使用以下脚本进行一次性打包。 +脚本会自动下载一个临时的 Python 环境,打包完成后自动删除,不会污染你的系统。 + +### 1. 执行打包脚本 +在 Windows 中双击运行项目根目录下的 `build_portable.ps1` 文件。 +或者在 PowerShell 中运行: +```powershell +.\build_portable.ps1 +``` + +该脚本会自动执行以下操作: +1. 下载 Python 3.10 便携版 (Embeddable Package)。 +2. 配置临时的 pip 环境。 +3. 安装依赖库 (pandas, playwright, pyinstaller 等)。 +4. 执行打包命令。 +5. 清理临时文件。 + +### 2. 获取结果 +打包完成后,可执行文件位于: +`dist\scbank_tool\scbank_tool.exe` + +### 3. 运行注意事项 +- **网络连接**: 脚本需要下载 Python (约 20MB) 和依赖包,请确保网络畅通。 +- **权限**: 脚本需要在当前目录创建临时文件夹,请确保有写入权限。 + +## 常见问题 + +- **Q: 运行时提示 `KeyError: 'pageDataList'` 或类似错误?** + - A: 可能是网页结构发生变化。请检查 `scbank_collector.py` 中的 Hook 逻辑。 + +- **Q: 上传订单失败?** + - A: 请检查 `scbank_processor.py` 中的 `InternalApiClient` 类,确认 `app_id` 和 API 地址是否正确。 + +- **Q: 打包后运行闪退?** + - A: 请在命令行中运行 exe 文件,查看具体报错信息。常见原因是缺少依赖或路径问题。 diff --git a/build_portable.ps1 b/build_portable.ps1 new file mode 100644 index 0000000..814470a --- /dev/null +++ b/build_portable.ps1 @@ -0,0 +1,73 @@ +<# +.SYNOPSIS + Automated build script using Portable Python (No installation required) +.DESCRIPTION + This script downloads a temporary Python environment, installs dependencies, + builds the executable, and then cleans up the Python environment. + Your Windows system remains clean. +#> + +$ErrorActionPreference = "Stop" + +# Configuration +$PythonVer = "3.10.11" +$PythonUrl = "https://www.python.org/ftp/python/$PythonVer/python-$PythonVer-embed-amd64.zip" +$GetPipUrl = "https://bootstrap.pypa.io/get-pip.py" +$WorkDir = Get-Location +$TempDir = Join-Path $WorkDir ".temp_python" +$PythonExe = Join-Path $TempDir "python.exe" +$PipExe = Join-Path $TempDir "Scripts\pip.exe" + +Write-Host "[Step 1/6] Setting up temporary workspace..." -ForegroundColor Cyan +if (Test-Path $TempDir) { Remove-Item $TempDir -Recurse -Force } +New-Item -ItemType Directory -Path $TempDir | Out-Null + +Write-Host "[Step 2/6] Downloading Portable Python $PythonVer..." -ForegroundColor Cyan +$ZipPath = Join-Path $WorkDir "python-embed.zip" +Invoke-WebRequest -Uri $PythonUrl -OutFile $ZipPath +Expand-Archive -Path $ZipPath -DestinationPath $TempDir +Remove-Item $ZipPath + +# Fix python3xx._pth to allow 'import site' (Crucial for pip) +$PthFile = Get-ChildItem $TempDir -Filter "python*._pth" | Select-Object -First 1 +if ($PthFile) { + $Content = Get-Content $PthFile.FullName + $Content = $Content -replace "#import site", "import site" + $Content | Set-Content $PthFile.FullName + Write-Host " -> Enabled 'import site' in $($PthFile.Name)" -ForegroundColor Gray +} + +Write-Host "[Step 3/6] Installing pip..." -ForegroundColor Cyan +$GetPipPath = Join-Path $TempDir "get-pip.py" +Invoke-WebRequest -Uri $GetPipUrl -OutFile $GetPipPath +& $PythonExe $GetPipPath --no-warn-script-location +Remove-Item $GetPipPath + +Write-Host "[Step 4/6] Installing dependencies..." -ForegroundColor Cyan +# Install PyInstaller first +& $PythonExe -m pip install pyinstaller --no-warn-script-location +# Install greenlet binary (Critical Fix) +# Playwright depends on greenlet, which needs C++ runtime. +# Pre-built wheel usually works, but sometimes fails in embeddable python. +# Explicitly installing it helps. +& $PythonExe -m pip install greenlet --no-warn-script-location + +# Install project requirements +& $PythonExe -m pip install -r requirements.txt --no-warn-script-location -i https://pypi.tuna.tsinghua.edu.cn/simple + +Write-Host "[Step 5/6] Building executable..." -ForegroundColor Cyan +# Run PyInstaller +& $PythonExe -m PyInstaller --noconfirm --onedir --console --clean --name "scbank_tool" main.py + +if ($LASTEXITCODE -eq 0) { + Write-Host "[Success] Build completed successfully!" -ForegroundColor Green + Write-Host "Executable location: dist\scbank_tool\scbank_tool.exe" +} else { + Write-Host "[Error] Build failed." -ForegroundColor Red +} + +Write-Host "[Step 6/6] Cleaning up temporary Python environment..." -ForegroundColor Cyan +# Remove-Item $TempDir -Recurse -Force + +Write-Host "Done." +Read-Host "Press Enter to exit..." diff --git a/main.py b/main.py index 6ab36d4..2c51668 100644 --- a/main.py +++ b/main.py @@ -23,6 +23,9 @@ def main(): sys.exit(0) else: print("[错误] 无效选项,请重新输入。") + + # 暂停一下,避免刷屏太快 + # input("\n按回车键继续...") if __name__ == "__main__": try: @@ -30,3 +33,21 @@ if __name__ == "__main__": except KeyboardInterrupt: print("\n[系统] 用户中断,程序已退出。") sys.exit(0) + except Exception as e: + # 直接把错误写到当前目录的 error.log 文件 + with open("error.log", "w", encoding="utf-8") as f: + import traceback + f.write(f"Error: {str(e)}\n") + f.write(traceback.format_exc()) + + # 尝试打印 + print(f"\n[致命错误] {e}") + print("错误日志已保存至 error.log") + + # 死循环等待用户输入,防止窗口关闭 + while True: + try: + input("程序发生错误,请查看 error.log。按回车键尝试退出...") + except: + break + sys.exit(1) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..25b74dc --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +playwright +pandas +openpyxl +requests +pyinstaller diff --git a/scbank_collector.py b/scbank_collector.py index 9ec624f..558b242 100644 --- a/scbank_collector.py +++ b/scbank_collector.py @@ -10,6 +10,11 @@ class SCBankCollector: self.target_url = "https://jf.scbank.cn:8085/#/orderManagement/deliveryOrders" # 动态生成文件名: data/raw_YYYYMMDD_HHMMSS.jsonl self.ts_str = datetime.now().strftime("%Y%m%d_%H%M%S") + + # 确保数据目录存在 + if not os.path.exists("data"): + os.makedirs("data") + self.output_file = f"data/raw_{self.ts_str}.jsonl" self.browser = None self.page = None @@ -37,10 +42,36 @@ class SCBankCollector: def start_browser(self): self.log("启动浏览器...") p = sync_playwright().start() - self.browser = p.chromium.launch( - headless=False, - args=["--disable-blink-features=AutomationControlled"] - ) + + # 尝试使用本地浏览器 (Chrome 或 Edge) + browser = None + for channel in ["chrome", "msedge"]: + try: + self.log(f"尝试启动本地 {channel}...") + browser = p.chromium.launch( + channel=channel, + headless=False, + args=["--disable-blink-features=AutomationControlled"] + ) + self.log(f"成功启动 {channel}") + break + except Exception as e: + self.log(f"启动 {channel} 失败,尝试下一个...") + + # 如果本地浏览器都失败,尝试使用内置 Chromium (如果已安装) + if not browser: + self.log("未找到本地 Chrome 或 Edge,尝试使用内置 Chromium...") + try: + browser = p.chromium.launch( + headless=False, + args=["--disable-blink-features=AutomationControlled"] + ) + except Exception as e: + self.log(f"[FATAL] 无法启动任何浏览器: {e}") + self.log("请确保已安装 Google Chrome 或 Microsoft Edge 浏览器。") + raise e + + self.browser = browser context = self.browser.new_context( viewport={'width': 1920, 'height': 1080}, user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" @@ -175,7 +206,7 @@ class SCBankCollector: """点击当前页所有详情""" try: # 必须等待行出现 - self.page.wait_for_selector(".el-table__row", timeout=5000) + self.page.wait_for_selector(".el-table__row", timeout=0) except: self.log("当前页无数据或加载超时") return diff --git a/scbank_processor.py b/scbank_processor.py index f21d1fc..d624fc1 100644 --- a/scbank_processor.py +++ b/scbank_processor.py @@ -13,13 +13,14 @@ class InternalApiClient: """ def __init__(self): # 生产环境 - # self.api_base_url = "https://hyt.86698.cn/open" + self.api_base_url = "https://hyt.86698.cn/open" # 测试环境 (默认) - self.api_base_url = "http://120.55.12.245:8109" + # self.api_base_url = "http://120.55.12.245:8109" - # 占位符 AppId,实际需替换 - # self.app_id = "8ce4d435fa77492f84d2fafa241c2804" - self.app_id = "e699e6ef74504f4d86776b3d244ce602" + # 生产环境 + self.app_id = "8ce4d435fa77492f84d2fafa241c2804" + # 测试环境 (默认) + # self.app_id = "e699e6ef74504f4d86776b3d244ce602" def _post(self, path, data): """发送 POST 请求""" @@ -136,6 +137,12 @@ class SCBankProcessor: self.data_dir = "data" self.archive_dir = "data/archive" self.output_dir = "output" + + # 确保目录结构存在 + for d in [self.data_dir, self.archive_dir, self.output_dir]: + if not os.path.exists(d): + os.makedirs(d) + self.client = InternalApiClient() def log(self, msg): @@ -253,12 +260,17 @@ class SCBankProcessor: except: pass + # 收件人信息 + receive_info = detail.get("mallOrderReceiveInfo", {}) + results.append({ "处理状态": "成功" if success else "失败", "失败原因": "" if success else msg, "下单时间": order_time, "订单编号": order_no, - "下单用户手机号码": detail.get("orderMobile"), + "收货人": receive_info.get("receiverName"), + "收货地址": receive_info.get("fullAddress"), + "下单用户手机号码": receive_info.get("receiverMobile", "00000000000"), "商户名称": detail.get("exMerchant"), "订单金额": detail.get("orderAmt"), "商品名称": goods_name,