feat: 增加excel处理模式

2026-03-19 14:48:24 +08:00 · 2026-03-19 14:48:24 +08:00 · d68e87fb3a
parent 33a3cb3361
commit d68e87fb3a
4 changed files with 87 additions and 6 deletions
--- a/main.py
+++ b/main.py
@ -6,19 +6,24 @@ def main():
    while True:
        print("\n=== 四川银行权益商城自动化工具 ===")
        print("1. 启动采集 (Collector) -> 浏览器抓取")
-        print("2. 执行同步 (Processor) -> 货易通上传")
-        print("3. 退出")
-        choice = input("请输入选项 [1-3]: ").strip()
+        print("2. 执行同步 (Processor) -> 货易通上传 (处理JSONL)")
+        print("3. 执行同步 (Processor) -> 货易通上传 (处理Excel)")
+        print("4. 退出")
+        choice = input("请输入选项 [1-4]: ").strip()

        if choice == "1":
            print("\n[系统] 正在启动采集器...")
            collector = SCBankCollector()
            collector.run()
        elif choice == "2":
-            print("\n[系统] 正在启动处理器...")
+            print("\n[系统] 正在启动处理器 (JSONL模式)...")
            processor = SCBankProcessor()
            processor.run()
        elif choice == "3":
+            print("\n[系统] 正在启动处理器 (Excel模式)...")
+            processor = SCBankProcessor()
+            processor.run_excel()
+        elif choice == "4":
            print("\n[系统] 退出程序。")
            sys.exit(0)
        else:
--- a/product_map.txt
+++ b/product_map.txt
@ -15,3 +15,10 @@
 # 某某大米:10kg装>>>>东北大米-10kg
 # 
 # --- 请在下方添加您的配置 ---
+
+洁柔纸面巾(雅白纯木软抽)提>>>>洁柔抽纸软抽3层100抽自然无香面巾纸擦手纸卫生纸家用 1提3包TR071401A
+美的电磁炉 MC-DZ03SZ>>>>美的电磁炉MC-DZ03SZ
+美的电饼铛 MC-JHN34Q>>>>美的电饼铛MC-JHN34Q
+BULL公牛 盒装卵石灰30W便携插座 MCB303>>>>BULL公牛盒装卵石灰30 W便携插座MCB303
+鲁香六味调味品臻品礼盒>>>>鲁花 鲁香六味 调味品臻品礼盒
+追觅　洗地机　G600 黑色>>>>追觅无线家用洗地机G600
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,6 @@
 playwright
 pandas
 openpyxl
+xlrd
 requests
 pyinstaller
--- a/scbank_processor.py
+++ b/scbank_processor.py
@ -192,7 +192,7 @@ class SCBankProcessor:
        # 1. 扫描文件
        files = [f for f in os.listdir(self.data_dir) if f.startswith("raw_") and f.endswith(".jsonl")]
        if not files:
-            self.log("没有待处理的数据文件。")
+            self.log("没有待处理的JSONL数据文件。")
            return

        self.log(f"发现 {len(files)} 个待处理文件: {files}")
@ -201,6 +201,65 @@ class SCBankProcessor:
            file_path = os.path.join(self.data_dir, file_name)
            self._process_file(file_path, file_name)

+    def run_excel(self):
+        """Process every Excel export found directly inside ``self.data_dir``.
+
+        Picks up ``.xls`` and ``.xlsx`` files (extension matched
+        case-insensitively), logs what was found and hands each file to
+        ``_process_excel_file``. No file-name prefix filter is applied: every
+        matching workbook is processed. Returns ``None``; when nothing matches
+        it only logs a message.
+        """
+        # "~$name.xlsx" is the lock file Excel keeps next to a workbook that is
+        # currently open; it is not a readable workbook, so it is skipped.
+        # Sorting makes the processing order independent of os.listdir().
+        # NOTE(review): if self.output_dir is the same folder as self.data_dir,
+        # generated result_*.xlsx files would be picked up here too - confirm.
+        files = sorted(
+            f
+            for f in os.listdir(self.data_dir)
+            if f.lower().endswith((".xls", ".xlsx")) and not f.startswith("~$")
+        )
+
+        if not files:
+            self.log("没有找到待处理的 Excel 文件 (.xls / .xlsx)。")
+            return
+
+        self.log(f"发现 {len(files)} 个 Excel 待处理文件: {files}")
+
+        for file_name in files:
+            file_path = os.path.join(self.data_dir, file_name)
+            self._process_excel_file(file_path, file_name)
+
+    def _process_excel_file(self, file_path, file_name):
+        """Turn one Excel export into order details and pass them to the sync step.
+
+        Every row with a non-empty ``订单号`` becomes a detail dict laid out
+        like the details built from JSON records. The dicts are keyed by order
+        number, so a later row with the same number replaces an earlier one.
+        An unreadable or empty workbook is logged and skipped.
+
+        Args:
+            file_path: Path of the workbook to read.
+            file_name: Bare file name, used for logging and forwarded to
+                ``_process_unique_details``.
+        """
+        self.log(f"正在处理 Excel 文件: {file_name}")
+
+        try:
+            # Read every cell as text: with dtype inference, a numeric-looking
+            # order number or phone column that contains blanks becomes float
+            # and would stringify as "123.0" / "1.2e+15".
+            df = pd.read_excel(file_path, dtype=str)
+        except Exception as e:  # best effort: any unreadable file is skipped
+            self.log(f"读取 Excel 文件失败: {e}")
+            return
+
+        if df.empty:
+            self.log("Excel 文件内容为空，跳过")
+            return
+
+        def cell(row, column, default=""):
+            # Absent columns (None) and blank cells (NaN/NA) both yield *default*.
+            value = row.get(column)
+            return default if pd.isna(value) else str(value)
+
+        unique_details = {}
+        for _, row in df.iterrows():
+            order_no = cell(row, "订单号").strip()
+            if not order_no:
+                continue
+
+            raw_price = cell(row, "单价", "0")
+            try:
+                order_amt = float(raw_price)
+            except ValueError:
+                # One malformed price cell must not abort the whole file; fall
+                # back to the same 0.0 that a blank price cell gets.
+                self.log(f"订单 {order_no} 的单价无法解析: {raw_price!r}，按 0.0 处理")
+                order_amt = 0.0
+
+            # NOTE(review): if the export holds one row per purchased item,
+            # earlier items of the same order are overwritten here - confirm
+            # against the export format.
+            unique_details[order_no] = {
+                "orderNo": order_no,
+                "orderCreateTime": cell(row, "下单时间"),
+                "mallOrderReceiveInfo": {
+                    "receiverName": cell(row, "收货人名称"),
+                    "receiverMobile": cell(row, "收货人电话", "00000000000"),
+                    "fullAddress": cell(row, "收货地址"),
+                    "remark": cell(row, "备注"),
+                },
+                "goodsInfoList": [
+                    {"spuName": cell(row, "产品名称", "未知商品")}
+                ],
+                "orderAmt": order_amt,
+                "exMerchant": "成都蓝色兄弟网络科技有限公司",
+            }
+
+        self.log(f"解析出 {len(unique_details)} 条唯一订单 (来自 Excel)")
+        self._process_unique_details(unique_details, file_name, file_path)
+
    def _process_file(self, file_path, file_name):
        self.log(f"正在处理文件: {file_name}")
        
@ -264,7 +323,9 @@ class SCBankProcessor:
                    unique_details[order_no].update(body)

        self.log(f"解析出 {len(unique_details)} 条唯一订单 (列表+详情合并)")
+        self._process_unique_details(unique_details, file_name, file_path)

+    def _process_unique_details(self, unique_details, file_name, file_path):
        # 4. 业务处理与同步
        results = []
        for order_no, detail in unique_details.items():
@ -319,7 +380,14 @@ class SCBankProcessor:

        # 5. 导出 Excel
        if results:
-            ts = file_name.replace("raw_", "").replace(".jsonl", "")
+            # 处理不同后缀和前缀的文件名，生成统一的结果文件名
+            base_name = os.path.splitext(file_name)[0]
+            if base_name.startswith("raw_"):
+                ts = base_name.replace("raw_", "")
+            else:
+                # 针对非 raw_ 前缀的文件（如直接下载的 shipping_order）附加时间戳
+                ts = f"{base_name}_{datetime.now().strftime('%H%M%S')}"
+                
            output_file = os.path.join(self.output_dir, f"result_{ts}.xlsx")
            df = pd.DataFrame(results)
            df.to_excel(output_file, index=False)