feat: 增加excel处理模式
This commit is contained in:
parent
33a3cb3361
commit
d68e87fb3a
13
main.py
13
main.py
|
|
@ -6,19 +6,24 @@ def main():
|
|||
while True:
|
||||
print("\n=== 四川银行权益商城自动化工具 ===")
|
||||
print("1. 启动采集 (Collector) -> 浏览器抓取")
|
||||
print("2. 执行同步 (Processor) -> 货易通上传")
|
||||
print("3. 退出")
|
||||
choice = input("请输入选项 [1-3]: ").strip()
|
||||
print("2. 执行同步 (Processor) -> 货易通上传 (处理JSONL)")
|
||||
print("3. 执行同步 (Processor) -> 货易通上传 (处理Excel)")
|
||||
print("4. 退出")
|
||||
choice = input("请输入选项 [1-4]: ").strip()
|
||||
|
||||
if choice == "1":
|
||||
print("\n[系统] 正在启动采集器...")
|
||||
collector = SCBankCollector()
|
||||
collector.run()
|
||||
elif choice == "2":
|
||||
print("\n[系统] 正在启动处理器...")
|
||||
print("\n[系统] 正在启动处理器 (JSONL模式)...")
|
||||
processor = SCBankProcessor()
|
||||
processor.run()
|
||||
elif choice == "3":
|
||||
print("\n[系统] 正在启动处理器 (Excel模式)...")
|
||||
processor = SCBankProcessor()
|
||||
processor.run_excel()
|
||||
elif choice == "4":
|
||||
print("\n[系统] 退出程序。")
|
||||
sys.exit(0)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -15,3 +15,10 @@
|
|||
# 某某大米:10kg装>>>>东北大米-10kg
|
||||
#
|
||||
# --- 请在下方添加您的配置 ---
|
||||
|
||||
洁柔纸面巾(雅白纯木软抽)提>>>>洁柔抽纸软抽3层100抽自然无香面巾纸擦手纸卫生纸家用 1提3包TR071401A
|
||||
美的电磁炉 MC-DZ03SZ>>>>美的电磁炉MC-DZ03SZ
|
||||
美的电饼铛 MC-JHN34Q>>>>美的电饼铛MC-JHN34Q
|
||||
BULL公牛 盒装卵石灰30W便携插座 MCB303>>>>BULL公牛盒装卵石灰30 W便携插座MCB303
|
||||
鲁香六味调味品臻品礼盒>>>>鲁花 鲁香六味 调味品臻品礼盒
|
||||
追觅 洗地机 G600 黑色>>>>追觅无线家用洗地机G600
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
playwright
|
||||
pandas
|
||||
openpyxl
|
||||
xlrd
|
||||
requests
|
||||
pyinstaller
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ class SCBankProcessor:
|
|||
# 1. 扫描文件
|
||||
files = [f for f in os.listdir(self.data_dir) if f.startswith("raw_") and f.endswith(".jsonl")]
|
||||
if not files:
|
||||
self.log("没有待处理的数据文件。")
|
||||
self.log("没有待处理的JSONL数据文件。")
|
||||
return
|
||||
|
||||
self.log(f"发现 {len(files)} 个待处理文件: {files}")
|
||||
|
|
@ -201,6 +201,65 @@ class SCBankProcessor:
|
|||
file_path = os.path.join(self.data_dir, file_name)
|
||||
self._process_file(file_path, file_name)
|
||||
|
||||
def run_excel(self):
    """Process every exported Excel workbook found in ``self.data_dir``.

    Files are matched by extension (``.xls`` / ``.xlsx``) without regard to
    case, so exports named ``ORDERS.XLSX`` are picked up as well. Lock files
    that Office creates next to an open workbook (``~$name.xlsx``) are
    skipped because they are not readable workbooks. Matching files are
    handled in sorted name order so repeated runs behave the same way.

    Each matching file is passed to ``self._process_excel_file`` as
    ``(file_path, file_name)``. Returns ``None``.
    """
    excel_suffixes = (".xls", ".xlsx")

    files = sorted(
        f
        for f in os.listdir(self.data_dir)
        if f.lower().endswith(excel_suffixes)
        # "~$" marks the temporary lock file of a workbook that is open.
        and not f.startswith("~$")
        # Ignore directories that merely happen to carry an Excel suffix.
        and os.path.isfile(os.path.join(self.data_dir, f))
    )

    if not files:
        self.log("没有找到待处理的 Excel 文件 (.xls / .xlsx)。")
        return

    self.log(f"发现 {len(files)} 个 Excel 待处理文件: {files}")

    for file_name in files:
        file_path = os.path.join(self.data_dir, file_name)
        self._process_excel_file(file_path, file_name)
|
||||
|
||||
def _process_excel_file(self, file_path, file_name):
    """Parse one exported Excel order sheet and pass its orders on for syncing.

    Every row that has an order number (column ``订单号``) is converted into
    a ``detail`` dict, keyed by order number, and the resulting mapping is
    handed to ``self._process_unique_details``.

    Args:
        file_path: Full path of the workbook to read.
        file_name: Bare file name, used for log messages and passed through
            to ``self._process_unique_details``.

    Returns:
        None. Read failures and empty sheets are logged and skipped.
    """
    self.log(f"正在处理 Excel 文件: {file_name}")

    try:
        # Read every cell as text. With dtype inference, an order-number or
        # phone column that contains a blank cell becomes float64, and
        # str() then yields values such as "13800000000.0".
        df = pd.read_excel(file_path, dtype=str)
    except Exception as e:
        self.log(f"读取 Excel 文件失败: {e}")
        return

    if df.empty:
        self.log("Excel 文件内容为空,跳过")
        return

    def cell(record, column, default=""):
        """Return the cell as text, or ``default`` if it is absent or blank."""
        value = record.get(column)
        if value is None or pd.isna(value):
            return default
        return str(value)

    unique_details = {}
    for record in df.to_dict(orient="records"):
        order_no = cell(record, "订单号").strip()
        if not order_no:
            continue

        raw_price = cell(record, "单价")
        try:
            order_amt = float(raw_price) if raw_price else 0.0
        except ValueError:
            # One malformed price must not abort the whole workbook.
            self.log(f"订单 {order_no} 的单价无法解析: {raw_price!r},按 0.0 处理")
            order_amt = 0.0

        # Build the same detail structure the JSON pipeline produces.
        detail = {
            "orderNo": order_no,
            "orderCreateTime": cell(record, "下单时间"),
            "mallOrderReceiveInfo": {
                "receiverName": cell(record, "收货人名称"),
                "receiverMobile": cell(record, "收货人电话", "00000000000"),
                "fullAddress": cell(record, "收货地址"),
                "remark": cell(record, "备注"),
            },
            "goodsInfoList": [
                {"spuName": cell(record, "产品名称", "未知商品")},
            ],
            "orderAmt": order_amt,
            "exMerchant": "成都蓝色兄弟网络科技有限公司",
        }
        # NOTE(review): when an order number appears on several rows, the
        # last row replaces the earlier ones (unchanged from before) --
        # confirm whether multi-item orders need their goods merged instead.
        unique_details[order_no] = detail

    self.log(f"解析出 {len(unique_details)} 条唯一订单 (来自 Excel)")
    self._process_unique_details(unique_details, file_name, file_path)
|
||||
|
||||
def _process_file(self, file_path, file_name):
|
||||
self.log(f"正在处理文件: {file_name}")
|
||||
|
||||
|
|
@ -264,7 +323,9 @@ class SCBankProcessor:
|
|||
unique_details[order_no].update(body)
|
||||
|
||||
self.log(f"解析出 {len(unique_details)} 条唯一订单 (列表+详情合并)")
|
||||
self._process_unique_details(unique_details, file_name, file_path)
|
||||
|
||||
def _process_unique_details(self, unique_details, file_name, file_path):
|
||||
# 4. 业务处理与同步
|
||||
results = []
|
||||
for order_no, detail in unique_details.items():
|
||||
|
|
@ -319,7 +380,14 @@ class SCBankProcessor:
|
|||
|
||||
# 5. 导出 Excel
|
||||
if results:
|
||||
ts = file_name.replace("raw_", "").replace(".jsonl", "")
|
||||
# 处理不同后缀和前缀的文件名,生成统一的结果文件名
|
||||
base_name = os.path.splitext(file_name)[0]
|
||||
if base_name.startswith("raw_"):
|
||||
ts = base_name.replace("raw_", "")
|
||||
else:
|
||||
# 针对非 raw_ 前缀的文件(如直接下载的 shipping_order)附加时间戳
|
||||
ts = f"{base_name}_{datetime.now().strftime('%H%M%S')}"
|
||||
|
||||
output_file = os.path.join(self.output_dir, f"result_{ts}.xlsx")
|
||||
df = pd.DataFrame(results)
|
||||
df.to_excel(output_file, index=False)
|
||||
|
|
|
|||
Loading…
Reference in New Issue