scbank-sync/scbank_processor.py

180 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import time
import os
import shutil
import random
import pandas as pd
from datetime import datetime
class InternalApiClient:
    """Stub (mock) client for the internal order/product system.

    No network traffic is performed; both methods sleep briefly to
    simulate latency and return canned answers.
    """

    def __init__(self):
        # Placeholder endpoint; never actually contacted by the stubs below.
        self.api_url = "http://internal-system.local/api"

    def get_internal_product_id(self, scbank_goods_name):
        """Mock: map a mall goods name to an internal product id.

        Returns None for an empty/None name; names matching no known
        keyword map to the catch-all id "P_UNKNOWN_999".
        """
        time.sleep(0.1)  # simulate request latency
        if not scbank_goods_name:
            return None
        # Keyword -> internal id table; first substring match wins.
        keyword_map = (
            ("iPhone", "P_APPLE_001"),
            ("华为", "P_HUAWEI_002"),
        )
        for keyword, product_id in keyword_map:
            if keyword in scbank_goods_name:
                return product_id
        return "P_UNKNOWN_999"

    def upload_order(self, order_detail):
        """Mock: upload a single order detail.

        Returns:
            tuple[bool, str]: (success flag, status message) with a
            simulated ~90% success rate.
        """
        time.sleep(0.2)  # simulate upload round-trip
        succeeded = random.random() < 0.9
        return (True, "上传成功") if succeeded else (False, "模拟网络超时")
class SCBankProcessor:
    """Pipeline: read raw_*.jsonl capture files, dedupe order details,
    sync each order to the internal system, export an Excel report, and
    archive the source file.
    """

    def __init__(self):
        # Directory layout: incoming raw captures, processed archive, reports.
        self.data_dir = "data"
        self.archive_dir = "data/archive"
        self.output_dir = "output"
        self.client = InternalApiClient()

    def log(self, msg):
        """Print a timestamped progress message."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")

    def run(self):
        """Scan data_dir for pending raw_*.jsonl files and process each."""
        files = [f for f in os.listdir(self.data_dir)
                 if f.startswith("raw_") and f.endswith(".jsonl")]
        if not files:
            self.log("没有待处理的数据文件。")
            return
        self.log(f"发现 {len(files)} 个待处理文件: {files}")
        for file_name in files:
            self._process_file(os.path.join(self.data_dir, file_name), file_name)

    def _process_file(self, file_path, file_name):
        """Run the full pipeline for one raw capture file."""
        self.log(f"正在处理文件: {file_name}")
        raw_records = self._read_records(file_path)
        if raw_records is None:  # unreadable file; already logged
            return
        if not raw_records:
            self.log("文件内容为空,跳过")
            return
        unique_details = self._extract_unique_details(raw_records)
        self.log(f"解析出 {len(unique_details)} 条有效唯一订单详情")
        results = [self._sync_order(order_no, detail)
                   for order_no, detail in unique_details.items()]
        if results:
            self._export_results(results, file_name)
        self._archive(file_path, file_name)

    def _read_records(self, file_path):
        """Read one JSON-lines file.

        Returns a list of parsed records (malformed lines are skipped),
        or None when the file itself cannot be read.
        """
        records = []
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    try:
                        records.append(json.loads(line))
                    except json.JSONDecodeError:
                        # Best-effort: a corrupt line must not abort the file.
                        pass
        except Exception as e:  # boundary: log and skip this file
            self.log(f"读取文件失败: {e}")
            return None
        return records

    @staticmethod
    def _extract_unique_details(raw_records):
        """Extract order-detail bodies and dedupe them by order number.

        Real data shape: record -> payload -> body -> orderNo, where body
        may arrive either as a dict or as a JSON-encoded string.
        Returns {order_no: detail_dict}; the last occurrence wins.
        """
        unique_details = {}
        for record in raw_records:
            if record.get("type") != "detail":
                continue
            body = record.get("payload", {}).get("body", {})
            # Cheap pre-filter: keep only bodies that mention an orderNo.
            if not body or "orderNo" not in str(body):
                continue
            if isinstance(body, str):
                try:
                    body = json.loads(body)
                except json.JSONDecodeError:
                    continue
            if not isinstance(body, dict):
                continue  # e.g. a JSON array; nothing to key on
            order_no = body.get("orderNo") or body.get("orderId")
            if order_no:
                unique_details[order_no] = body
        return unique_details

    @staticmethod
    def _format_order_time(raw_time):
        """Normalize '2026-03-10T00:50:37.000+0000' -> '2026-03-10 00:50:37'.

        Falls back to a timezone-less parse for variant formats; returns
        the input unchanged when it is falsy or unparseable.
        """
        if not raw_time:
            return raw_time
        try:
            # %z handles the colon-less offset form (+0000).
            dt = datetime.strptime(raw_time, "%Y-%m-%dT%H:%M:%S.%f%z")
        except ValueError:
            try:
                # Fallback: strip fractional seconds / odd offsets and retry.
                dt = datetime.strptime(raw_time.split('.')[0],
                                       "%Y-%m-%dT%H:%M:%S")
            except (ValueError, TypeError, AttributeError):
                return raw_time
        return dt.strftime("%Y-%m-%d %H:%M:%S")

    def _sync_order(self, order_no, detail):
        """Resolve the internal product id, upload the order, and build
        one report row (dict) describing the outcome.
        """
        # goodsInfoList is a list; the first item is treated as the main goods.
        goods_list = detail.get("goodsInfoList", [])
        goods_name = "未知商品"
        if goods_list:
            goods_name = goods_list[0].get("spuName", "未知商品")
        internal_pid = self.client.get_internal_product_id(goods_name)
        success, msg = self.client.upload_order(detail)
        return {
            "处理状态": "成功" if success else "失败",
            "失败原因": "" if success else msg,
            "下单时间": self._format_order_time(detail.get("orderCreateTime")),
            "订单编号": order_no,
            "下单用户手机号码": detail.get("orderMobile"),
            "商户名称": detail.get("exMerchant"),
            "订单金额": detail.get("orderAmt"),
            "商品名称": goods_name,
            "内部商品ID": internal_pid,
        }

    def _export_results(self, results, file_name):
        """Write the result rows to output/result_<ts>.xlsx."""
        os.makedirs(self.output_dir, exist_ok=True)  # first run: dir may be absent
        ts = file_name.replace("raw_", "").replace(".jsonl", "")
        output_file = os.path.join(self.output_dir, f"result_{ts}.xlsx")
        pd.DataFrame(results).to_excel(output_file, index=False)
        self.log(f"结果已导出至: {output_file}")

    def _archive(self, file_path, file_name):
        """Move the processed source file into the archive directory."""
        try:
            os.makedirs(self.archive_dir, exist_ok=True)
            shutil.move(file_path, os.path.join(self.archive_dir, file_name))
            self.log(f"源文件已归档至: {self.archive_dir}")
        except Exception as e:  # best-effort: archiving must not crash the run
            self.log(f"归档失败: {e}")
if __name__ == "__main__":
    # Script entry point: run one processing pass over the data directory.
    SCBankProcessor().run()