scbank-sync/process_jsonl.py

48 lines
1.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from pathlib import Path
def _extract_orders(record: object) -> list:
    """Return the candidate order dicts carried by one decoded JSONL record.

    Two ``body`` shapes are recognised: a dict with a ``pageDataList`` list
    (several orders), or a dict that is itself a single order. Any other
    shape (non-dict record, missing/null/non-dict ``body``, non-list
    ``pageDataList``, non-dict list items) contributes no orders.
    """
    if not isinstance(record, dict):
        return []
    body = record.get('body')
    if not isinstance(body, dict):
        return []
    if 'pageDataList' in body:
        # List form: one record carries many orders.
        page = body['pageDataList']
        if not isinstance(page, list):
            return []
        return [item for item in page if isinstance(item, dict)]
    # Single-order form: the body itself is the order.
    return [body]


def process_jsonl_to_json(input_path: str, output_path: str) -> None:
    """Aggregate the orders of a JSONL file into one JSON object keyed by orderNo.

    Every non-blank line of ``input_path`` is decoded as JSON and the orders
    found in its ``body`` are collected. Orders without a truthy ``orderNo``
    are ignored; when an ``orderNo`` occurs more than once, the last
    occurrence wins. Lines that are not valid JSON are reported on stdout and
    skipped. Lines that decode but do not have the expected structure are
    skipped silently, the same way a record without ``body`` is.

    The result is written to ``output_path`` as indented UTF-8 JSON with
    non-ASCII characters left unescaped, and a summary line is printed.

    Args:
        input_path: Path of the JSONL file to read (UTF-8).
        output_path: Path of the JSON file to write (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
    """
    order_dict = {}
    with open(input_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            # Keep the try body minimal: only decoding may raise here.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"{line_num}行解析失败: {e}")
                continue
            for order in _extract_orders(data):
                order_no = order.get('orderNo')
                if order_no:
                    order_dict[order_no] = order
    # Save the aggregated orders as a JSON file.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(order_dict, f, ensure_ascii=False, indent=2)
    print(f"处理完成!共提取{len(order_dict)}个订单,已保存到{output_path}")
if __name__ == "__main__":
    # Script entry point: convert the hard-coded JSONL dump into the
    # aggregated per-order JSON file next to it.
    input_file, output_file = (
        "/mnt/d/code/project/python/scbank/scbank_data.jsonl",
        "/mnt/d/code/project/python/scbank/order_data.json",
    )
    process_jsonl_to_json(input_path=input_file, output_path=output_file)