# Python source file: 48 lines, 1.7 KiB
import json
|
||
from pathlib import Path
|
||
|
||
def _extract_orders(data) -> list:
    """Return the order dicts carried by one decoded JSONL record.

    Two ``body`` layouts are recognised: a ``pageDataList`` list holding
    several orders, or a body that is itself a single order object.
    Anything that does not match (non-object record, missing/non-object
    body, non-list ``pageDataList``, non-object list entries) yields no
    orders instead of raising.
    """
    if not isinstance(data, dict):
        return []

    body = data.get('body')
    if not isinstance(body, dict):
        return []

    if 'pageDataList' in body:
        # List form: several orders in one record. ``null`` or any other
        # non-list value is treated as "no orders".
        page = body['pageDataList']
        candidates = page if isinstance(page, list) else []
    else:
        # Single-order form: the body itself is the order.
        candidates = [body]

    return [order for order in candidates if isinstance(order, dict)]


def process_jsonl_to_json(input_path: str, output_path: str) -> None:
    """Aggregate the orders found in a JSONL file into one JSON file.

    Every non-blank line of ``input_path`` is decoded as JSON and the orders
    in its ``body`` are collected into a mapping keyed by ``orderNo``. When
    the same ``orderNo`` occurs more than once, the last occurrence wins.
    Orders without a truthy ``orderNo`` are ignored.

    Lines that are not valid JSON are reported on stdout and skipped. Lines
    that are valid JSON but do not have the expected structure are skipped
    silently rather than aborting the whole run.

    Args:
        input_path: Path of the UTF-8 encoded JSONL file to read.
        output_path: Path of the JSON file to write (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
    """
    order_dict = {}

    with open(input_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            # Keep the try body minimal: only decoding can raise
            # JSONDecodeError; structural problems are handled by the helper.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"第{line_num}行解析失败: {e}")
                continue

            for order in _extract_orders(data):
                order_no = order.get('orderNo')
                if order_no:
                    order_dict[order_no] = order

    # Persist the aggregated mapping as human-readable JSON.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(order_dict, f, ensure_ascii=False, indent=2)

    print(f"处理完成!共提取{len(order_dict)}个订单,已保存到{output_path}")


if __name__ == "__main__":
    # Script entry point: convert the scbank JSONL dump into the aggregated
    # order JSON file that sits next to it.
    process_jsonl_to_json(
        input_path="/mnt/d/code/project/python/scbank/scbank_data.jsonl",
        output_path="/mnt/d/code/project/python/scbank/order_data.json",
    )