import json
from pathlib import Path
from typing import Any, Dict, Iterator, Tuple


def _iter_orders(body: Any) -> Iterator[Tuple[Any, Dict[str, Any]]]:
    """Yield ``(order_no, order)`` pairs found in one record's ``body``.

    Two body shapes are recognised:

    * a dict with a ``pageDataList`` key holding a list of order dicts;
    * a dict that is itself a single order.

    Anything that does not fit (non-dict body, non-list ``pageDataList``,
    non-dict list entries, missing/empty/unhashable ``orderNo``) is skipped
    instead of raising, so one malformed record cannot abort the whole run.
    """
    if not isinstance(body, dict):
        return

    if 'pageDataList' in body:
        page = body['pageDataList']
        # A null or otherwise non-list page carries no orders.
        candidates = page if isinstance(page, list) else []
    else:
        candidates = [body]

    for order in candidates:
        if not isinstance(order, dict):
            continue
        order_no = order.get('orderNo')
        # Lists/dicts are unhashable and cannot be used as aggregation keys.
        if order_no and not isinstance(order_no, (list, dict)):
            yield order_no, order


def process_jsonl_to_json(input_path: str, output_path: str) -> None:
    """Aggregate orders from a JSONL file into one JSON object keyed by orderNo.

    Each non-blank input line is parsed as JSON and its ``body`` value is
    inspected for orders (see ``_iter_orders``). When the same ``orderNo``
    appears more than once, the last occurrence wins. The result is written
    to ``output_path`` as indented UTF-8 JSON, and a summary is printed.

    Lines that are not valid JSON, or whose top-level value is not a JSON
    object, are reported on stdout and skipped.

    Args:
        input_path: Path of the JSONL file to read.
        output_path: Path of the JSON file to write (overwritten if present).

    Raises:
        OSError: If the input cannot be opened or the output cannot be written.
    """
    order_dict: Dict[Any, Dict[str, Any]] = {}

    # utf-8-sig decodes plain UTF-8 as well, but also drops a leading BOM
    # that would otherwise make the first record fail to parse.
    with open(input_path, 'r', encoding='utf-8-sig') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            # Keep the try body minimal: only the parse can raise this error.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"第{line_num}行解析失败: {e}")
                continue

            if not isinstance(data, dict):
                # Valid JSON but not an object (e.g. a list or a bare string).
                print(f"第{line_num}行不是JSON对象,已跳过")
                continue

            for order_no, order in _iter_orders(data.get('body')):
                order_dict[order_no] = order

    # Save the aggregated orders as a JSON file.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(order_dict, f, ensure_ascii=False, indent=2)

    print(f"处理完成!共提取{len(order_dict)}个订单,已保存到{output_path}")


if __name__ == "__main__":
    input_file = "/mnt/d/code/project/python/scbank/scbank_data.jsonl"
    output_file = "/mnt/d/code/project/python/scbank/order_data.json"

    process_jsonl_to_json(input_file, output_file)