import json
import re
from typing import Dict, List
from datetime import datetime


class ReportGenerator:
    """Write a comparison result dict to disk as JSON and/or Markdown reports."""

    # Section heading shown for each known difference type. Types missing
    # from this map are rendered as '❓ <type>'.
    _TYPE_NAMES = {
        'table_amount': '💰 表格金额差异',
        'table_text': '📝 表格文本差异',
        'table_datetime': '📅 表格日期时间差异',
        'table_pre_header': '📋 表头前内容差异',
        'table_header_position': '📍 表头位置差异',
        'table_header_mismatch': '⚠️ 表头不匹配',
        'table_header_critical': '❌ 表头严重错误',
        'table_column_type_mismatch': '🔀 列类型不匹配',
        'table_row_missing': '🚫 表格行缺失',
        'table_row_data': '📊 表格数据差异',
        'table_structure': '🏗️ 表格结构差异',
        'paragraph': '📄 段落差异',
        'paragraph_punctuation': '🔤 段落标点差异',
    }

    # Icon placed in front of a severity label; unknown severities get '⚪'.
    _SEVERITY_ICONS = {
        'critical': '🔴',
        'high': '🟠',
        'medium': '🟡',
        'low': '🟢',
    }

    # Maximum number of characters of a value shown in the detail table.
    _MAX_CELL_CHARS = 50

    @staticmethod
    def _table_cell(value, limit=None) -> str:
        """Return ``value`` as text that is safe inside one Markdown table cell.

        Args:
            value: Any object; it is converted with ``str()``.
            limit: When given, text longer than ``limit`` characters is cut
                to ``limit`` characters and suffixed with ``'...'``.

        Returns:
            The text with line breaks replaced by spaces and ``|`` escaped
            as ``\\|``, so the cell cannot break the surrounding table row.
        """
        text = str(value)
        if limit is not None and len(text) > limit:
            text = text[:limit] + '...'
        # A table row must stay on one physical line.
        text = text.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')
        # An unescaped pipe would start a new column, even inside backticks.
        return text.replace('|', '\\|')

    @staticmethod
    def generate_json_report(comparison_result: Dict, output_file: str) -> None:
        """Dump ``comparison_result`` as indented UTF-8 JSON.

        Args:
            comparison_result: JSON-serialisable comparison result.
            output_file: Output path without extension; ``.json`` is appended.
        """
        with open(f"{output_file}.json", 'w', encoding='utf-8') as f:
            json.dump(comparison_result, f, ensure_ascii=False, indent=2)

        print(f"✅ JSON报告已生成: {output_file}.json")

    @staticmethod
    def generate_markdown_report(comparison_result: Dict, output_file: str) -> None:
        """Write a Markdown report for ``comparison_result``.

        The report contains basic information, statistics, a per-type
        difference summary (or a "perfect match" conclusion when
        ``total_differences`` is 0) and a detail table of all differences.

        Args:
            comparison_result: Must contain ``file1_path``, ``file2_path``,
                ``statistics`` (with ``total_differences``,
                ``table_differences`` and ``paragraph_differences``) and
                ``differences`` (a list of dicts, each with a ``type`` key).
                All other keys read here are optional and have defaults.
            output_file: Output path without extension; ``.md`` is appended.

        Raises:
            KeyError: If one of the required keys listed above is missing.
        """
        cell = ReportGenerator._table_cell
        max_chars = ReportGenerator._MAX_CELL_CHARS

        with open(f"{output_file}.md", 'w', encoding='utf-8') as f:
            f.write("# OCR结果对比报告\n\n")

            # Basic information
            f.write("## 基本信息\n\n")
            f.write(f"- **文件1**: `{comparison_result['file1_path']}`\n")
            f.write(f"- **文件2**: `{comparison_result['file2_path']}`\n")
            # Falls back to the current local time when no timestamp is stored.
            f.write(f"- **比较时间**: {comparison_result.get('timestamp', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}\n\n")

            # Statistics
            stats = comparison_result['statistics']
            f.write("## 统计信息\n\n")
            f.write(f"- 总差异数量: **{stats['total_differences']}**\n")
            f.write(f"- 表格差异: **{stats['table_differences']}**\n")
            f.write(f"- 其中表格金额差异: **{stats.get('amount_differences', 0)}**\n")
            f.write(f"- 段落差异: **{stats['paragraph_differences']}**\n")
            f.write(f"- 高严重度: **{stats.get('high_severity', 0)}**\n")
            f.write(f"- 中严重度: **{stats.get('medium_severity', 0)}**\n")
            f.write(f"- 低严重度: **{stats.get('low_severity', 0)}**\n")
            f.write(f"- 文件1表格数: {comparison_result.get('file1_tables', 0)}\n")
            f.write(f"- 文件2表格数: {comparison_result.get('file2_tables', 0)}\n")
            f.write(f"- 文件1段落数: {comparison_result.get('file1_paragraphs', 0)}\n")
            f.write(f"- 文件2段落数: {comparison_result.get('file2_paragraphs', 0)}\n\n")

            # Difference summary
            if stats['total_differences'] == 0:
                f.write("## 结论\n\n")
                f.write("🎉 **完美匹配!没有发现任何差异。**\n\n")
            else:
                f.write("## 差异摘要\n\n")

                # Group differences by type, keeping first-seen order.
                diff_by_type = {}
                for diff in comparison_result['differences']:
                    diff_by_type.setdefault(diff['type'], []).append(diff)

                for diff_type, diffs in diff_by_type.items():
                    type_name = ReportGenerator._TYPE_NAMES.get(diff_type, f'❓ {diff_type}')
                    f.write(f"### {type_name} ({len(diffs)}个)\n\n")

                    for i, diff in enumerate(diffs, 1):
                        f.write(f"**{i}. {diff.get('position', 'N/A')}**\n")
                        f.write(f"- 文件1: `{diff.get('file1_value', '')}`\n")
                        f.write(f"- 文件2: `{diff.get('file2_value', '')}`\n")
                        f.write(f"- 说明: {diff.get('description', 'N/A')}\n")
                        if 'severity' in diff:
                            icon = ReportGenerator._SEVERITY_ICONS.get(diff['severity'], '⚪')
                            f.write(f"- 严重度: {icon} {diff['severity']}\n")
                        f.write("\n")

            # Detailed difference table
            if comparison_result['differences']:
                f.write("## 详细差异列表\n\n")
                f.write("| 序号 | 类型 | 位置 | 文件1内容 | 文件2内容 | 描述 | 严重度 |\n")
                f.write("| --- | --- | --- | --- | --- | --- | --- |\n")

                for i, diff in enumerate(comparison_result['differences'], 1):
                    # Every cell is escaped so OCR text containing '|' or line
                    # breaks cannot split the row; long values are truncated.
                    diff_type = cell(diff['type'])
                    position = cell(diff.get('position', 'N/A'))
                    file1_value = cell(diff.get('file1_value', ''), max_chars)
                    file2_value = cell(diff.get('file2_value', ''), max_chars)
                    description = cell(diff.get('description', 'N/A'))
                    severity = cell(diff.get('severity', 'N/A'))

                    f.write(f"| {i} | {diff_type} | {position} | ")
                    f.write(f"`{file1_value}` | ")
                    f.write(f"`{file2_value}` | ")
                    f.write(f"{description} | {severity} |\n")

        print(f"✅ Markdown报告已生成: {output_file}.md")

    @staticmethod
    def generate_report(comparison_result: Dict, output_file: str, output_format: str) -> None:
        """Generate the report(s) selected by ``output_format``.

        Args:
            comparison_result: Comparison result passed to the generators.
            output_file: Output path without extension.
            output_format: ``'json'``, ``'markdown'`` or ``'both'``. Any
                other value produces no output.
        """
        if output_format in ['json', 'both']:
            ReportGenerator.generate_json_report(comparison_result, output_file)

        if output_format in ['markdown', 'both']:
            ReportGenerator.generate_markdown_report(comparison_result, output_file)