| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
- import json
- import re
- from typing import Dict, List
- from datetime import datetime
class ReportGenerator:
    """Write an OCR comparison result to disk as JSON and/or Markdown reports."""

    # Heading shown for each known difference type in the Markdown summary.
    # Types missing from this map are rendered as "❓ <type>".
    _TYPE_NAMES = {
        'table_amount': '💰 表格金额差异',
        'table_text': '📝 表格文本差异',
        'table_datetime': '📅 表格日期时间差异',
        'table_pre_header': '📋 表头前内容差异',
        'table_header_position': '📍 表头位置差异',
        'table_header_mismatch': '⚠️ 表头不匹配',
        'table_header_critical': '❌ 表头严重错误',
        'table_column_type_mismatch': '🔀 列类型不匹配',
        'table_row_missing': '🚫 表格行缺失',
        'table_row_data': '📊 表格数据差异',
        'table_structure': '🏗️ 表格结构差异',
        'paragraph': '📄 段落差异',
        'paragraph_punctuation': '🔤 段落标点差异',
    }

    # Icon shown next to a severity label; unknown severities get '⚪'.
    _SEVERITY_ICONS = {'critical': '🔴', 'high': '🟠', 'medium': '🟡', 'low': '🟢'}

    # Longest value (in characters) shown in the detail table before truncation.
    _MAX_CELL_CHARS = 50

    @staticmethod
    def _truncate(value, limit: int) -> str:
        """Return ``str(value)`` cut to *limit* characters, with '...' appended if cut."""
        text = str(value)
        if len(text) > limit:
            return text[:limit] + '...'
        return text

    @staticmethod
    def _table_cell(value) -> str:
        """Make *value* safe to embed in a single Markdown table cell.

        Line breaks would end the table row and an unescaped ``|`` would start
        a new column, so line breaks become spaces and pipes are
        backslash-escaped. Text without those characters is returned unchanged.
        """
        text = str(value).replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')
        return text.replace('|', '\\|')

    @staticmethod
    def generate_json_report(comparison_result: Dict, output_file: str) -> None:
        """Dump *comparison_result* to ``<output_file>.json`` as UTF-8 JSON.

        Args:
            comparison_result: The comparison result; must be JSON-serialisable.
            output_file: Output path without the ``.json`` extension.
        """
        with open(f"{output_file}.json", 'w', encoding='utf-8') as f:
            json.dump(comparison_result, f, ensure_ascii=False, indent=2)
        print(f"✅ JSON报告已生成: {output_file}.json")

    @staticmethod
    def generate_markdown_report(comparison_result: Dict, output_file: str) -> None:
        """Write a Markdown report to ``<output_file>.md``.

        The report contains basic information, statistics, a summary grouped
        by difference type, and a detail table with one row per difference.

        Args:
            comparison_result: Mapping with the required keys ``file1_path``,
                ``file2_path``, ``statistics`` and ``differences``. Within
                ``statistics`` the keys ``total_differences``,
                ``table_differences`` and ``paragraph_differences`` are
                required; the remaining counters default to 0. Each entry of
                ``differences`` must have a ``type``; ``position``,
                ``file1_value``, ``file2_value``, ``description`` and
                ``severity`` are optional. ``timestamp`` defaults to the
                current local time.
            output_file: Output path without the ``.md`` extension.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        cls = ReportGenerator
        differences = comparison_result['differences']

        with open(f"{output_file}.md", 'w', encoding='utf-8') as f:
            f.write("# OCR结果对比报告\n\n")

            # Basic information
            f.write("## 基本信息\n\n")
            f.write(f"- **文件1**: `{comparison_result['file1_path']}`\n")
            f.write(f"- **文件2**: `{comparison_result['file2_path']}`\n")
            f.write(f"- **比较时间**: {comparison_result.get('timestamp', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}\n\n")

            # Statistics
            stats = comparison_result['statistics']
            f.write("## 统计信息\n\n")
            f.write(f"- 总差异数量: **{stats['total_differences']}**\n")
            f.write(f"- 表格差异: **{stats['table_differences']}**\n")
            f.write(f"- 其中表格金额差异: **{stats.get('amount_differences', 0)}**\n")
            f.write(f"- 段落差异: **{stats['paragraph_differences']}**\n")
            f.write(f"- 高严重度: **{stats.get('high_severity', 0)}**\n")
            f.write(f"- 中严重度: **{stats.get('medium_severity', 0)}**\n")
            f.write(f"- 低严重度: **{stats.get('low_severity', 0)}**\n")
            f.write(f"- 文件1表格数: {comparison_result.get('file1_tables', 0)}\n")
            f.write(f"- 文件2表格数: {comparison_result.get('file2_tables', 0)}\n")
            f.write(f"- 文件1段落数: {comparison_result.get('file1_paragraphs', 0)}\n")
            f.write(f"- 文件2段落数: {comparison_result.get('file2_paragraphs', 0)}\n\n")

            # Difference summary
            if stats['total_differences'] == 0:
                f.write("## 结论\n\n")
                f.write("🎉 **完美匹配!没有发现任何差异。**\n\n")
            else:
                f.write("## 差异摘要\n\n")

                # Group by type; dict insertion order keeps first-seen order.
                diff_by_type = {}
                for diff in differences:
                    diff_by_type.setdefault(diff['type'], []).append(diff)

                for diff_type, diffs in diff_by_type.items():
                    type_name = cls._TYPE_NAMES.get(diff_type, f'❓ {diff_type}')
                    f.write(f"### {type_name} ({len(diffs)}个)\n\n")

                    for i, diff in enumerate(diffs, 1):
                        f.write(f"**{i}. {diff.get('position', 'N/A')}**\n")
                        f.write(f"- 文件1: `{diff.get('file1_value', '')}`\n")
                        f.write(f"- 文件2: `{diff.get('file2_value', '')}`\n")
                        f.write(f"- 说明: {diff.get('description', 'N/A')}\n")
                        if 'severity' in diff:
                            severity = diff['severity']
                            icon = cls._SEVERITY_ICONS.get(severity, '⚪')
                            f.write(f"- 严重度: {icon} {severity}\n")
                        f.write("\n")

            # Detailed difference table
            if differences:
                f.write("## 详细差异列表\n\n")
                f.write("| 序号 | 类型 | 位置 | 文件1内容 | 文件2内容 | 描述 | 严重度 |\n")
                f.write("| --- | --- | --- | --- | --- | --- | --- |\n")

                for i, diff in enumerate(differences, 1):
                    # Truncate first, then escape, so an escape sequence is
                    # never cut in half by the length limit.
                    file1_value = cls._table_cell(
                        cls._truncate(diff.get('file1_value', ''), cls._MAX_CELL_CHARS))
                    file2_value = cls._table_cell(
                        cls._truncate(diff.get('file2_value', ''), cls._MAX_CELL_CHARS))
                    diff_type = cls._table_cell(diff['type'])
                    position = cls._table_cell(diff.get('position', 'N/A'))
                    description = cls._table_cell(diff.get('description', 'N/A'))
                    severity = cls._table_cell(diff.get('severity', 'N/A'))

                    f.write(
                        f"| {i} | {diff_type} | {position} | "
                        f"`{file1_value}` | "
                        f"`{file2_value}` | "
                        f"{description} | {severity} |\n"
                    )

        print(f"✅ Markdown报告已生成: {output_file}.md")

    @staticmethod
    def generate_report(comparison_result: Dict, output_file: str, output_format: str) -> None:
        """Generate the report(s) selected by *output_format*.

        Args:
            comparison_result: The comparison result to report on.
            output_file: Output path without extension.
            output_format: ``'json'``, ``'markdown'`` or ``'both'``. Any other
                value produces no report and raises no error.
        """
        if output_format in ('json', 'both'):
            ReportGenerator.generate_json_report(comparison_result, output_file)

        if output_format in ('markdown', 'both'):
            ReportGenerator.generate_markdown_report(comparison_result, output_file)
|