|
|
@@ -0,0 +1,61 @@
|
|
|
+import difflib
|
|
|
+
|
|
|
def generate_unified_diff(file1_path, file2_path):
    """Return the unified diff (git-diff style) between two UTF-8 text files.

    Args:
        file1_path: Path of the "from" file; also used as the ``---`` label.
        file2_path: Path of the "to" file; also used as the ``+++`` label.

    Returns:
        A list of diff lines, none of which ends with a newline, so the
        caller can emit them with ``"\\n".join(...)``. The list is empty
        when both files have the same lines.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If either file is not valid UTF-8.
    """
    with open(file1_path, 'r', encoding='utf-8') as f1, \
            open(file2_path, 'r', encoding='utf-8') as f2:
        # Drop the line terminator while reading. With lineterm='' the
        # header/hunk lines carry no newline, so the content lines must not
        # carry one either; otherwise the result mixes terminated and
        # unterminated lines and joining with "\n" prints blank lines.
        file1_lines = [line.rstrip('\n') for line in f1]
        file2_lines = [line.rstrip('\n') for line in f2]

    diff = difflib.unified_diff(
        file1_lines,
        file2_lines,
        fromfile=file1_path,
        tofile=file2_path,
        lineterm='',  # inputs are newline-free, so keep control lines newline-free too
    )

    # unified_diff is lazy; materialize it so the caller gets a reusable list.
    return list(diff)
|
|
|
+
|
|
|
+import difflib
|
|
|
+
|
|
|
def generate_html_diff(file1_path, file2_path, output_html_path):
    """Write an HTML diff report for two UTF-8 text files.

    Args:
        file1_path: Path of the "from" file; also shown as its column title.
        file2_path: Path of the "to" file; also shown as its column title.
        output_html_path: Destination of the generated HTML document.

    The report is produced by ``difflib.HtmlDiff.make_file`` in context mode,
    and a confirmation message is printed once the file has been written.
    """
    with open(file1_path, 'r', encoding='utf-8') as left, \
            open(file2_path, 'r', encoding='utf-8') as right:
        left_lines = list(left)
        right_lines = list(right)

    # Build the complete HTML document; context=True enables context mode.
    renderer = difflib.HtmlDiff()
    report = renderer.make_file(
        left_lines,
        right_lines,
        fromdesc=file1_path,
        todesc=file2_path,
        context=True,
    )

    # Persist the report as UTF-8.
    with open(output_html_path, 'w', encoding='utf-8') as out:
        out.write(report)

    print(f"HTML差异报告已生成: {output_html_path}")
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Example run: compare two Markdown files for the same page.
    source_path = '/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司_page_001.md'
    target_path = '/Users/zhch158/workspace/repository.git/ocr_verify/output/pre_validation/2023年度报告母公司_page_001.md'

    # Print the unified diff, or a notice when nothing differs.
    diff_lines = generate_unified_diff(source_path, target_path)
    print("\n".join(diff_lines) if diff_lines else "文件内容相同,无差异。")

    # Also write the HTML report into the current working directory.
    generate_html_diff(source_path, target_path, 'diff_report.html')
|