# unified_diff.py - compare two text files with difflib (unified diff and HTML report)
  1. import difflib
  2. def generate_unified_diff(file1_path, file2_path):
  3. """
  4. 生成类似git diff的统一差异格式输出
  5. """
  6. with open(file1_path, 'r', encoding='utf-8') as f1, open(file2_path, 'r', encoding='utf-8') as f2:
  7. file1_lines = f1.readlines()
  8. file2_lines = f2.readlines()
  9. # 使用unified_diff生成差异
  10. diff = difflib.unified_diff(
  11. file1_lines,
  12. file2_lines,
  13. fromfile=file1_path,
  14. tofile=file2_path,
  15. lineterm='' # 确保每行末尾不添加额外字符
  16. )
  17. # 将差异生成器转换为列表并返回,或直接打印
  18. diff_output = list(diff)
  19. return diff_output
  20. import difflib
  21. def generate_html_diff(file1_path, file2_path, output_html_path):
  22. """
  23. 生成HTML格式的差异报告并保存到文件
  24. """
  25. with open(file1_path, 'r', encoding='utf-8') as f1, open(file2_path, 'r', encoding='utf-8') as f2:
  26. file1_lines = f1.readlines()
  27. file2_lines = f2.readlines()
  28. # 创建HtmlDiff对象并生成差异HTML内容
  29. diff_html = difflib.HtmlDiff().make_file(
  30. file1_lines,
  31. file2_lines,
  32. fromdesc=file1_path,
  33. todesc=file2_path,
  34. context=True # 显示上下文
  35. )
  36. # 将HTML内容写入文件
  37. with open(output_html_path, 'w', encoding='utf-8') as html_file:
  38. html_file.write(diff_html)
  39. print(f"HTML差异报告已生成: {output_html_path}")
  40. if __name__ == "__main__":
  41. # 使用示例
  42. file1 = '/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司_page_001.md'
  43. file2 = '/Users/zhch158/workspace/repository.git/ocr_verify/output/pre_validation/2023年度报告母公司_page_001.md'
  44. diff_result = generate_unified_diff(file1, file2)
  45. # 打印差异结果
  46. if diff_result:
  47. print("\n".join(diff_result))
  48. else:
  49. print("文件内容相同,无差异。")
  50. # 使用示例
  51. generate_html_diff(file1, file2, 'diff_report.html')