| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- import difflib
def generate_unified_diff(file1_path, file2_path):
    """
    Build a git-diff-style unified diff between two UTF-8 text files.

    Args:
        file1_path: Path of the "from" file; also used as the ``---`` label.
        file2_path: Path of the "to" file; also used as the ``+++`` label.

    Returns:
        A list of diff lines. No element ends with a newline character, so
        the list can be joined with a newline for display. The list is
        empty when both files have identical content.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If either file is not valid UTF-8.
    """
    with open(file1_path, 'r', encoding='utf-8') as f1, open(file2_path, 'r', encoding='utf-8') as f2:
        file1_lines = f1.readlines()
        file2_lines = f2.readlines()

    # lineterm='' keeps the ---/+++/@@ control lines free of a trailing newline.
    diff = difflib.unified_diff(
        file1_lines,
        file2_lines,
        fromfile=file1_path,
        tofile=file2_path,
        lineterm='',
    )

    # readlines() keeps each input line's trailing newline, so the content
    # lines yielded by unified_diff still end with "\n" while the control
    # lines do not. Strip it here so the result is uniformly newline-free;
    # otherwise joining with "\n" prints a blank line after every content
    # line. The comparison itself still sees the original line endings, so
    # a missing end-of-file newline is still reported as a change.
    return [line.rstrip('\n') for line in diff]
- import difflib
def generate_html_diff(file1_path, file2_path, output_html_path):
    """
    Write an HTML side-by-side diff report for two UTF-8 text files.

    Args:
        file1_path: Path of the "from" file; also shown as its column title.
        file2_path: Path of the "to" file; also shown as its column title.
        output_html_path: Destination path of the generated HTML file.

    Returns:
        None. The report is written to ``output_html_path`` and a
        confirmation message is printed to stdout.
    """
    # Open both inputs before reading either one.
    with open(file1_path, 'r', encoding='utf-8') as left_fh:
        with open(file2_path, 'r', encoding='utf-8') as right_fh:
            left_lines = left_fh.readlines()
            right_lines = right_fh.readlines()

    # context=True limits the report to changed regions plus surrounding lines.
    renderer = difflib.HtmlDiff()
    report = renderer.make_file(
        left_lines,
        right_lines,
        fromdesc=file1_path,
        todesc=file2_path,
        context=True,
    )

    # Persist the rendered page.
    with open(output_html_path, 'w', encoding='utf-8') as out_fh:
        out_fh.write(report)

    print(f"HTML差异报告已生成: {output_html_path}")
if __name__ == "__main__":
    # Example usage: compare an OCR result page against its pre-validation copy.
    file1 = '/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司_page_001.md'
    file2 = '/Users/zhch158/workspace/repository.git/ocr_verify/output/pre_validation/2023年度报告母公司_page_001.md'

    # Print the unified diff, or a notice when the files are identical.
    diff_result = generate_unified_diff(file1, file2)
    print("\n".join(diff_result) if diff_result else "文件内容相同,无差异。")

    # Example usage: also write the HTML report next to the working directory.
    generate_html_diff(file1, file2, 'diff_report.html')
|