import difflib


def _read_lines(path, keepends):
    """Read a UTF-8 text file and return its lines as a list.

    Args:
        path: Path of the file to read.
        keepends: When true, each line keeps its trailing newline (same as
            ``readlines()``); when false, the trailing newline is removed.

    Returns:
        A list of strings, one per line of the file.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        if keepends:
            return handle.readlines()
        # Text mode already normalises line endings to "\n", so dropping
        # that single character is enough to obtain newline-free lines.
        return [line.rstrip('\n') for line in handle]


def generate_unified_diff(file1_path, file2_path):
    """Generate a git-diff-like unified diff between two text files.

    Both files are read as UTF-8. The input lines are stripped of their
    trailing newline before diffing so that, combined with ``lineterm=''``,
    every returned line (headers, hunk markers and content) is uniformly
    newline-free and can be joined with ``"\\n"`` for display.

    Note: because line terminators are stripped, a difference that consists
    only of a missing final newline at end of file is not reported.

    Args:
        file1_path: Path of the "from" file.
        file2_path: Path of the "to" file.

    Returns:
        A list of diff lines without trailing newlines; an empty list when
        the two files have the same lines.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If either file is not valid UTF-8.
    """
    file1_lines = _read_lines(file1_path, keepends=False)
    file2_lines = _read_lines(file2_path, keepends=False)

    diff = difflib.unified_diff(
        file1_lines,
        file2_lines,
        fromfile=file1_path,
        tofile=file2_path,
        lineterm='',  # inputs are newline-free, so emit newline-free output
    )

    # Materialise the generator so callers can test emptiness and reuse it.
    return list(diff)


def generate_html_diff(file1_path, file2_path, output_html_path):
    """Generate an HTML diff report for two text files and save it to disk.

    Both inputs are read as UTF-8 and the report is written as UTF-8. A
    confirmation message is printed once the report has been written.

    Args:
        file1_path: Path of the "from" file (also used as its description).
        file2_path: Path of the "to" file (also used as its description).
        output_html_path: Destination path of the generated HTML report.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
        UnicodeDecodeError: If either input file is not valid UTF-8.
    """
    file1_lines = _read_lines(file1_path, keepends=True)
    file2_lines = _read_lines(file2_path, keepends=True)

    diff_html = difflib.HtmlDiff().make_file(
        file1_lines,
        file2_lines,
        fromdesc=file1_path,
        todesc=file2_path,
        context=True,  # only show changed regions with surrounding context
    )

    with open(output_html_path, 'w', encoding='utf-8') as html_file:
        html_file.write(diff_html)

    print(f"HTML差异报告已生成: {output_html_path}")


if __name__ == "__main__":
    # Usage example: compare two versions of the same page.
    file1 = '/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司_page_001.md'
    file2 = '/Users/zhch158/workspace/repository.git/ocr_verify/output/pre_validation/2023年度报告母公司_page_001.md'

    diff_result = generate_unified_diff(file1, file2)

    # Print the unified diff, or a notice when there is nothing to show.
    if diff_result:
        print("\n".join(diff_result))
    else:
        print("文件内容相同,无差异。")

    # Usage example: also write the HTML report.
    generate_html_diff(file1, file2, 'diff_report.html')