ソースを参照

Add unified diff and HTML diff generation functions

- Implemented `generate_unified_diff` to produce a unified diff output similar to git diff.
- Added `generate_html_diff` to create an HTML report of differences between two files.
- Included example usage in the main block for testing the functions with specified file paths.
zhch158_admin 1 ヶ月 前
コミット
3229a4cace
2 ファイル変更126 行追加0 行削除
  1. 65 0
      test/diff_report.html
  2. 61 0
      test/unified_diff.py

ファイルの差分が大きいため隠しています
+ 65 - 0
test/diff_report.html


+ 61 - 0
test/unified_diff.py

@@ -0,0 +1,61 @@
+import difflib
+
+def generate_unified_diff(file1_path, file2_path):
+    """
+    Return the differences between two text files in unified diff format.
+
+    The output resembles ``git diff``: two header lines (``---`` / ``+++``)
+    naming the files, followed by ``@@`` hunk headers and context/added/
+    removed lines.
+
+    Args:
+        file1_path: Path of the "from" file; read as UTF-8.
+        file2_path: Path of the "to" file; read as UTF-8.
+
+    Returns:
+        A list of diff lines, none of which ends with a newline, so the
+        result can be joined with ``"\n"``. The list is empty when the two
+        files have identical content.
+
+    Raises:
+        OSError: If either file cannot be opened.
+        UnicodeDecodeError: If either file is not valid UTF-8.
+    """
+    with open(file1_path, 'r', encoding='utf-8') as f1, open(file2_path, 'r', encoding='utf-8') as f2:
+        file1_lines = f1.readlines()
+        file2_lines = f2.readlines()
+
+    # lineterm='' keeps difflib from appending a newline to the control
+    # lines (---, +++, @@).
+    diff = difflib.unified_diff(
+        file1_lines,
+        file2_lines,
+        fromfile=file1_path,
+        tofile=file2_path,
+        lineterm=''
+    )
+
+    # Content lines still carry the newline read from the input files while
+    # control lines do not. Strip it so every returned line is uniformly
+    # newline-free; otherwise joining with "\n" double-spaces the output.
+    # The inputs are compared with their terminators intact, so a missing
+    # newline at end of file is still reported as a difference.
+    return [line.rstrip('\n') for line in diff]
+
+import difflib
+
+def generate_html_diff(file1_path, file2_path, output_html_path):
+    """
+    Write an HTML side-by-side diff report for two text files.
+
+    Both inputs are read as UTF-8, compared with ``difflib.HtmlDiff`` in
+    context mode, and the resulting HTML document is written as UTF-8 to
+    ``output_html_path``. A confirmation message is printed afterwards.
+
+    Args:
+        file1_path: Path of the "from" file; also used as its column label.
+        file2_path: Path of the "to" file; also used as its column label.
+        output_html_path: Destination path of the generated HTML report.
+    """
+    with open(file1_path, 'r', encoding='utf-8') as left_file, open(file2_path, 'r', encoding='utf-8') as right_file:
+        left_lines = list(left_file)
+        right_lines = list(right_file)
+
+    # Build the complete HTML document; context=True limits the report to
+    # the changed regions plus surrounding context lines.
+    html_differ = difflib.HtmlDiff()
+    report = html_differ.make_file(
+        left_lines,
+        right_lines,
+        fromdesc=file1_path,
+        todesc=file2_path,
+        context=True,
+    )
+
+    # Persist the report to disk.
+    with open(output_html_path, 'w', encoding='utf-8') as report_file:
+        report_file.write(report)
+    print(f"HTML差异报告已生成: {output_html_path}")
+
+if __name__ == "__main__":
+    # Example usage: compare one OCR result page against its pre-validation
+    # output.
+    file1 = '/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司_page_001.md'
+    file2 = '/Users/zhch158/workspace/repository.git/ocr_verify/output/pre_validation/2023年度报告母公司_page_001.md'
+    diff_result = generate_unified_diff(file1, file2)
+
+    # Print the diff. Any trailing newline is stripped from each line before
+    # joining, so the output has exactly one line break per diff line
+    # whether or not the lines carry their own terminator.
+    if diff_result:
+        print("\n".join(line.rstrip("\n") for line in diff_result))
+    else:
+        print("文件内容相同,无差异。")
+
+    # Example usage: also write the HTML report into the current directory.
+    generate_html_diff(file1, file2, 'diff_report.html')

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません