|
|
@@ -13,7 +13,7 @@ class OCRResultComparator:
|
|
|
def __init__(self):
# Set up initial state: an empty differences list plus the two matching settings below.
|
|
|
self.differences = []
|
|
|
self.similarity_threshold = 85 # Similarity threshold: a score above 85% is considered a match
|
|
|
- self.max_paragraph_window = 3 # Maximum number of paragraphs to merge
|
|
|
+ self.max_paragraph_window = 6 # Maximum number of paragraphs to merge
|
|
|
|
|
|
def normalize_text(self, text: str) -> str:
|
|
|
"""标准化文本:去除多余空格、回车等无效字符"""
|