from typing import Dict, List


class SimilarityCalculator:
    """Text similarity helpers.

    Provides a fuzzy similarity score for two strings and a positional
    scan that reports characters which differ only in a way the supplied
    normalizer considers equivalent.
    """

    @staticmethod
    def calculate_text_similarity(text1: str, text2: str) -> float:
        """Return a similarity score for two strings.

        Args:
            text1: First string to compare.
            text2: Second string to compare.

        Returns:
            ``100.0`` when both strings are empty or they are equal,
            ``0.0`` when exactly one of them is empty, otherwise the value
            of ``fuzz.ratio(text1, text2)`` converted to ``float``.
        """
        if not text1 and not text2:
            return 100.0
        if not text1 or not text2:
            return 0.0
        if text1 == text2:
            return 100.0

        # Imported here so the trivial paths above, and the punctuation
        # checker below, work without the third-party package being loaded.
        from fuzzywuzzy import fuzz

        # fuzz.ratio yields an int; convert so every path returns a float,
        # as the annotation promises.
        return float(fuzz.ratio(text1, text2))

    @staticmethod
    def check_punctuation_differences(
        text1: str, text2: str, normalize_func
    ) -> List[Dict]:
        """List positions where two texts differ only before normalization.

        The scan runs only when the texts differ but their normalized
        forms are equal. Characters are compared position by position over
        the length of the shorter text; a position is reported when the two
        characters differ yet normalize to the same value.

        Args:
            text1: First text.
            text2: Second text.
            normalize_func: Callable taking a string and returning its
                normalized form. It is applied to the whole texts and to
                single characters. (Presumably it folds full-width
                punctuation to half-width; confirm against the caller.)

        Returns:
            A list of dicts with the keys ``position``, ``char1``,
            ``char2``, ``context1``, ``context2`` and ``type``. ``type`` is
            always ``'full_half_width'``. Each context is the slice of the
            corresponding text from 3 characters before the position to 3
            characters after it, clipped to that text's own bounds.
        """
        differences: List[Dict] = []

        normalized1 = normalize_func(text1)
        normalized2 = normalize_func(text2)
        if normalized1 != normalized2 or text1 == text2:
            return differences

        # zip stops at the shorter text, so only shared positions are scanned.
        for i, (char1, char2) in enumerate(zip(text1, text2)):
            if char1 == char2:
                continue
            if normalize_func(char1) != normalize_func(char2):
                continue

            start = max(0, i - 3)
            end = i + 4
            # Slicing clips to each text's own length, so a longer text2 is
            # no longer truncated by len(text1).
            differences.append({
                'position': i,
                'char1': char1,
                'char2': char2,
                'context1': text1[start:end],
                'context2': text2[start:end],
                'type': 'full_half_width',
            })

        return differences