| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
from typing import Callable, Dict, List

from fuzzywuzzy import fuzz
class SimilarityCalculator:
    """Text similarity helpers: a fuzzy-ratio score and a per-character variant diff."""

    @staticmethod
    def calculate_text_similarity(text1: str, text2: str) -> float:
        """Return a similarity score for two texts on a 0-100 scale.

        Args:
            text1: First text. Any falsy value (e.g. ``""`` or ``None``) is
                treated as empty.
            text2: Second text, treated the same way.

        Returns:
            ``100.0`` when both texts are empty or the texts are equal,
            ``0.0`` when exactly one of them is empty, otherwise the result of
            ``fuzz.ratio(text1, text2)`` converted to ``float``.
        """
        if not text1 and not text2:
            return 100.0
        if not text1 or not text2:
            return 0.0
        if text1 == text2:
            return 100.0

        # fuzz.ratio reports an integer percentage (per the fuzzywuzzy docs);
        # convert it so every path returns the annotated float type.
        return float(fuzz.ratio(text1, text2))

    @staticmethod
    def check_punctuation_differences(
        text1: str,
        text2: str,
        normalize_func: Callable[[str], str],
    ) -> List[Dict]:
        """Find characters that differ between two texts but normalize identically.

        Differences are reported only when the two texts are not equal as-is
        yet ``normalize_func`` maps them to the same string. Characters are
        compared position by position over the length of the shorter text;
        anything past that length is not inspected.

        Args:
            text1: First text.
            text2: Second text.
            normalize_func: Callable applied both to the whole texts and to
                single characters; two characters count as variants of each
                other when it returns equal values for them.

        Returns:
            A list with one dict per differing position, holding:
            ``position`` (index of the character), ``char1`` / ``char2`` (the
            differing characters), ``context1`` / ``context2`` (up to three
            characters on each side of the position, taken from the respective
            text) and ``type`` (always ``'full_half_width'``). Empty when
            nothing qualifies.
        """
        differences: List[Dict] = []

        normalized1 = normalize_func(text1)
        normalized2 = normalize_func(text2)

        # Nothing to report unless the texts differ only in ways that
        # normalization removes.
        if normalized1 != normalized2 or text1 == text2:
            return differences

        # zip stops at the shorter text, so both indices are always valid.
        for position, (char1, char2) in enumerate(zip(text1, text2)):
            if char1 == char2:
                continue
            if normalize_func(char1) != normalize_func(char2):
                continue

            # Slicing clamps at the end of each string, so every context is
            # bounded by its own text rather than by the other one's length.
            start = max(0, position - 3)
            end = position + 4

            differences.append({
                'position': position,
                'char1': char1,
                'char2': char2,
                'context1': text1[start:end],
                'context2': text2[start:end],
                'type': 'full_half_width',
            })

        return differences
|