|
|
@@ -122,7 +122,7 @@ class OCRLayoutManager:
|
|
|
|
|
|
content = re.sub(r'<table[^>]*>.*?</table>', extract_table, content, flags=re.DOTALL)
|
|
|
|
|
|
- # 3. 对表格使用 BeautifulSoup 精确处理
|
|
|
+ # 3. 对表格使用 BeautifulSoup 精确处理(只高亮文本,不高亮整个单元格)
|
|
|
highlighted_tables = []
|
|
|
|
|
|
for table_html in tables:
|
|
|
@@ -132,11 +132,17 @@ class OCRLayoutManager:
|
|
|
for td in soup.find_all(['td', 'th']):
|
|
|
cell_text = td.get_text(strip=True)
|
|
|
if cell_text == text_to_highlight:
|
|
|
- # 给整个单元格添加高亮类
|
|
|
- current_classes = td.get('class', [])
|
|
|
- td['class'] = current_classes + highlight_class.split()
|
|
|
+ # 🎯 只高亮文本,不高亮整个单元格
|
|
|
+ # 清空单元格内容
|
|
|
+ td.clear()
|
|
|
+ # 创建高亮 span 包裹文本
|
|
|
+ span = soup.new_tag('span')
|
|
|
+ span['class'] = highlight_class.split()
|
|
|
if title:
|
|
|
- td['title'] = title
|
|
|
+ span['title'] = title
|
|
|
+ span.string = text_to_highlight
|
|
|
+ # 将 span 添加到单元格
|
|
|
+ td.append(span)
|
|
|
|
|
|
highlighted_tables.append(str(soup))
|
|
|
|