|
@@ -29,10 +29,7 @@ if str(ocr_platform_root) not in sys.path:
|
|
|
# 从 ocr_utils 导入通用工具
|
|
# 从 ocr_utils 导入通用工具
|
|
|
from ocr_utils.html_utils import convert_html_table_to_markdown, parse_html_tables
|
|
from ocr_utils.html_utils import convert_html_table_to_markdown, parse_html_tables
|
|
|
from ocr_utils.visualization_utils import VisualizationUtils
|
|
from ocr_utils.visualization_utils import VisualizationUtils
|
|
|
-from ocr_utils.module_debug_viz import (
|
|
|
|
|
- OCR_BOX_LINE_THICKNESS,
|
|
|
|
|
- ocr_box_color_rgb,
|
|
|
|
|
-)
|
|
|
|
|
|
|
+from ocr_utils.module_debug_viz import OCR_BOX_LINE_THICKNESS
|
|
|
|
|
|
|
|
# BeautifulSoup用于精确HTML表格处理
|
|
# BeautifulSoup用于精确HTML表格处理
|
|
|
from bs4 import BeautifulSoup
|
|
from bs4 import BeautifulSoup
|
|
@@ -55,13 +52,14 @@ def category_to_plotly_rgba(category: str, alpha: float = 0.85) -> str:
|
|
|
|
|
|
|
|
def ocr_box_plotly_rgba(alpha: float = 0.85) -> str:
|
|
def ocr_box_plotly_rgba(alpha: float = 0.85) -> str:
|
|
|
"""OCR 亮蓝(与 module_debug_viz / *_ocr_spans 一致)。"""
|
|
"""OCR 亮蓝(与 module_debug_viz / *_ocr_spans 一致)。"""
|
|
|
- r, g, b = ocr_box_color_rgb()
|
|
|
|
|
|
|
+ r, g, b = VisualizationUtils.COLOR_MAP['ocr_box']
|
|
|
return f"rgba({r}, {g}, {b}, {alpha})"
|
|
return f"rgba({r}, {g}, {b}, {alpha})"
|
|
|
|
|
|
|
|
|
|
|
|
|
-# 仅 layout 结构框按类别着色;其余按 OCR 亮蓝实线/虚线
|
|
|
|
|
|
|
+# 布局结构框按 COLOR_MAP 类别着色;其余按 OCR 亮蓝实线/虚线
|
|
|
LAYOUT_STRUCTURE_CATEGORIES = frozenset({
|
|
LAYOUT_STRUCTURE_CATEGORIES = frozenset({
|
|
|
'table_body', 'table', 'image_body', 'image', 'figure', 'chart',
|
|
'table_body', 'table', 'image_body', 'image', 'figure', 'chart',
|
|
|
|
|
+ 'seal',
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
# detect_image_orientation_by_opencv 保留在 ocr_validator_file_utils
|
|
# detect_image_orientation_by_opencv 保留在 ocr_validator_file_utils
|