|
@@ -48,6 +48,7 @@ class ElementProcessors:
|
|
|
wired_table_recognizer: Optional[Any] = None,
|
|
wired_table_recognizer: Optional[Any] = None,
|
|
|
table_classifier: Optional[Any] = None,
|
|
table_classifier: Optional[Any] = None,
|
|
|
vl_recognizer_lazy_loader: Optional[Any] = None, # 🆕 懒加载回调
|
|
vl_recognizer_lazy_loader: Optional[Any] = None, # 🆕 懒加载回调
|
|
|
|
|
+ seal_ocr_recognizer: Optional[Any] = None, # 🆕 印章 OCR 识别器
|
|
|
):
|
|
):
|
|
|
"""
|
|
"""
|
|
|
初始化元素处理器
|
|
初始化元素处理器
|
|
@@ -60,6 +61,7 @@ class ElementProcessors:
|
|
|
wired_table_recognizer: 有线表格识别器(可选)
|
|
wired_table_recognizer: 有线表格识别器(可选)
|
|
|
table_classifier: 表格分类器(区分有线/无线表格,可选)
|
|
table_classifier: 表格分类器(区分有线/无线表格,可选)
|
|
|
vl_recognizer_lazy_loader: VL识别器懒加载回调函数(可选)
|
|
vl_recognizer_lazy_loader: VL识别器懒加载回调函数(可选)
|
|
|
|
|
+ seal_ocr_recognizer: 印章 OCR 识别器(可选,不存在时回退 VLM)
|
|
|
"""
|
|
"""
|
|
|
self.preprocessor = preprocessor
|
|
self.preprocessor = preprocessor
|
|
|
self.ocr_recognizer = ocr_recognizer
|
|
self.ocr_recognizer = ocr_recognizer
|
|
@@ -67,6 +69,7 @@ class ElementProcessors:
|
|
|
self.table_cell_matcher = table_cell_matcher
|
|
self.table_cell_matcher = table_cell_matcher
|
|
|
self.wired_table_recognizer = wired_table_recognizer
|
|
self.wired_table_recognizer = wired_table_recognizer
|
|
|
self.table_classifier = table_classifier
|
|
self.table_classifier = table_classifier
|
|
|
|
|
+ self.seal_ocr_recognizer = seal_ocr_recognizer
|
|
|
|
|
|
|
|
# VL 识别器懒加载支持
|
|
# VL 识别器懒加载支持
|
|
|
self._vl_recognizer_lazy_loader = vl_recognizer_lazy_loader
|
|
self._vl_recognizer_lazy_loader = vl_recognizer_lazy_loader
|
|
@@ -729,23 +732,47 @@ class ElementProcessors:
|
|
|
layout_item: Dict[str, Any]
|
|
layout_item: Dict[str, Any]
|
|
|
) -> Dict[str, Any]:
|
|
) -> Dict[str, Any]:
|
|
|
"""
|
|
"""
|
|
|
- 处理印章(seal)元素 - 使用 VLM 识别
|
|
|
|
|
-
|
|
|
|
|
|
|
+ 处理印章(seal)元素 - 优先使用 SealOCRRecognizer,回退 VLM
|
|
|
|
|
+
|
|
|
Args:
|
|
Args:
|
|
|
image: 页面图像
|
|
image: 页面图像
|
|
|
layout_item: 布局检测项
|
|
layout_item: 布局检测项
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
Returns:
|
|
Returns:
|
|
|
处理后的元素字典
|
|
处理后的元素字典
|
|
|
"""
|
|
"""
|
|
|
bbox = layout_item.get('bbox', [0, 0, 0, 0])
|
|
bbox = layout_item.get('bbox', [0, 0, 0, 0])
|
|
|
category = layout_item.get('category', 'seal')
|
|
category = layout_item.get('category', 'seal')
|
|
|
cropped_region = CoordinateUtils.crop_region(image, bbox)
|
|
cropped_region = CoordinateUtils.crop_region(image, bbox)
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
content = {'text': '', 'confidence': 0.0}
|
|
content = {'text': '', 'confidence': 0.0}
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 优先使用 SealOCRRecognizer(MinerU 印章专用 OCR)
|
|
|
|
|
+ if self.seal_ocr_recognizer is not None:
|
|
|
|
|
+ try:
|
|
|
|
|
+ seal_result = self.seal_ocr_recognizer.recognize(cropped_region)
|
|
|
|
|
+ if seal_result.get('text', '').strip():
|
|
|
|
|
+ content = {
|
|
|
|
|
+ 'text': seal_result['text'],
|
|
|
|
|
+ 'confidence': seal_result.get('confidence', 0.0),
|
|
|
|
|
+ 'texts': seal_result.get('texts', []),
|
|
|
|
|
+ 'details': seal_result.get('details', []),
|
|
|
|
|
+ 'recognition_method': 'seal_ocr',
|
|
|
|
|
+ }
|
|
|
|
|
+ logger.info(f"🔖 Seal recognized (OCR): {content['text'][:50]}..."
|
|
|
|
|
+ if len(content['text']) > 50
|
|
|
|
|
+ else f"🔖 Seal recognized (OCR): {content['text']}")
|
|
|
|
|
+ return {
|
|
|
|
|
+ 'type': category,
|
|
|
|
|
+ 'bbox': bbox,
|
|
|
|
|
+ 'confidence': layout_item.get('confidence', 0.0),
|
|
|
|
|
+ 'content': content
|
|
|
|
|
+ }
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ logger.warning(f"SealOCRRecognizer failed, falling back to VLM: {e}")
|
|
|
|
|
+
|
|
|
|
|
+ # 回退:使用 VLM 识别
|
|
|
try:
|
|
try:
|
|
|
- # 懒加载 VL 识别器
|
|
|
|
|
vl_recognizer = self._ensure_vl_recognizer()
|
|
vl_recognizer = self._ensure_vl_recognizer()
|
|
|
if vl_recognizer is None:
|
|
if vl_recognizer is None:
|
|
|
logger.error("❌ VL recognizer not available for seal recognition")
|
|
logger.error("❌ VL recognizer not available for seal recognition")
|
|
@@ -754,19 +781,20 @@ class ElementProcessors:
|
|
|
'bbox': bbox,
|
|
'bbox': bbox,
|
|
|
'content': content
|
|
'content': content
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- # 使用 recognize_text 方法,传入 element_type='seal'
|
|
|
|
|
- # GLM-OCR 适配器会根据 element_type 使用相应的提示词
|
|
|
|
|
|
|
+
|
|
|
seal_result = vl_recognizer.recognize_text(cropped_region, element_type='seal')
|
|
seal_result = vl_recognizer.recognize_text(cropped_region, element_type='seal')
|
|
|
content = {
|
|
content = {
|
|
|
'text': seal_result.get('text', ''),
|
|
'text': seal_result.get('text', ''),
|
|
|
- 'confidence': seal_result.get('confidence', 0.0)
|
|
|
|
|
|
|
+ 'confidence': seal_result.get('confidence', 0.0),
|
|
|
|
|
+ 'recognition_method': 'vlm',
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- logger.info(f"🔖 Seal recognized: {content['text'][:50]}..." if len(content['text']) > 50 else f"🔖 Seal recognized: {content['text']}")
|
|
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"🔖 Seal recognized (VLM): {content['text'][:50]}..."
|
|
|
|
|
+ if len(content['text']) > 50
|
|
|
|
|
+ else f"🔖 Seal recognized (VLM): {content['text']}")
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.warning(f"Seal recognition failed: {e}")
|
|
logger.warning(f"Seal recognition failed: {e}")
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
return {
|
|
return {
|
|
|
'type': category,
|
|
'type': category,
|
|
|
'bbox': bbox,
|
|
'bbox': bbox,
|