Pārlūkot izejas kodu

feat(新增印章OCR识别器): 在ElementProcessors类中添加seal_ocr_recognizer参数,优化印章处理逻辑,优先使用SealOCRRecognizer进行识别,未配置该识别器、识别异常或结果为空时回退至VLM,提升印章识别的准确性与灵活性。

zhch158_admin 1 mēnesi atpakaļ
vecāks
revīzija
31ae5b84ca
1 mainīts fails ar 41 papildinājumu un 13 dzēšanām
  1. 41 13
      ocr_tools/universal_doc_parser/core/element_processors.py

+ 41 - 13
ocr_tools/universal_doc_parser/core/element_processors.py

@@ -48,6 +48,7 @@ class ElementProcessors:
         wired_table_recognizer: Optional[Any] = None,
         table_classifier: Optional[Any] = None,
         vl_recognizer_lazy_loader: Optional[Any] = None,  # 🆕 懒加载回调
+        seal_ocr_recognizer: Optional[Any] = None,  # 🆕 印章 OCR 识别器
     ):
         """
         初始化元素处理器
@@ -60,6 +61,7 @@ class ElementProcessors:
             wired_table_recognizer: 有线表格识别器(可选)
             table_classifier: 表格分类器(区分有线/无线表格,可选)
             vl_recognizer_lazy_loader: VL识别器懒加载回调函数(可选)
+            seal_ocr_recognizer: 印章 OCR 识别器(可选,不存在时回退 VLM)
         """
         self.preprocessor = preprocessor
         self.ocr_recognizer = ocr_recognizer
@@ -67,6 +69,7 @@ class ElementProcessors:
         self.table_cell_matcher = table_cell_matcher
         self.wired_table_recognizer = wired_table_recognizer
         self.table_classifier = table_classifier
+        self.seal_ocr_recognizer = seal_ocr_recognizer
         
         # VL 识别器懒加载支持
         self._vl_recognizer_lazy_loader = vl_recognizer_lazy_loader
@@ -729,23 +732,47 @@ class ElementProcessors:
         layout_item: Dict[str, Any]
     ) -> Dict[str, Any]:
         """
-        处理印章(seal)元素 - 使用 VLM 识别
-        
+        处理印章(seal)元素 - 优先使用 SealOCRRecognizer,回退 VLM
+
         Args:
             image: 页面图像
             layout_item: 布局检测项
-            
+
         Returns:
             处理后的元素字典
         """
         bbox = layout_item.get('bbox', [0, 0, 0, 0])
         category = layout_item.get('category', 'seal')
         cropped_region = CoordinateUtils.crop_region(image, bbox)
-        
+
         content = {'text': '', 'confidence': 0.0}
-        
+
+        # 优先使用 SealOCRRecognizer(MinerU 印章专用 OCR)
+        if self.seal_ocr_recognizer is not None:
+            try:
+                seal_result = self.seal_ocr_recognizer.recognize(cropped_region)
+                if seal_result.get('text', '').strip():
+                    content = {
+                        'text': seal_result['text'],
+                        'confidence': seal_result.get('confidence', 0.0),
+                        'texts': seal_result.get('texts', []),
+                        'details': seal_result.get('details', []),
+                        'recognition_method': 'seal_ocr',
+                    }
+                    logger.info(f"🔖 Seal recognized (OCR): {content['text'][:50]}..."
+                                if len(content['text']) > 50
+                                else f"🔖 Seal recognized (OCR): {content['text']}")
+                    return {
+                        'type': category,
+                        'bbox': bbox,
+                        'confidence': layout_item.get('confidence', 0.0),
+                        'content': content
+                    }
+            except Exception as e:
+                logger.warning(f"SealOCRRecognizer failed, falling back to VLM: {e}")
+
+        # 回退:使用 VLM 识别
         try:
-            # 懒加载 VL 识别器
             vl_recognizer = self._ensure_vl_recognizer()
             if vl_recognizer is None:
                 logger.error("❌ VL recognizer not available for seal recognition")
@@ -754,19 +781,20 @@ class ElementProcessors:
                     'bbox': bbox,
                     'content': content
                 }
-            
-            # 使用 recognize_text 方法,传入 element_type='seal'
-            # GLM-OCR 适配器会根据 element_type 使用相应的提示词
+
             seal_result = vl_recognizer.recognize_text(cropped_region, element_type='seal')
             content = {
                 'text': seal_result.get('text', ''),
-                'confidence': seal_result.get('confidence', 0.0)
+                'confidence': seal_result.get('confidence', 0.0),
+                'recognition_method': 'vlm',
             }
-            
-            logger.info(f"🔖 Seal recognized: {content['text'][:50]}..." if len(content['text']) > 50 else f"🔖 Seal recognized: {content['text']}")
+
+            logger.info(f"🔖 Seal recognized (VLM): {content['text'][:50]}..."
+                        if len(content['text']) > 50
+                        else f"🔖 Seal recognized (VLM): {content['text']}")
         except Exception as e:
             logger.warning(f"Seal recognition failed: {e}")
-        
+
         return {
             'type': category,
             'bbox': bbox,