4 月之前 · e126aaed5a
--- a/ocr_tools/universal_doc_parser/core/element_processors.py
+++ b/ocr_tools/universal_doc_parser/core/element_processors.py
@@ -655,6 +655,57 @@ class ElementProcessors:
 
				             'content': content
			
 
				         }
			
 
				     
			
 
    def process_seal_element(
        self,
        image: np.ndarray,
        layout_item: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Process a seal (stamp) element by recognizing it with the VL recognizer.

        The region described by the layout item's bounding box is cropped from
        the page image and passed to the recognizer's ``recognize_text`` with
        ``element_type='seal'``. Recognition is best-effort: if the recognizer
        is unavailable or raises, the element is still returned, with empty
        text and a content confidence of 0.0.

        Args:
            image: Page image the seal was detected on.
            layout_item: Layout detection item. The keys read here are
                ``'bbox'`` (default ``[0, 0, 0, 0]``), ``'category'``
                (default ``'seal'``) and ``'confidence'`` (default ``0.0``).

        Returns:
            Element dict with the keys ``'type'``, ``'bbox'``, ``'confidence'``
            (the layout detection confidence) and ``'content'`` (a dict with
            ``'text'`` and ``'confidence'`` from recognition). The same set of
            keys is returned on every path, including when the recognizer is
            unavailable.
        """
        bbox = layout_item.get('bbox', [0, 0, 0, 0])
        category = layout_item.get('category', 'seal')
        # Cropping stays outside the try block on purpose: a cropping failure
        # propagates to the caller instead of being reported as a recognition
        # failure.
        cropped_region = CoordinateUtils.crop_region(image, bbox)

        # Fallback content, used whenever recognition does not complete.
        content = {'text': '', 'confidence': 0.0}

        try:
            # Lazily obtain the VL recognizer.
            vl_recognizer = self._ensure_vl_recognizer()
            if vl_recognizer is None:
                # Fall through to the single return below so the element keeps
                # the same keys (including 'confidence') as a recognized seal.
                logger.error("❌ VL recognizer not available for seal recognition")
            else:
                # element_type='seal' is passed through to the recognizer; per
                # the original author's note, the GLM-OCR adapter selects its
                # prompt based on this value.
                seal_result = vl_recognizer.recognize_text(cropped_region, element_type='seal')
                content = {
                    # Normalize a None text to '' so consumers always get a
                    # sliceable value.
                    'text': seal_result.get('text', '') or '',
                    'confidence': seal_result.get('confidence', 0.0)
                }

                # Log at most the first 50 characters of the recognized text.
                text = content['text']
                preview = f"{text[:50]}..." if len(text) > 50 else text
                logger.info(f"🔖 Seal recognized: {preview}")
        except Exception as e:
            # Best-effort: keep whatever content is set and still emit the element.
            logger.warning(f"Seal recognition failed: {e}")

        return {
            'type': category,
            'bbox': bbox,
            'confidence': layout_item.get('confidence', 0.0),
            'content': content
        }
			
 
				+    
			
 
				     def process_image_element(
			
 
				         self,
			
 
				         image: np.ndarray,
			
--- a/ocr_tools/universal_doc_parser/core/model_factory.py
+++ b/ocr_tools/universal_doc_parser/core/model_factory.py
@@ -39,9 +39,13 @@ class ModelFactory:
 
				     def create_layout_detector(cls, config: Dict[str, Any]) -> BaseLayoutDetector:
			
 
				         # 根据配置创建检测器
			
 
				         module_name = config.get('module', 'mineru')
			
 
				-        if module_name == 'paddle':
			
 
				+        model_name = config.get('model_name', 'default')
			
 
				+        if module_name == 'paddle' and model_name == 'RT-DETR-H_layout_17cls':
			
 
				             from models.adapters import PaddleLayoutDetector
			
 
				             detector = PaddleLayoutDetector(config)
			
 
				+        elif module_name == 'paddle' and model_name == 'PP-DocLayoutV3':
			
 
				+            from models.adapters import PPDocLayoutV3Detector
			
 
				+            detector = PPDocLayoutV3Detector(config)
			
 
				         elif module_name == 'docling':
			
 
				             from models.adapters import DoclingLayoutDetector
			
 
				             detector = DoclingLayoutDetector(config)
			
@@ -74,6 +78,9 @@ class ModelFactory:
 
				         elif module_name == 'mineru':
			
 
				             from models.adapters import MinerUVLRecognizer
			
 
				             recognizer = MinerUVLRecognizer(config)
			
 
				+        elif module_name == 'glmocr':
			
 
				+            from models.adapters import GLMOCRVLRecognizer
			
 
				+            recognizer = GLMOCRVLRecognizer(config)
			
 
				         else:
			
 
				             raise ValueError(f"Unknown VL recognizer module: {module_name}")
			
 
				             
			
--- a/ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py
+++ b/ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py
@@ -88,6 +88,9 @@ class EnhancedDocPipeline:
 
				         'interline_equation_yolo', 'interline_equation_number'
			
 
				     ]
			
 
				     
			
 
				+    # Seal（印章）类元素 - 需要 VLM 识别
			
 
				+    SEAL_CATEGORIES = ['seal']
			
 
				+    
			
 
				     # 丢弃类元素（水印、装饰等）
			
 
				     DISCARD_CATEGORIES = ['abandon', 'discarded']
			
 
				     
			
@@ -750,6 +753,7 @@ class EnhancedDocPipeline:
 
				             'image_body': [],
			
 
				             'image_text': [],
			
 
				             'equation': [],
			
 
				+            'seal': [],  # 🔧 添加 seal 类别
			
 
				             'code': [],
			
 
				             'discard': []
			
 
				         }
			
@@ -769,6 +773,8 @@ class EnhancedDocPipeline:
 
				                 classified['image_text'].append(item)
			
 
				             elif category in self.EQUATION_CATEGORIES:
			
 
				                 classified['equation'].append(item)
			
 
				+            elif category in self.SEAL_CATEGORIES:
			
 
				+                classified['seal'].append(item)
			
 
				             elif category in self.CODE_CATEGORIES:
			
 
				                 classified['code'].append(item)
			
 
				             elif category in self.DISCARD_CATEGORIES:
			
@@ -784,6 +790,7 @@ class EnhancedDocPipeline:
 
				                    f"image={len(classified['image_body'])}, "
			
 
				                    f"image_text={len(classified['image_text'])}, "
			
 
				                    f"equation={len(classified['equation'])}, "
			
 
				+                   f"seal={len(classified['seal'])}, "
			
 
				                    f"code={len(classified['code'])}, "
			
 
				                    f"discard={len(classified['discard'])}")
			
 
				         
			
@@ -952,6 +959,17 @@ class EnhancedDocPipeline:
 
				                 logger.warning(f"⚠️ Equation processing failed: {e}")
			
 
				                 processed_elements.append(ElementProcessors.create_error_element(item, str(e)))
			
 
				         
			
 
				+        # 🔧 处理 Seal（印章）元素 - 使用 VLM 识别
			
 
				+        for item in classified_elements['seal']:
			
 
				+            try:
			
 
				+                element = self.element_processors.process_seal_element(
			
 
				+                    detection_image, item
			
 
				+                )
			
 
				+                processed_elements.append(element)
			
 
				+            except Exception as e:
			
 
				+                logger.warning(f"⚠️ Seal processing failed: {e}")
			
 
				+                processed_elements.append(ElementProcessors.create_error_element(item, str(e)))
			
 
				+        
			
 
				         # 处理图片主体
			
 
				         for item in classified_elements['image_body']:
			
 
				             try: