Переглянути джерело

feat(element_processors): 添加 VL 识别器懒加载支持,优化初始化流程

zhch158_admin 2 тижнів тому
батько
коміт
5c01daf7ab

+ 42 - 4
ocr_tools/universal_doc_parser/core/element_processors.py

@@ -46,6 +46,7 @@ class ElementProcessors:
         table_cell_matcher: Optional[Any] = None,
         wired_table_recognizer: Optional[Any] = None,
         table_classifier: Optional[Any] = None,
+        vl_recognizer_lazy_loader: Optional[Any] = None,  # 🆕 懒加载回调
     ):
         """
         初始化元素处理器
@@ -53,10 +54,11 @@ class ElementProcessors:
         Args:
             preprocessor: 预处理器(方向检测)
             ocr_recognizer: OCR识别器
-            vl_recognizer: VL识别器(表格、公式)
+            vl_recognizer: VL识别器(表格、公式)- 可为 None(懒加载)
             table_cell_matcher: 表格单元格匹配器
             wired_table_recognizer: 有线表格识别器(可选)
             table_classifier: 表格分类器(区分有线/无线表格,可选)
+            vl_recognizer_lazy_loader: VL识别器懒加载回调函数(可选)
         """
         self.preprocessor = preprocessor
         self.ocr_recognizer = ocr_recognizer
@@ -64,6 +66,26 @@ class ElementProcessors:
         self.table_cell_matcher = table_cell_matcher
         self.wired_table_recognizer = wired_table_recognizer
         self.table_classifier = table_classifier
+        
+        # VL 识别器懒加载支持
+        self._vl_recognizer_lazy_loader = vl_recognizer_lazy_loader
+        self._vl_recognizer_loaded = False
+        
+        # VL 识别器懒加载支持
+        self._vl_recognizer_lazy_loader = vl_recognizer_lazy_loader
+        self._vl_recognizer_loaded = False
+    
+    def _ensure_vl_recognizer(self):
+        """Return the VL recognizer, calling the lazy loader on first use if none was set (may return None)."""
+        if self._vl_recognizer_loaded:  # already resolved by an earlier call; skip the loader
+            return self.vl_recognizer
+        
+        if self.vl_recognizer is None and self._vl_recognizer_lazy_loader is not None:  # no instance yet, but a loader callback exists
+            logger.info("🔄 Triggering VL recognizer lazy loading...")
+            self.vl_recognizer = self._vl_recognizer_lazy_loader()
+        
+        self._vl_recognizer_loaded = True  # set even if the result is None, so a loader that returned None is not called again
+        return self.vl_recognizer
     
     def _convert_ocr_details_to_absolute(
         self,
@@ -471,10 +493,16 @@ class ElementProcessors:
         cropped_table, ocr_boxes, table_angle, ocr_source, crop_padding, orig_size_before_rotation = \
             self._prepare_table_ocr(image, bbox, pre_matched_spans)
         
-        # VLM 识别获取表格结构HTML
+        # VLM 识别获取表格结构HTML(懒加载)
         table_html = ""
         try:
-            vl_result = self.vl_recognizer.recognize_table(
+            vl_recognizer = self._ensure_vl_recognizer()
+            if vl_recognizer is None:
+                logger.error("❌ VL recognizer not available for table recognition")
+                # return self._create_empty_table_result(layout_item, bbox, table_angle, ocr_source)
+                raise RuntimeError("VL recognizer not available")
+            
+            vl_result = vl_recognizer.recognize_table(
                 cropped_table,
                 return_cells_coordinate=True
             )
@@ -602,7 +630,17 @@ class ElementProcessors:
         content = {'latex': '', 'confidence': 0.0}
         
         try:
-            formula_result = self.vl_recognizer.recognize_formula(cropped_region)
+            # 懒加载 VL 识别器
+            vl_recognizer = self._ensure_vl_recognizer()
+            if vl_recognizer is None:
+                logger.error("❌ VL recognizer not available for formula recognition")
+                return {
+                    'type': category,
+                    'bbox': bbox,
+                    'content': content
+                }
+            
+            formula_result = vl_recognizer.recognize_formula(cropped_region)
             content = {
                 'latex': formula_result.get('latex', ''),
                 'confidence': formula_result.get('confidence', 0.0)