浏览代码

feat(pipeline_manager): 实现 VL 识别器的懒加载机制,优化初始化流程

zhch158_admin 2 周之前
父节点
当前提交
1163b7d3be
共有 1 个文件被更改,包括 32 次插入和 8 次删除
  1. +32 -8
      ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

+ 32 - 8
ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

@@ -115,6 +115,31 @@ class EnhancedDocPipeline:
         
         logger.info(f"✅ Pipeline initialized for scene: {self.scene_name}")
     
+    def _ensure_vl_recognizer(self):
+        """懒加载 VL 识别器(仅在需要时初始化,且只初始化一次)"""
+        if self._vl_recognizer_initialized:
+            return self.vl_recognizer
+        
+        if self._vl_recognizer_config == {}:
+            logger.warning("⚠️ VL recognizer not configured, skipping initialization")
+            self._vl_recognizer_initialized = True
+            return None
+        
+        try:
+            logger.info("🔄 Lazy-loading VL recognizer...")
+            self.vl_recognizer = ModelFactory.create_vl_recognizer(
+                self._vl_recognizer_config
+            )
+            self._vl_recognizer_initialized = True
+            self._vl_recognizer_checked = True
+            logger.info("✅ VL recognizer initialized successfully")
+            return self.vl_recognizer
+        except Exception as e:
+            logger.error(f"❌ Failed to initialize VL recognizer: {e}")
+            self._vl_recognizer_initialized = True  # 标记为已尝试初始化
+            self.vl_recognizer = None
+            return None
+    
     def _init_components(self):
         """初始化处理组件"""
         try:
@@ -135,13 +160,11 @@ class EnhancedDocPipeline:
             else:
                 self._smart_router_needs_ocr = False
             
-            # 3. VL识别器(表格、公式)
-            if self.config.get('vl_recognition', {}) != {}:
-                self.vl_recognizer = ModelFactory.create_vl_recognizer(
-                    self.config['vl_recognition']
-                )
-            else:
-                self.vl_recognizer = None
+            # 3. VL识别器(表格、公式)- 使用懒加载
+            self.vl_recognizer = None
+            self._vl_recognizer_config = self.config.get('vl_recognition', {})
+            self._vl_recognizer_initialized = False
+            self._vl_recognizer_checked = False  # 是否已检测过连接
             
             # 4. OCR识别器
             self.ocr_recognizer = ModelFactory.create_ocr_recognizer(
@@ -195,7 +218,7 @@ class EnhancedDocPipeline:
         else:
             logger.warning("⚠️ Merger components not available, cell coordinate matching disabled")
         
-        # 创建元素处理器
+        # 创建元素处理器(传入懒加载回调)
         self.element_processors = ElementProcessors(
             preprocessor=self.preprocessor,
             ocr_recognizer=self.ocr_recognizer,
@@ -203,6 +226,7 @@ class EnhancedDocPipeline:
             table_cell_matcher=table_cell_matcher,
             wired_table_recognizer=getattr(self, 'wired_table_recognizer', None),
             table_classifier=getattr(self, 'table_classifier', None),
+            vl_recognizer_lazy_loader=self._ensure_vl_recognizer,  # 🎯 传入懒加载回调
         )
     
     # ==================== 主处理流程 ====================