|
|
@@ -115,6 +115,31 @@ class EnhancedDocPipeline:
|
|
|
|
|
|
logger.info(f"✅ Pipeline initialized for scene: {self.scene_name}")
|
|
|
|
|
|
def _ensure_vl_recognizer(self):
    """Lazily create the VL recognizer on first use and return it.

    Initialization is attempted at most once. Whether the attempt succeeds,
    fails, or is skipped because no configuration is present, it is recorded
    in ``self._vl_recognizer_initialized`` and every later call returns the
    cached ``self.vl_recognizer`` without retrying.

    Returns:
        The object returned by ``ModelFactory.create_vl_recognizer``, or
        ``None`` when the recognizer is not configured or its creation
        raised an exception.
    """
    # Fast path: an earlier call already attempted initialization.
    if self._vl_recognizer_initialized:
        return self.vl_recognizer

    # Any empty config counts as "not configured". A plain ``== {}`` check
    # would let ``None`` (e.g. a key present with a null value) fall through
    # to the factory and be reported as an initialization failure instead.
    if not self._vl_recognizer_config:
        logger.warning("⚠️ VL recognizer not configured, skipping initialization")
        self._vl_recognizer_initialized = True
        return None

    try:
        logger.info("🔄 Lazy-loading VL recognizer...")
        self.vl_recognizer = ModelFactory.create_vl_recognizer(
            self._vl_recognizer_config
        )
    except Exception as e:
        # Deliberate best-effort: a broken recognizer must not take the whole
        # pipeline down. Mark the attempt as done so it is not retried on
        # every call.
        logger.error(f"❌ Failed to initialize VL recognizer: {e}")
        self._vl_recognizer_initialized = True
        self.vl_recognizer = None
        return None
    else:
        self._vl_recognizer_initialized = True
        # Set only on the success path; this flag is not read in this method.
        self._vl_recognizer_checked = True
        logger.info("✅ VL recognizer initialized successfully")
        return self.vl_recognizer
|
|
|
+
|
|
|
def _init_components(self):
|
|
|
"""初始化处理组件"""
|
|
|
try:
|
|
|
@@ -135,13 +160,11 @@ class EnhancedDocPipeline:
|
|
|
else:
|
|
|
self._smart_router_needs_ocr = False
|
|
|
|
|
|
- # 3. VL识别器(表格、公式)
|
|
|
- if self.config.get('vl_recognition', {}) != {}:
|
|
|
- self.vl_recognizer = ModelFactory.create_vl_recognizer(
|
|
|
- self.config['vl_recognition']
|
|
|
- )
|
|
|
- else:
|
|
|
- self.vl_recognizer = None
|
|
|
+ # 3. VL识别器(表格、公式)- 使用懒加载
|
|
|
+ self.vl_recognizer = None
|
|
|
+ self._vl_recognizer_config = self.config.get('vl_recognition', {})
|
|
|
+ self._vl_recognizer_initialized = False
|
|
|
+ self._vl_recognizer_checked = False # 是否已检测过连接
|
|
|
|
|
|
# 4. OCR识别器
|
|
|
self.ocr_recognizer = ModelFactory.create_ocr_recognizer(
|
|
|
@@ -195,7 +218,7 @@ class EnhancedDocPipeline:
|
|
|
else:
|
|
|
logger.warning("⚠️ Merger components not available, cell coordinate matching disabled")
|
|
|
|
|
|
- # 创建元素处理器
|
|
|
+ # 创建元素处理器(传入懒加载回调)
|
|
|
self.element_processors = ElementProcessors(
|
|
|
preprocessor=self.preprocessor,
|
|
|
ocr_recognizer=self.ocr_recognizer,
|
|
|
@@ -203,6 +226,7 @@ class EnhancedDocPipeline:
|
|
|
table_cell_matcher=table_cell_matcher,
|
|
|
wired_table_recognizer=getattr(self, 'wired_table_recognizer', None),
|
|
|
table_classifier=getattr(self, 'table_classifier', None),
|
|
|
+ vl_recognizer_lazy_loader=self._ensure_vl_recognizer, # 🎯 传入懒加载回调
|
|
|
)
|
|
|
|
|
|
# ==================== 主处理流程 ====================
|