浏览代码

refactor(magic_pdf): implement model singleton pattern for custom models

Introduce a singleton, ModelSingleton, to manage custom models in the magic_pdf
module. It creates the custom model once per (ocr, show_log) configuration and
reuses it on later doc_analyze calls, which avoids the overhead of
re-instantiating the model for a configuration that has already been loaded.
myhloli 1 年之前
父节点
当前提交
054abe33d6
共有 1 个文件被更改,包括 25 次插入和 1 次删除
  1. 25 1
      magic_pdf/model/doc_analyze_by_custom_model.py

+ 25 - 1
magic_pdf/model/doc_analyze_by_custom_model.py

@@ -48,7 +48,23 @@ def load_images_from_pdf(pdf_bytes: bytes, dpi=200) -> list:
     return images
 
 
-def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False):
+class ModelSingleton:
+    _instance = None
+    _models = {}
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def get_model(self, ocr: bool, show_log: bool):
+        key = (ocr, show_log)
+        if key not in self._models:
+            self._models[key] = custom_model_init(ocr=ocr, show_log=show_log)
+        return self._models[key]
+
+
+def custom_model_init(ocr: bool = False, show_log: bool = False):
     model = None
 
     if model_config.__model_mode__ == "lite":
@@ -76,6 +92,14 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False):
         logger.error("use_inside_model is False, not allow to use inside model")
         exit(1)
 
+    return custom_model
+
+
+def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False):
+
+    model_manager = ModelSingleton()
+    custom_model = model_manager.get_model(ocr, show_log)
+
     images = load_images_from_pdf(pdf_bytes)
 
     model_json = []