Преглед на файлове

refactor(model): update model initialization and dependencies

- Update config version to 1.2.0
- Refactor model initialization in model_init.py
- Update dependencies in requirements.txt files
- Remove unused imports and models
- Add conditional imports for table models
myhloli преди 8 месеца
родител
ревизия
2f3b66a5d0

+ 2 - 7
docker/ascend_npu/requirements.txt

@@ -7,19 +7,14 @@ numpy>=1.21.6,<2.0.0
 fast-langdetect>=0.2.3,<0.3.0
 scikit-learn>=1.0.2
 pdfminer.six==20231228
-unimernet==0.2.3
-torch>=2.2.2,<=2.3.1
-torchvision>=0.17.2,<=0.18.1
+torch==2.3.1
+torchvision==0.18.1
 matplotlib
 ultralytics>=8.3.48
 paddleocr==2.7.3
 paddlepaddle==3.0.0rc1
-struct-eqtable==0.3.2
-einops
-accelerate
 rapidocr-paddle>=1.4.5,<2.0.0
 rapidocr-onnxruntime>=1.4.4,<2.0.0
 rapid-table>=1.0.3,<2.0.0
 doclayout-yolo==0.0.2b1
 openai
-detectron2

+ 2 - 7
docker/china/requirements.txt

@@ -7,18 +7,13 @@ numpy>=1.21.6,<2.0.0
 fast-langdetect>=0.2.3,<0.3.0
 scikit-learn>=1.0.2
 pdfminer.six==20231228
-unimernet==0.2.3
-torch>=2.2.2,<=2.3.1
-torchvision>=0.17.2,<=0.18.1
+torch>=2.2.2,!=2.5.0,!=2.5.1,<=2.6.0
+torchvision
 matplotlib
 ultralytics>=8.3.48
 paddleocr==2.7.3
-struct-eqtable==0.3.2
-einops
-accelerate
 rapidocr-paddle>=1.4.5,<2.0.0
 rapidocr-onnxruntime>=1.4.4,<2.0.0
 rapid-table>=1.0.3,<2.0.0
 doclayout-yolo==0.0.2b1
 openai
-detectron2

+ 2 - 7
docker/global/requirements.txt

@@ -7,18 +7,13 @@ numpy>=1.21.6,<2.0.0
 fast-langdetect>=0.2.3,<0.3.0
 scikit-learn>=1.0.2
 pdfminer.six==20231228
-unimernet==0.2.3
-torch>=2.2.2,<=2.3.1
-torchvision>=0.17.2,<=0.18.1
+torch>=2.2.2,!=2.5.0,!=2.5.1,<=2.6.0
+torchvision
 matplotlib
 ultralytics>=8.3.48
 paddleocr==2.7.3
-struct-eqtable==0.3.2
-einops
-accelerate
 rapidocr-paddle>=1.4.5,<2.0.0
 rapidocr-onnxruntime>=1.4.4,<2.0.0
 rapid-table>=1.0.3,<2.0.0
 doclayout-yolo==0.0.2b1
 openai
-detectron2

+ 1 - 1
magic-pdf.template.json

@@ -40,5 +40,5 @@
             "enable": false
         }
     },
-    "config_version": "1.1.1"
+    "config_version": "1.2.0"
 }

+ 7 - 17
magic_pdf/model/sub_modules/model_init.py

@@ -1,16 +1,10 @@
-import os
-
 import torch
 from loguru import logger
 
 from magic_pdf.config.constants import MODEL_NAME
 from magic_pdf.model.model_list import AtomicModel
-from magic_pdf.model.sub_modules.language_detection.yolov11.YOLOv11 import \
-    YOLOv11LangDetModel
-from magic_pdf.model.sub_modules.layout.doclayout_yolo.DocLayoutYOLO import \
-    DocLayoutYOLOModel
-from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import \
-    Layoutlmv3_Predictor
+from magic_pdf.model.sub_modules.language_detection.yolov11.YOLOv11 import YOLOv11LangDetModel
+from magic_pdf.model.sub_modules.layout.doclayout_yolo.DocLayoutYOLO import DocLayoutYOLOModel
 from magic_pdf.model.sub_modules.mfd.yolov8.YOLOv8 import YOLOv8MFDModel
 from magic_pdf.model.sub_modules.mfr.unimernet.Unimernet import UnimernetModel
 
@@ -18,10 +12,8 @@ try:
     from magic_pdf_ascend_plugin.libs.license_verifier import (
         LicenseExpiredError, LicenseFormatError, LicenseSignatureError,
         load_license)
-    from magic_pdf_ascend_plugin.model_plugin.ocr.paddleocr.ppocr_273_npu import \
-        ModifiedPaddleOCR
-    from magic_pdf_ascend_plugin.model_plugin.table.rapidtable.rapid_table_npu import \
-        RapidTableModel
+    from magic_pdf_ascend_plugin.model_plugin.ocr.paddleocr.ppocr_273_npu import ModifiedPaddleOCR
+    from magic_pdf_ascend_plugin.model_plugin.table.rapidtable.rapid_table_npu import RapidTableModel
     license_key = load_license()
     logger.info(f'Using Ascend Plugin Success, License id is {license_key["payload"]["id"]},'
                 f' License expired at {license_key["payload"]["date"]["end_date"]}')
@@ -42,16 +34,13 @@ except Exception as e:
     # from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_291_mod import ModifiedPaddleOCR
     from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableModel
 
-from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import \
-    StructTableModel
-from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import \
-    TableMasterPaddleModel
-
 
 def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr_engine=None, table_sub_model_name=None):
     if table_model_type == MODEL_NAME.STRUCT_EQTABLE:
+        from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import StructTableModel
         table_model = StructTableModel(model_path, max_new_tokens=2048, max_time=max_time)
     elif table_model_type == MODEL_NAME.TABLE_MASTER:
+        from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import TableMasterPaddleModel
         config = {
             'model_dir': model_path,
             'device': _device_
@@ -79,6 +68,7 @@ def mfr_model_init(weight_dir, cfg_path, device='cpu'):
 
 
 def layout_model_init(weight, config_file, device):
+    from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import Layoutlmv3_Predictor
     model = Layoutlmv3_Predictor(weight, config_file, device)
     return model
 

+ 5 - 5
scripts/download_models.py

@@ -16,7 +16,7 @@ def download_and_modify_json(url, local_filename, modifications):
     if os.path.exists(local_filename):
         data = json.load(open(local_filename))
         config_version = data.get('config_version', '0.0.0')
-        if config_version < '1.1.1':
+        if config_version < '1.2.0':
             data = download_json(url)
     else:
         data = download_json(url)
@@ -32,12 +32,12 @@ def download_and_modify_json(url, local_filename, modifications):
 
 if __name__ == '__main__':
     mineru_patterns = [
-        "models/Layout/LayoutLMv3/*",
+        # "models/Layout/LayoutLMv3/*",
         "models/Layout/YOLO/*",
         "models/MFD/YOLO/*",
-        "models/MFR/unimernet_small_2501/*",
-        "models/TabRec/TableMaster/*",
-        "models/TabRec/StructEqTable/*",
+        "models/MFR/unimernet_hf_small_2503/*",
+        # "models/TabRec/TableMaster/*",
+        # "models/TabRec/StructEqTable/*",
     ]
     model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)
     layoutreader_model_dir = snapshot_download('ppaanngggg/layoutreader')

+ 5 - 5
scripts/download_models_hf.py

@@ -16,7 +16,7 @@ def download_and_modify_json(url, local_filename, modifications):
     if os.path.exists(local_filename):
         data = json.load(open(local_filename))
         config_version = data.get('config_version', '0.0.0')
-        if config_version < '1.1.1':
+        if config_version < '1.2.0':
             data = download_json(url)
     else:
         data = download_json(url)
@@ -33,12 +33,12 @@ def download_and_modify_json(url, local_filename, modifications):
 if __name__ == '__main__':
 
     mineru_patterns = [
-        "models/Layout/LayoutLMv3/*",
+        # "models/Layout/LayoutLMv3/*",
         "models/Layout/YOLO/*",
         "models/MFD/YOLO/*",
-        "models/MFR/unimernet_small_2501/*",
-        "models/TabRec/TableMaster/*",
-        "models/TabRec/StructEqTable/*",
+        "models/MFR/unimernet_hf_small_2503/*",
+        # "models/TabRec/TableMaster/*",
+        # "models/TabRec/StructEqTable/*",
     ]
     model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)
 

+ 9 - 1
setup.py

@@ -52,7 +52,15 @@ if __name__ == '__main__':
                      ],
             "old_linux":[
                 "albumentations<=1.4.20", # 1.4.21引入的simsimd不支持2019年及更早的linux系统
-            ]
+            ],
+            "layoutlmv3":[
+                "detectron2"
+            ],
+            "struct_eqtable":[
+                "struct-eqtable==0.3.2",  # 表格解析
+                "einops",  # struct-eqtable依赖
+                "accelerate",  # struct-eqtable依赖
+            ],
         },
         description="A practical tool for converting PDF to Markdown",  # 简短描述
         long_description=long_description,  # 详细描述