Browse Source

feat(models): add PaddleOCR model and update download scripts

- Add PaddleOCR model to the list of models to download
- Update download_models.py and download_models_hf.py scripts to include PaddleOCR
- Create the user directory ~/.paddleocr (replacing any existing one) and copy the downloaded PaddleOCR models into it
myhloli cách đây 8 tháng
mục cha
commit
6e35e382df
2 tập tin đã thay đổi với 16 bổ sung và 0 xóa
  1. 8 0
      scripts/download_models.py
  2. 8 0
      scripts/download_models_hf.py

+ 8 - 0
scripts/download_models.py

@@ -1,4 +1,5 @@
 import json
+import shutil
 import os
 
 import requests
@@ -36,6 +37,7 @@ if __name__ == '__main__':
         "models/Layout/YOLO/*",
         "models/MFD/YOLO/*",
         "models/MFR/unimernet_hf_small_2503/*",
+        "models/OCR/paddleocr/*",
         # "models/TabRec/TableMaster/*",
         # "models/TabRec/StructEqTable/*",
     ]
@@ -45,6 +47,12 @@ if __name__ == '__main__':
     print(f'model_dir is: {model_dir}')
     print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
 
+    paddleocr_model_dir = model_dir + '/OCR/paddleocr'
+    user_paddleocr_dir = os.path.expanduser('~/.paddleocr')
+    if os.path.exists(user_paddleocr_dir):
+        shutil.rmtree(user_paddleocr_dir)
+    shutil.copytree(paddleocr_model_dir, user_paddleocr_dir)
+
     json_url = 'https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json'
     config_file_name = 'magic-pdf.json'
     home_dir = os.path.expanduser('~')

+ 8 - 0
scripts/download_models_hf.py

@@ -1,5 +1,6 @@
 import json
 import os
+import shutil
 
 import requests
 from huggingface_hub import snapshot_download
@@ -37,6 +38,7 @@ if __name__ == '__main__':
         "models/Layout/YOLO/*",
         "models/MFD/YOLO/*",
         "models/MFR/unimernet_hf_small_2503/*",
+        "models/OCR/paddleocr/*",
         # "models/TabRec/TableMaster/*",
         # "models/TabRec/StructEqTable/*",
     ]
@@ -52,6 +54,12 @@ if __name__ == '__main__':
     print(f'model_dir is: {model_dir}')
     print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
 
+    paddleocr_model_dir = model_dir + '/OCR/paddleocr'
+    user_paddleocr_dir = os.path.expanduser('~/.paddleocr')
+    if os.path.exists(user_paddleocr_dir):
+        shutil.rmtree(user_paddleocr_dir)
+    shutil.copytree(paddleocr_model_dir, user_paddleocr_dir)
+
     json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
     config_file_name = 'magic-pdf.json'
     home_dir = os.path.expanduser('~')