|
@@ -1,5 +1,6 @@
|
|
|
import json
|
|
import json
|
|
|
import os
|
|
import os
|
|
|
|
|
+import shutil
|
|
|
|
|
|
|
|
import requests
|
|
import requests
|
|
|
from huggingface_hub import snapshot_download
|
|
from huggingface_hub import snapshot_download
|
|
@@ -37,6 +38,7 @@ if __name__ == '__main__':
|
|
|
"models/Layout/YOLO/*",
|
|
"models/Layout/YOLO/*",
|
|
|
"models/MFD/YOLO/*",
|
|
"models/MFD/YOLO/*",
|
|
|
"models/MFR/unimernet_hf_small_2503/*",
|
|
"models/MFR/unimernet_hf_small_2503/*",
|
|
|
|
|
+ "models/OCR/paddleocr/*",
|
|
|
# "models/TabRec/TableMaster/*",
|
|
# "models/TabRec/TableMaster/*",
|
|
|
# "models/TabRec/StructEqTable/*",
|
|
# "models/TabRec/StructEqTable/*",
|
|
|
]
|
|
]
|
|
@@ -52,6 +54,12 @@ if __name__ == '__main__':
|
|
|
print(f'model_dir is: {model_dir}')
|
|
print(f'model_dir is: {model_dir}')
|
|
|
print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
|
|
print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
|
|
|
|
|
|
|
|
|
|
+ paddleocr_model_dir = model_dir + '/OCR/paddleocr'
|
|
|
|
|
+ user_paddleocr_dir = os.path.expanduser('~/.paddleocr')
|
|
|
|
|
+ if os.path.exists(user_paddleocr_dir):
|
|
|
|
|
+ shutil.rmtree(user_paddleocr_dir)
|
|
|
|
|
+ shutil.copytree(paddleocr_model_dir, user_paddleocr_dir)
|
|
|
|
|
+
|
|
|
json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
|
|
json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
|
|
|
config_file_name = 'magic-pdf.json'
|
|
config_file_name = 'magic-pdf.json'
|
|
|
home_dir = os.path.expanduser('~')
|
|
home_dir = os.path.expanduser('~')
|