Bladeren bron

fix: download model file scripts path

xu rui 1 jaar geleden
bovenliggende
commit
065bf993ec

+ 2 - 2
.readthedocs.yaml

@@ -10,7 +10,7 @@ formats:
 
 python:
   install:
-    - requirements: docs/zh_cn/requirements.txt
+    - requirements: next_docs/zh_cn/requirements.txt
 
 sphinx:
-  configuration: docs/zh_cn/conf.py
+  configuration: next_docs/zh_cn/conf.py

+ 2 - 2
next_docs/en/.readthedocs.yaml

@@ -10,7 +10,7 @@ formats:
 
 python:
   install:
-    - requirements: docs/requirements.txt
+    - requirements: next_docs/requirements.txt
 
 sphinx:
-  configuration: docs/en/conf.py
+  configuration: next_docs/en/conf.py

+ 1 - 1
next_docs/en/user_guide/install/download_model_weight_files.rst

@@ -17,7 +17,7 @@ Use a Python Script to Download Model Files from Hugging Face
 .. code:: bash
 
    pip install huggingface_hub
-   wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py
+   wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models_hf.py
    python download_models_hf.py
 
 The Python script will automatically download the model files and

+ 1 - 1
next_docs/en/user_guide/install/install.rst

@@ -100,7 +100,7 @@ Download model weight files
 .. code-block:: shell
 
     pip install huggingface_hub
-    wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py
+    wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models_hf.py
     python download_models_hf.py    
 
 

+ 2 - 2
next_docs/zh_cn/.readthedocs.yaml

@@ -10,7 +10,7 @@ formats:
 
 python:
   install:
-    - requirements: docs/requirements.txt
+    - requirements: next_docs/requirements.txt
 
 sphinx:
-  configuration: docs/zh_cn/conf.py
+  configuration: next_docs/zh_cn/conf.py

+ 59 - 0
scripts/download_models.py

@@ -0,0 +1,59 @@
+import json
+import os
+
+import requests
+from modelscope import snapshot_download
+
+
+def download_json(url):  # Fetch `url` with an HTTP GET and return the response body decoded as JSON.
+    # Download the JSON file. NOTE(review): no timeout is passed to requests.get — confirm this is intended.
+    response = requests.get(url)
+    response.raise_for_status()  # Check that the request succeeded (raises on an HTTP error status).
+    return response.json()
+
+
+def download_and_modify_json(url, local_filename, modifications):  # Load or download the JSON config, apply `modifications`, and write it to `local_filename`.
+    if os.path.exists(local_filename):
+        data = json.load(open(local_filename))  # NOTE(review): the file handle is never explicitly closed and no encoding is specified.
+        config_version = data.get('config_version', '0.0.0')  # A missing 'config_version' key is treated as '0.0.0'.
+        if config_version < '1.0.0':  # NOTE(review): this is a lexicographic string comparison, not a numeric version comparison.
+            data = download_json(url)  # Local config compares below '1.0.0': replace it with the downloaded JSON.
+    else:
+        data = download_json(url)  # No local file exists yet: start from the downloaded JSON.
+
+    # Apply the modifications: each key is added to, or overwritten in, the top level of `data`.
+    for key, value in modifications.items():
+        data[key] = value
+
+    # Save the modified content back to `local_filename` as UTF-8, keeping non-ASCII characters unescaped.
+    with open(local_filename, 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
+
+
+if __name__ == '__main__':  # Script entry point: download the model files, then write magic-pdf.json in the home directory.
+    mineru_patterns = [  # Patterns passed as allow_patterns to the PDF-Extract-Kit snapshot download below.
+        "models/Layout/LayoutLMv3/*",
+        "models/Layout/YOLO/*",
+        "models/MFD/YOLO/*",
+        "models/MFR/unimernet_small/*",
+        "models/TabRec/TableMaster/*",
+        "models/TabRec/StructEqTable/*",
+    ]
+    model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)  # presumably returns the local snapshot directory — confirm against the modelscope API
+    layoutreader_model_dir = snapshot_download('ppaanngggg/layoutreader')  # No allow_patterns is given for this repository.
+    model_dir = model_dir + '/models'  # Use the 'models' subdirectory. NOTE(review): plain string concatenation with '/', not os.path.join.
+    print(f'model_dir is: {model_dir}')
+    print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
+
+    json_url = 'https://gitee.com/myhloli/MinerU/raw/dev/magic-pdf.template.json'  # Template config; fetched by download_and_modify_json when the local config is missing or compares below '1.0.0'.
+    config_file_name = 'magic-pdf.json'
+    home_dir = os.path.expanduser('~')
+    config_file = os.path.join(home_dir, config_file_name)  # The config file is placed directly in the user's home directory.
+
+    json_mods = {  # Keys written into the config so they hold the two directories obtained above.
+        'models-dir': model_dir,
+        'layoutreader-model-dir': layoutreader_model_dir,
+    }
+
+    download_and_modify_json(json_url, config_file, json_mods)
+    print(f'The configuration file has been configured successfully, the path is: {config_file}')

+ 66 - 0
scripts/download_models_hf.py

@@ -0,0 +1,66 @@
+import json
+import os
+
+import requests
+from huggingface_hub import snapshot_download
+
+
+def download_json(url):  # Fetch `url` with an HTTP GET and return the response body decoded as JSON.
+    # Download the JSON file. NOTE(review): no timeout is passed to requests.get — confirm this is intended.
+    response = requests.get(url)
+    response.raise_for_status()  # Check that the request succeeded (raises on an HTTP error status).
+    return response.json()
+
+
+def download_and_modify_json(url, local_filename, modifications):  # Load or download the JSON config, apply `modifications`, and write it to `local_filename`.
+    if os.path.exists(local_filename):
+        data = json.load(open(local_filename))  # NOTE(review): the file handle is never explicitly closed and no encoding is specified.
+        config_version = data.get('config_version', '0.0.0')  # A missing 'config_version' key is treated as '0.0.0'.
+        if config_version < '1.0.0':  # NOTE(review): this is a lexicographic string comparison, not a numeric version comparison.
+            data = download_json(url)  # Local config compares below '1.0.0': replace it with the downloaded JSON.
+    else:
+        data = download_json(url)  # No local file exists yet: start from the downloaded JSON.
+
+    # Apply the modifications: each key is added to, or overwritten in, the top level of `data`.
+    for key, value in modifications.items():
+        data[key] = value
+
+    # Save the modified content back to `local_filename` as UTF-8, keeping non-ASCII characters unescaped.
+    with open(local_filename, 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
+
+
+if __name__ == '__main__':  # Script entry point: download the model files, then write magic-pdf.json in the home directory.
+
+    mineru_patterns = [  # Patterns passed as allow_patterns to the PDF-Extract-Kit snapshot download below.
+        "models/Layout/LayoutLMv3/*",
+        "models/Layout/YOLO/*",
+        "models/MFD/YOLO/*",
+        "models/MFR/unimernet_small/*",
+        "models/TabRec/TableMaster/*",
+        "models/TabRec/StructEqTable/*",
+    ]
+    model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)  # presumably returns the local snapshot directory — confirm against the huggingface_hub API
+
+    layoutreader_pattern = [  # Only JSON and safetensors files are requested from the layoutreader repository.
+        "*.json",
+        "*.safetensors",
+    ]
+    layoutreader_model_dir = snapshot_download('hantian/layoutreader', allow_patterns=layoutreader_pattern)
+
+    model_dir = model_dir + '/models'  # Use the 'models' subdirectory. NOTE(review): plain string concatenation with '/', not os.path.join.
+    print(f'model_dir is: {model_dir}')
+    print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
+
+    json_url = 'https://github.com/opendatalab/MinerU/raw/dev/magic-pdf.template.json'  # Template config; fetched by download_and_modify_json when the local config is missing or compares below '1.0.0'.
+    config_file_name = 'magic-pdf.json'
+    home_dir = os.path.expanduser('~')
+    config_file = os.path.join(home_dir, config_file_name)  # The config file is placed directly in the user's home directory.
+
+    json_mods = {  # Keys written into the config so they hold the two directories obtained above.
+        'models-dir': model_dir,
+        'layoutreader-model-dir': layoutreader_model_dir,
+    }
+
+    download_and_modify_json(json_url, config_file, json_mods)
+    print(f'The configuration file has been configured successfully, the path is: {config_file}')