Browse Source

feat: update parse_doc function to support backend options and add environment variable instructions

myhloli 5 months ago
parent
commit
d41179da84
1 changed file with 11 additions and 1 deletion
  1. 11 1
      demo/demo.py

+ 11 - 1
demo/demo.py

@@ -230,4 +230,14 @@ if __name__ == '__main__':
     for doc_path in Path(pdf_files_dir).glob('*'):
         if doc_path.suffix in pdf_suffixes + image_suffixes:
             doc_path_list.append(doc_path)
-    parse_doc(doc_path_list, output_dir)
+
+    """如果您由于网络问题无法下载模型,可以设置环境变量MINERU_MODEL_SOURCE为modelscope使用免代理仓库下载模型"""
+    # os.environ['MINERU_MODEL_SOURCE'] = "modelscope"
+
+    """Use pipeline mode if your environment does not support VLM"""
+    parse_doc(doc_path_list, output_dir, backend="pipeline")
+
+    """To enable VLM mode, change the backend to 'vlm-xxx'"""
+    # parse_doc(doc_path_list, output_dir, backend="vlm-transformers")  # more general.
+    # parse_doc(doc_path_list, output_dir, backend="vlm-sglang-engine")  # faster(engine).
+    # parse_doc(doc_path_list, output_dir, backend="vlm-sglang-client", server_url="http://127.0.0.1:30000")  # faster(client).