Forráskód Böngészése

feat: update Dockerfile and README_zh-CN for mineru installation and model download improvements

myhloli 5 hónapja
szülő
commit
89c9853778
4 módosított fájl, 16 hozzáadás és 103 törlés
  1. 4 4
      README_zh-CN.md
  2. 0 51
      docker/ascend_npu/Dockerfile
  3. 6 24
      docker/china/Dockerfile
  4. 6 24
      docker/global/Dockerfile

+ 4 - 4
README_zh-CN.md

@@ -476,23 +476,23 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
 ```bash
 pip install --upgrade pip
 pip install uv
-uv pip install "mineru[core]>=2.0.0" -i https://mirrors.aliyun.com/pypi/simple
+uv pip install "mineru[core]>=2.0.0"
 ```
 
 您也可以通过源码安装
 ```bash
 git clone https://github.com/opendatalab/MinerU.git
 cd MinerU
-uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
+uv pip install -e .[core]
 ```
 
 如果您需要使用sglang加速vlm模型推理,请直接安装MinerU的完整版本
 ```bash
-uv pip install "mineru[all]>=2.0.0" -i https://mirrors.aliyun.com/pypi/simple
+uv pip install "mineru[all]>=2.0.0"
 ```
 ```bash
-uv pip install -e .[all] -i https://mirrors.aliyun.com/pypi/simple
+uv pip install -e .[all]
 ```
 
 #### 2.使用 MinerU

+ 0 - 51
docker/ascend_npu/Dockerfile

@@ -1,51 +0,0 @@
-# Use the official Ubuntu base image
-FROM swr.cn-central-221.ovaijisuan.com/mindformers/mindformers1.2_mindspore2.3:20240722
-
-USER root
-
-# Set environment variables to non-interactive to avoid prompts during installation
-ENV DEBIAN_FRONTEND=noninteractive
-
-# Update the package list and install necessary packages
-RUN apt-get update && \
-    apt-get install -y \
-        software-properties-common && \
-    add-apt-repository -y ppa:deadsnakes/ppa && \
-    apt-get update && \
-    apt-get install -y \
-        python3.10 \
-        python3.10-venv \
-        python3.10-distutils \
-        python3.10-dev \
-        python3-pip \
-        wget \
-        git \
-        libgl1 \
-        libglib2.0-0 \
-        && rm -rf /var/lib/apt/lists/*
-
-# Set Python 3.10 as the default python3
-RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
-
-# Create a virtual environment for MinerU
-RUN python3 -m venv /opt/mineru_venv
-
-# Copy the configuration file template and install magic-pdf latest
-RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json && \
-    cp magic-pdf.template.json /root/magic-pdf.json && \
-    source /opt/mineru_venv/bin/activate && \
-    pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
-    pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple && \
-    pip3 install -U magic-pdf[full] 'numpy<2' decorator attrs absl-py cloudpickle ml-dtypes tornado einops -i https://mirrors.aliyun.com/pypi/simple && \
-    wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
-    pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
-
-# Download models and update the configuration file
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    pip3 install modelscope -i https://mirrors.aliyun.com/pypi/simple && \
-    wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/scripts/download_models.py -O download_models.py && \
-    python3 download_models.py && \
-    sed -i 's|cpu|npu|g' /root/magic-pdf.json"
-
-# Set the entry point to activate the virtual environment and run the command line tool
-ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\"", "--"]

+ 6 - 24
docker/china/Dockerfile

@@ -18,37 +18,19 @@ RUN apt-get update && \
         wget \
         git \
         libgl1 \
-        libreoffice \
-        fonts-noto-cjk \
-        fonts-wqy-zenhei \
-        fonts-wqy-microhei \
-        ttf-mscorefonts-installer \
-        fontconfig \
         libglib2.0-0 \
-        libxrender1 \
-        libsm6 \
-        libxext6 \
-        poppler-utils \
         && rm -rf /var/lib/apt/lists/*
 
 # Set Python 3.10 as the default python3
 RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
 
-# Create a virtual environment for MinerU
-RUN python3 -m venv /opt/mineru_venv
-
-# Copy the configuration file template and install magic-pdf latest
-RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json && \
-    cp magic-pdf.template.json /root/magic-pdf.json && \
-    source /opt/mineru_venv/bin/activate && \
-    pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
-    pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple"
+# Upgrade pip, install uv, then install the latest mineru (>=2.0.0) with all extras
+RUN /bin/bash -c "pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
+    pip3 install uv -i https://mirrors.aliyun.com/pypi/simple && \
+    uv pip install 'mineru[all]>=2.0.0' -i https://mirrors.aliyun.com/pypi/simple"
 
 # Download models and update the configuration file
-RUN /bin/bash -c "pip3 install modelscope -i https://mirrors.aliyun.com/pypi/simple && \
-    wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/scripts/download_models.py -O download_models.py && \
-    python3 download_models.py && \
-    sed -i 's|cpu|cuda|g' /root/magic-pdf.json"
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
 
 # Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the given command
-ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\"", "--"]
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]

+ 6 - 24
docker/global/Dockerfile

@@ -18,37 +18,19 @@ RUN apt-get update && \
         wget \
         git \
         libgl1 \
-        libreoffice \
-        fonts-noto-cjk \
-        fonts-wqy-zenhei \
-        fonts-wqy-microhei \
-        ttf-mscorefonts-installer \
-        fontconfig \
         libglib2.0-0 \
-        libxrender1 \
-        libsm6 \
-        libxext6 \
-        poppler-utils \
         && rm -rf /var/lib/apt/lists/*
 
 # Set Python 3.10 as the default python3
 RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
 
-# Create a virtual environment for MinerU
-RUN python3 -m venv /opt/mineru_venv
-
-# Copy the configuration file template and install magic-pdf latest
-RUN /bin/bash -c "wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json && \
-    cp magic-pdf.template.json /root/magic-pdf.json && \
-    source /opt/mineru_venv/bin/activate && \
-    pip3 install --upgrade pip && \
-    pip3 install -U magic-pdf[full]"
+# Upgrade pip, install uv, then install the latest mineru (>=2.0.0) with all extras
+RUN /bin/bash -c "pip3 install --upgrade pip && \
+    pip3 install uv && \
+    uv pip install 'mineru[all]>=2.0.0'"
 
 # Download models and update the configuration file
-RUN /bin/bash -c "pip3 install huggingface_hub && \
-    wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models.py && \
-    python3 download_models.py && \
-    sed -i 's|cpu|cuda|g' /root/magic-pdf.json"
+RUN /bin/bash -c "mineru-models-download -s huggingface -m all"
 
 # Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the given command
-ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\"", "--"]
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]