瀏覽代碼

build(docker): remove requirements.txt and update package installation

- Remove requirements.txt files for all Docker configurations
- Update package installation process in Dockerfiles:
  - Use magic-pdf[full] instead of individual requirements
  - Simplify installation steps and reduce image size
- Remove redundant wget commands for requirements.txt
myhloli 7 月之前
父節點
當前提交
e48add6af7

+ 4 - 9
docker/ascend_npu/Dockerfile

@@ -30,19 +30,14 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
 # Create a virtual environment for MinerU
 RUN python3 -m venv /opt/mineru_venv
 
-# Activate the virtual environment and install necessary Python packages
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
-    wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/ascend_npu/requirements.txt -O requirements.txt && \
-    pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple && \
-    wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
-    pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
-
 # Copy the configuration file template and install magic-pdf latest
 RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json && \
     cp magic-pdf.template.json /root/magic-pdf.json && \
     source /opt/mineru_venv/bin/activate && \
-    pip3 install -U magic-pdf -i https://mirrors.aliyun.com/pypi/simple"
+    pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
+    pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple && \
+    wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
+    pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
 
 # Download models and update the configuration file
 RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \

+ 0 - 20
docker/ascend_npu/requirements.txt

@@ -1,20 +0,0 @@
-boto3>=1.28.43
-Brotli>=1.1.0
-click>=8.1.7
-PyMuPDF>=1.24.9,<1.25.0
-loguru>=0.6.0
-numpy>=1.21.6,<2.0.0
-fast-langdetect>=0.2.3,<0.3.0
-scikit-learn>=1.0.2
-pdfminer.six==20231228
-torch==2.3.1
-torchvision==0.18.1
-matplotlib
-ultralytics>=8.3.48
-rapid-table>=1.0.3,<2.0.0
-doclayout-yolo==0.0.2b1
-ftfy
-openai
-pydantic>=2.7.2,<2.11
-transformers>=4.49.0,<5.0.0
-tqdm>=4.67.1

+ 2 - 7
docker/china/Dockerfile

@@ -27,17 +27,12 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
 # Create a virtual environment for MinerU
 RUN python3 -m venv /opt/mineru_venv
 
-# Activate the virtual environment and install necessary Python packages
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
-    wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/requirements.txt -O requirements.txt && \
-    pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple"
-
 # Copy the configuration file template and install magic-pdf latest
 RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json && \
     cp magic-pdf.template.json /root/magic-pdf.json && \
     source /opt/mineru_venv/bin/activate && \
-    pip3 install -U magic-pdf -i https://mirrors.aliyun.com/pypi/simple"
+    pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
+    pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple"
 
 # Download models and update the configuration file
 RUN /bin/bash -c "pip3 install modelscope && \

+ 0 - 20
docker/china/requirements.txt

@@ -1,20 +0,0 @@
-boto3>=1.28.43
-Brotli>=1.1.0
-click>=8.1.7
-PyMuPDF>=1.24.9,<1.25.0
-loguru>=0.6.0
-numpy>=1.21.6,<2.0.0
-fast-langdetect>=0.2.3,<0.3.0
-scikit-learn>=1.0.2
-pdfminer.six==20231228
-torch>=2.2.2,!=2.5.0,!=2.5.1,<=2.6.0
-torchvision
-matplotlib
-ultralytics>=8.3.48
-rapid-table>=1.0.3,<2.0.0
-doclayout-yolo==0.0.2b1
-ftfy
-openai
-pydantic>=2.7.2,<2.11
-transformers>=4.49.0,<5.0.0
-tqdm>=4.67.1

+ 2 - 7
docker/global/Dockerfile

@@ -27,17 +27,12 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
 # Create a virtual environment for MinerU
 RUN python3 -m venv /opt/mineru_venv
 
-# Activate the virtual environment and install necessary Python packages
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    pip3 install --upgrade pip && \
-    wget https://github.com/opendatalab/MinerU/raw/master/docker/global/requirements.txt -O requirements.txt && \
-    pip3 install -r requirements.txt"
-
 # Copy the configuration file template and install magic-pdf latest
 RUN /bin/bash -c "wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json && \
     cp magic-pdf.template.json /root/magic-pdf.json && \
     source /opt/mineru_venv/bin/activate && \
-    pip3 install -U magic-pdf"
+    pip3 install --upgrade pip && \
+    pip3 install -U magic-pdf[full]"
 
 # Download models and update the configuration file
 RUN /bin/bash -c "pip3 install huggingface_hub && \

+ 0 - 20
docker/global/requirements.txt

@@ -1,20 +0,0 @@
-boto3>=1.28.43
-Brotli>=1.1.0
-click>=8.1.7
-PyMuPDF>=1.24.9,<1.25.0
-loguru>=0.6.0
-numpy>=1.21.6,<2.0.0
-fast-langdetect>=0.2.3,<0.3.0
-scikit-learn>=1.0.2
-pdfminer.six==20231228
-torch>=2.2.2,!=2.5.0,!=2.5.1,<=2.6.0
-torchvision
-matplotlib
-ultralytics>=8.3.48
-rapid-table>=1.0.3,<2.0.0
-doclayout-yolo==0.0.2b1
-ftfy
-openai
-pydantic>=2.7.2,<2.11
-transformers>=4.49.0,<5.0.0
-tqdm>=4.67.1