
!1 update wechat, fix some small bugs
Merge pull request !1 from zhch158/master

zhch158 3 months ago
parent
commit
839aa90b81
5 changed files with 46 additions and 3 deletions
  1. +1  -1   README.md
  2. BIN      assets/wechat.jpg
  3. +44 -0   docker/docker-compose.yml
  4. +1  -1   dots_ocr/parser.py
  5. +0  -1   requirements.txt

+ 1 - 1
README.md

@@ -1139,7 +1139,7 @@ print(output_text)
 # Parse a single image
 python3 dots_ocr/parser.py demo/demo_image1.jpg
 # Parse a single PDF
-python3 dots_ocr/parser.py demo/demo_pdf1.pdf  --num_threads 64  # try bigger num_threads for pdf with a large number of pages
+python3 dots_ocr/parser.py demo/demo_pdf1.pdf  --num_thread 64  # try bigger num_threads for pdf with a large number of pages
 
 # Layout detection only
 python3 dots_ocr/parser.py demo/demo_image1.jpg --prompt prompt_layout_only_en
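
The README fix above aligns the documented flag with the parser's actual argument name (--num_thread, singular). For context, below is a minimal sketch of how such a flag is typically wired to a thread pool; the helper names and defaults here are illustrative assumptions, not the repository's actual code.

import argparse
from concurrent.futures import ThreadPoolExecutor

def parse_page(page_no):
    # Stand-in for per-page OCR work (hypothetical; the real parser calls the model here).
    return f"page {page_no} parsed"

def main():
    parser = argparse.ArgumentParser()
    # Singular flag name, matching the corrected README example above.
    parser.add_argument("--num_thread", type=int, default=16)
    args = parser.parse_args()

    page_numbers = range(100)  # placeholder for the pages of a large PDF
    # More worker threads let more pages be processed concurrently.
    with ThreadPoolExecutor(max_workers=args.num_thread) as pool:
        results = list(pool.map(parse_page, page_numbers))
    print(len(results), "pages done")

if __name__ == "__main__":
    main()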

BIN
assets/wechat.jpg


+ 44 - 0
docker/docker-compose.yml

@@ -0,0 +1,44 @@
+version: '3.8'
+
+services:
+  dots-ocr-server:
+    image: dots-ocr:latest
+    container_name: dots-ocr-container
+    ports:
+      - "8000:8000"
+    volumes:
+      # Download the model locally first; model URL: https://www.modelscope.cn/models/rednote-hilab/dots.ocr
+      - ./model/dots.ocr:/workspace/weights/DotsOCR
+    environment:
+      - PYTHONPATH=/workspace/weights:$PYTHONPATH
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: [gpu]
+              device_ids: ['0']
+    entrypoint: /bin/bash
+    command:
+      - -c
+      - |
+        set -ex;
+        echo '--- Starting setup and server ---';
+        echo 'Modifying vllm entrypoint...';
+        # This sed command patches the vllm entrypoint script to import the custom modeling code.
+        sed -i '/^from vllm\.entrypoints\.cli\.main import main/a from DotsOCR import modeling_dots_ocr_vllm' $(which vllm) && \
+        echo 'vllm script after patch:';
+        # Show the patched part of the vllm script for verification.
+        grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm) && \
+        echo 'Starting server...';
+        # Use 'exec' to replace the current shell process with the vllm server,
+        # ensuring logs are properly forwarded to Docker's standard output.
+        exec vllm serve /workspace/weights/DotsOCR \
+            --tensor-parallel-size 1 \
+            --gpu-memory-utilization 0.8 \
+            --chat-template-content-format string \
+            --served-model-name dotsocr-model \
+            --trust-remote-code
+
+
+
+
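
Once the container defined above is running, the patched vllm serve process exposes an OpenAI-compatible HTTP API on port 8000 under the served model name dotsocr-model. Below is a minimal Python client sketch; the image path and prompt text are assumptions, and the exact message format the model expects may differ.

import base64
import requests

# Encode a local page image as a data URL (path is an example).
with open("demo/demo_image1.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

payload = {
    "model": "dotsocr-model",  # must match --served-model-name in the compose file above
    "messages": [{
        "role": "user",
        "content": [
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
            {"type": "text", "text": "Extract the document layout and text."},  # assumed prompt
        ],
    }],
}

resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])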

+ 1 - 1
dots_ocr/parser.py

@@ -190,7 +190,7 @@ class DotsOCRParser:
         
     def parse_pdf(self, input_path, filename, prompt_mode, save_dir):
         print(f"loading pdf: {input_path}")
-        images_origin = load_images_from_pdf(input_path)
+        images_origin = load_images_from_pdf(input_path, dpi=self.dpi)
         total_pages = len(images_origin)
         tasks = [
             {
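
The parser.py change above threads the instance's dpi setting through to PDF rasterization instead of relying on the loader's default. For illustration, a dpi-aware loader along these lines could be built with PyMuPDF; this is a sketch under that assumption, not the repository's actual load_images_from_pdf implementation.

import fitz  # PyMuPDF (assumed backend for this sketch)
from PIL import Image

def load_images_from_pdf(pdf_path, dpi=200):
    # Render each PDF page to a PIL image at the requested resolution.
    images = []
    with fitz.open(pdf_path) as doc:
        for page in doc:
            pix = page.get_pixmap(dpi=dpi)
            images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
    return images

# Example usage with the demo file from the README:
# images = load_images_from_pdf("demo/demo_pdf1.pdf", dpi=200)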

+ 0 - 1
requirements.txt

@@ -1,4 +1,3 @@
-# Production environment dependencies
 # streamlit 
 gradio
 gradio_image_annotation