{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "machine_shape": "hm", "gpuType": "L4", "authorship_tag": "ABX9TyOkGQh7maXiQhQ6pYoY2NaU", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "source": [ "# DotsOCR vLLM Openai API Compatible server" ], "metadata": { "id": "PshK9ZarVTfM" } }, { "cell_type": "code", "source": [ "!pip install pyngrok\n", "!ngrok authtoken # Get this from https://dashboard.ngrok.com/" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oyY3E3mlOXNX", "outputId": "8d7ba92f-7170-4b2e-e8a0-c7f94096f7e0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: pyngrok in /usr/local/lib/python3.11/dist-packages (7.3.0)\n", "Requirement already satisfied: PyYAML>=5.1 in /usr/local/lib/python3.11/dist-packages (from pyngrok) (6.0.2)\n", "Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml\n" ] } ] }, { "cell_type": "code", "source": [ "# No conda step: this Colab runtime has no conda binary, and an environment activated in a\n", "# one-off ! subshell would not change the kernel's Python anyway, so the repo is installed\n", "# into the runtime's own interpreter.\n", "!git clone https://github.com/rednote-hilab/dots.ocr.git" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BcV7hkvuRnwS", "outputId": "7cb9c743-6f41-4c90-a05b-90bce2c29ced" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'dots.ocr'...\n", "remote: Enumerating objects: 163, done.\u001b[K\n", "remote: Counting objects: 100% (51/51), done.\u001b[K\n", "remote: Compressing objects: 100% (31/31), done.\u001b[K\n", "remote: Total 163 (delta 30), reused 30 (delta 
20), pack-reused 112 (from 1)\u001b[K\n", "Receiving objects: 100% (163/163), 35.82 MiB | 13.64 MiB/s, done.\n", "Resolving deltas: 100% (56/56), done.\n" ] } ] }, { "cell_type": "code", "source": [ "# Work from the repo root so the relative paths used below (pip install -e ., tools/...) resolve.\n", "# %cd is the explicit line magic; a bare cd only works while IPython automagic is enabled.\n", "%cd /content/dots.ocr" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Rsc_MkGfRpit", "outputId": "5265315f-c27c-4346-cda7-aba7d4c226d6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "/content/dots.ocr\n" ] } ] }, { "cell_type": "code", "source": [ "# Install pytorch, see https://pytorch.org/get-started/previous-versions/ for your cuda version\n", "!pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cu128\n", "!pip install -e ." ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OxLaSyTJPFwk", "outputId": "a073dcdd-5e5d-4f62-d3b9-be9e9cf98d2f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://download.pytorch.org/whl/cu128\n", "Requirement already satisfied: torch==2.7.0 in /usr/local/lib/python3.11/dist-packages (2.7.0+cu128)\n", "Requirement already satisfied: torchvision==0.22.0 in /usr/local/lib/python3.11/dist-packages (0.22.0+cu128)\n", "Requirement already satisfied: torchaudio==2.7.0 in /usr/local/lib/python3.11/dist-packages (2.7.0+cu128)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (3.18.0)\n", "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (4.14.1)\n", "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (1.13.3)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (3.5)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) 
(3.1.6)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (2025.3.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.61)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.57)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.57)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.7.1.26 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (9.7.1.26)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.8.3.14 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.3.14)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.3.3.41 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (11.3.3.41)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.9.55 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (10.3.9.55)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.7.2.55 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (11.7.2.55)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.5.7.53 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.5.7.53)\n", "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (0.6.3)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (2.26.2)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.8.55 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.55)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (12.8.61)\n", "Requirement already satisfied: 
nvidia-cufile-cu12==1.13.0.11 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (1.13.0.11)\n", "Requirement already satisfied: triton==3.3.0 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.0) (3.3.0)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from torchvision==0.22.0) (2.0.2)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.11/dist-packages (from torchvision==0.22.0) (11.3.0)\n", "Requirement already satisfied: setuptools>=40.8.0 in /usr/local/lib/python3.11/dist-packages (from triton==3.3.0->torch==2.7.0) (75.2.0)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy>=1.13.3->torch==2.7.0) (1.3.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch==2.7.0) (3.0.2)\n", "Obtaining file:///content/dots.ocr\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: gradio in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (5.39.0)\n", "Collecting gradio_image_annotation (from dots_ocr==1.0)\n", " Downloading gradio_image_annotation-0.4.0-py3-none-any.whl.metadata (17 kB)\n", "Collecting PyMuPDF (from dots_ocr==1.0)\n", " Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)\n", "Requirement already satisfied: openai in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (1.98.0)\n", "Collecting qwen_vl_utils (from dots_ocr==1.0)\n", " Downloading qwen_vl_utils-0.0.11-py3-none-any.whl.metadata (6.3 kB)\n", "Collecting transformers==4.51.3 (from dots_ocr==1.0)\n", " Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)\n", "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (0.34.3)\n", "Collecting modelscope (from dots_ocr==1.0)\n", " Downloading 
modelscope-1.28.2-py3-none-any.whl.metadata (39 kB)\n", "Collecting flash-attn==2.8.0.post2 (from dots_ocr==1.0)\n", " Downloading flash_attn-2.8.0.post2.tar.gz (7.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m124.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: accelerate in /usr/local/lib/python3.11/dist-packages (from dots_ocr==1.0) (1.9.0)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from flash-attn==2.8.0.post2->dots_ocr==1.0) (2.7.0+cu128)\n", "Requirement already satisfied: einops in /usr/local/lib/python3.11/dist-packages (from flash-attn==2.8.0.post2->dots_ocr==1.0) (0.8.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (3.18.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (2.0.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (25.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (2024.11.6)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (0.21.4)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (0.5.3)\n", "Requirement 
already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers==4.51.3->dots_ocr==1.0) (4.67.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->dots_ocr==1.0) (2025.3.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->dots_ocr==1.0) (4.14.1)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub->dots_ocr==1.0) (1.1.5)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate->dots_ocr==1.0) (5.9.5)\n", "Requirement already satisfied: aiofiles<25.0,>=22.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (24.1.0)\n", "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (4.10.0)\n", "Requirement already satisfied: brotli>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (1.1.0)\n", "Requirement already satisfied: fastapi<1.0,>=0.115.2 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.116.1)\n", "Requirement already satisfied: ffmpy in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.6.1)\n", "Requirement already satisfied: gradio-client==1.11.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (1.11.0)\n", "Requirement already satisfied: groovy~=0.1 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.1.2)\n", "Requirement already satisfied: httpx<1.0,>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.28.1)\n", "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (3.1.6)\n", "Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.11/dist-packages (from 
gradio->dots_ocr==1.0) (3.0.2)\n", "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (3.11.1)\n", "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (2.2.2)\n", "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (11.3.0)\n", "Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (2.11.7)\n", "Requirement already satisfied: pydub in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.25.1)\n", "Requirement already satisfied: python-multipart>=0.0.18 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.0.20)\n", "Requirement already satisfied: ruff>=0.9.3 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.12.7)\n", "Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.1.6)\n", "Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (2.10.0)\n", "Requirement already satisfied: starlette<1.0,>=0.40.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.47.2)\n", "Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.13.3)\n", "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.16.0)\n", "Requirement already satisfied: uvicorn>=0.14.0 in /usr/local/lib/python3.11/dist-packages (from gradio->dots_ocr==1.0) (0.35.0)\n", "Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.11.0->gradio->dots_ocr==1.0) (15.0.1)\n", "Requirement already satisfied: setuptools in 
/usr/local/lib/python3.11/dist-packages (from modelscope->dots_ocr==1.0) (75.2.0)\n", "Requirement already satisfied: urllib3>=1.26 in /usr/local/lib/python3.11/dist-packages (from modelscope->dots_ocr==1.0) (2.5.0)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai->dots_ocr==1.0) (1.9.0)\n", "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai->dots_ocr==1.0) (0.10.0)\n", "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai->dots_ocr==1.0) (1.3.1)\n", "Collecting av (from qwen_vl_utils->dots_ocr==1.0)\n", " Downloading av-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.6 kB)\n", "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio->dots_ocr==1.0) (3.10)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx<1.0,>=0.24.1->gradio->dots_ocr==1.0) (2025.8.3)\n", "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1.0,>=0.24.1->gradio->dots_ocr==1.0) (1.0.9)\n", "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1.0,>=0.24.1->gradio->dots_ocr==1.0) (0.16.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (2025.2)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio->dots_ocr==1.0) (0.7.0)\n", "Requirement already 
satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio->dots_ocr==1.0) (2.33.2)\n", "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio->dots_ocr==1.0) (0.4.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers==4.51.3->dots_ocr==1.0) (3.4.2)\n", "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (1.13.3)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (3.5)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.61)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.57)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.57 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.57)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.7.1.26 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (9.7.1.26)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.8.3.14 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.3.14)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.3.3.41 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (11.3.3.41)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.9.55 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (10.3.9.55)\n", "Requirement 
already satisfied: nvidia-cusolver-cu12==11.7.2.55 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (11.7.2.55)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.5.7.53 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.5.7.53)\n", "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (0.6.3)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (2.26.2)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.8.55 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.55)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.61 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (12.8.61)\n", "Requirement already satisfied: nvidia-cufile-cu12==1.13.0.11 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (1.13.0.11)\n", "Requirement already satisfied: triton==3.3.0 in /usr/local/lib/python3.11/dist-packages (from torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (3.3.0)\n", "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio->dots_ocr==1.0) (8.2.1)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio->dots_ocr==1.0) (1.5.4)\n", "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio->dots_ocr==1.0) (13.9.4)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio->dots_ocr==1.0) (1.17.0)\n", "Requirement already satisfied: 
markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->dots_ocr==1.0) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->dots_ocr==1.0) (2.19.2)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy>=1.13.3->torch->flash-attn==2.8.0.post2->dots_ocr==1.0) (1.3.0)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio->dots_ocr==1.0) (0.1.2)\n", "Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.4/10.4 MB\u001b[0m \u001b[31m132.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading gradio_image_annotation-0.4.0-py3-none-any.whl (91 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.5/91.5 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading modelscope-1.28.2-py3-none-any.whl (5.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m129.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl (24.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.1/24.1 MB\u001b[0m \u001b[31m101.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading qwen_vl_utils-0.0.11-py3-none-any.whl (7.6 kB)\n", "Downloading av-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (39.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.7/39.7 MB\u001b[0m \u001b[31m61.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hBuilding wheels for 
collected packages: flash-attn\n", " Building wheel for flash-attn (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for flash-attn: filename=flash_attn-2.8.0.post2-cp311-cp311-linux_x86_64.whl size=255941661 sha256=8ed71ac092f80b079d2e6043b769135904d6e834916cb6da7d372b394581447b\n", " Stored in directory: /root/.cache/pip/wheels/a2/75/55/57ba1e272fd7fa1a01d9ba6b5334b7adaabf79900ede22c040\n", "Successfully built flash-attn\n", "Installing collected packages: PyMuPDF, av, qwen_vl_utils, modelscope, transformers, flash-attn, gradio_image_annotation, dots_ocr\n", " Attempting uninstall: transformers\n", " Found existing installation: transformers 4.54.1\n", " Uninstalling transformers-4.54.1:\n", " Successfully uninstalled transformers-4.54.1\n", " Running setup.py develop for dots_ocr\n", "Successfully installed PyMuPDF-1.26.3 av-15.0.0 dots_ocr-1.0 flash-attn-2.8.0.post2 gradio_image_annotation-0.4.0 modelscope-1.28.2 qwen_vl_utils-0.0.11 transformers-4.51.3\n" ] } ] }, { "cell_type": "code", "source": [ "!python3 tools/download_model.py" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "z0nKSOYsRaA2", "outputId": "e4d67ed5-0cb9-437a-abec-5514f7bb8ccc" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Attention: The model save dir dots.ocr should be replace by a name without `.` like DotsOCR, util we merge our code to transformers.\n", "/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:945: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. 
You only need to pass a destination folder as`local_dir`.\n", "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.\n", " warnings.warn(\n", "Fetching 19 files: 0% 0/19 [00:00=2.26.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (2.32.3)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from vllm) (4.67.1)\n", "Collecting blake3 (from vllm)\n", " Downloading blake3-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)\n", "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.11/dist-packages (from vllm) (9.0.0)\n", "Collecting transformers\n", " Downloading transformers-4.55.0-py3-none-any.whl.metadata (39 kB)\n", "Requirement already satisfied: huggingface-hub>=0.33.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub[hf_xet]>=0.33.0->vllm) (0.34.3)\n", "Requirement already satisfied: tokenizers>=0.21.1 in /usr/local/lib/python3.11/dist-packages (from vllm) (0.21.4)\n", "Requirement already satisfied: protobuf in /usr/local/lib/python3.11/dist-packages (from vllm) (5.29.5)\n", "Requirement already satisfied: fastapi>=0.115.0 in /usr/local/lib/python3.11/dist-packages (from fastapi[standard]>=0.115.0->vllm) (0.116.1)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from vllm) (3.12.15)\n", "Collecting openai<=1.90.0,>=1.87.0 (from vllm)\n", " Downloading openai-1.90.0-py3-none-any.whl.metadata (26 kB)\n", "Requirement already satisfied: pydantic>=2.10 in /usr/local/lib/python3.11/dist-packages (from vllm) (2.11.7)\n", "Requirement already satisfied: prometheus_client>=0.18.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (0.22.1)\n", "Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from vllm) (11.3.0)\n", "Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)\n", " Downloading 
prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl.metadata (13 kB)\n", "Requirement already satisfied: tiktoken>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (0.9.0)\n", "Collecting lm-format-enforcer<0.11,>=0.10.11 (from vllm)\n", " Downloading lm_format_enforcer-0.10.12-py3-none-any.whl.metadata (17 kB)\n", "Collecting llguidance<0.8.0,>=0.7.11 (from vllm)\n", " Downloading llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n", "Collecting outlines_core==0.2.10 (from vllm)\n", " Downloading outlines_core-0.2.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)\n", "Collecting diskcache==5.6.3 (from vllm)\n", " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", "Collecting lark==1.2.2 (from vllm)\n", " Downloading lark-1.2.2-py3-none-any.whl.metadata (1.8 kB)\n", "Collecting xgrammar==0.1.21 (from vllm)\n", " Downloading xgrammar-0.1.21-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)\n", "Requirement already satisfied: typing_extensions>=4.10 in /usr/local/lib/python3.11/dist-packages (from vllm) (4.14.1)\n", "Requirement already satisfied: filelock>=3.16.1 in /usr/local/lib/python3.11/dist-packages (from vllm) (3.18.0)\n", "Collecting partial-json-parser (from vllm)\n", " Downloading partial_json_parser-0.2.1.1.post6-py3-none-any.whl.metadata (6.1 kB)\n", "Requirement already satisfied: pyzmq>=25.0.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (26.2.1)\n", "Collecting msgspec (from vllm)\n", " Downloading msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)\n", "Collecting gguf>=0.13.0 (from vllm)\n", " Downloading gguf-0.17.1-py3-none-any.whl.metadata (4.3 kB)\n", "Collecting mistral_common>=1.8.2 (from mistral_common[audio,image]>=1.8.2->vllm)\n", " Downloading mistral_common-1.8.3-py3-none-any.whl.metadata (3.8 kB)\n", "Requirement already satisfied: 
opencv-python-headless>=4.11.0 in /usr/local/lib/python3.11/dist-packages (from vllm) (4.12.0.88)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from vllm) (6.0.2)\n", "Requirement already satisfied: einops in /usr/local/lib/python3.11/dist-packages (from vllm) (0.8.1)\n", "Collecting compressed-tensors==0.10.2 (from vllm)\n", " Downloading compressed_tensors-0.10.2-py3-none-any.whl.metadata (7.0 kB)\n", "Collecting depyf==0.19.0 (from vllm)\n", " Downloading depyf-0.19.0-py3-none-any.whl.metadata (7.3 kB)\n", "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.11/dist-packages (from vllm) (3.1.1)\n", "Collecting watchfiles (from vllm)\n", " Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", "Collecting python-json-logger (from vllm)\n", " Downloading python_json_logger-3.3.0-py3-none-any.whl.metadata (4.0 kB)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from vllm) (1.16.1)\n", "Collecting ninja (from vllm)\n", " Using cached ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)\n", "Collecting pybase64 (from vllm)\n", " Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)\n", "Collecting cbor2 (from vllm)\n", " Downloading cbor2-5.6.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.0 kB)\n", "Collecting numba==0.61.2 (from vllm)\n", " Downloading numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.8 kB)\n", "Collecting ray!=2.44.*,>=2.43.0 (from ray[cgraph]!=2.44.*,>=2.43.0->vllm)\n", " Downloading ray-2.48.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (19 kB)\n", "Collecting torch==2.7.1 (from vllm)\n", " Downloading torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)\n", "Collecting torchaudio==2.7.1 (from 
vllm)\n", " Downloading torchaudio-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.6 kB)\n", "Collecting torchvision==0.22.1 (from vllm)\n", " Downloading torchvision-0.22.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)\n", "Collecting xformers==0.0.31 (from vllm)\n", " Downloading xformers-0.0.31-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", "Collecting astor (from depyf==0.19.0->vllm)\n", " Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.11/dist-packages (from depyf==0.19.0->vllm) (0.3.8)\n", "Collecting llvmlite<0.45,>=0.44.0dev0 (from numba==0.61.2->vllm)\n", " Downloading llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.8 kB)\n", "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (1.13.3)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (3.5)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (3.1.6)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (2025.3.0)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.6.77 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-runtime-cu12==12.6.77 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-cupti-cu12==12.6.80 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cudnn-cu12==9.5.1.17 (from torch==2.7.1->vllm)\n", " Downloading 
nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu12==12.6.4.1 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufft-cu12==11.3.0.4 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu12==10.3.7.77 (from torch==2.7.1->vllm)\n", " Downloading nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cusolver-cu12==11.7.1.2 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu12==12.5.4.2 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n", "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (0.6.3)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in /usr/local/lib/python3.11/dist-packages (from torch==2.7.1->vllm) (2.26.2)\n", "Collecting nvidia-nvtx-cu12==12.6.77 (from torch==2.7.1->vllm)\n", " Downloading nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-nvjitlink-cu12==12.6.85 (from torch==2.7.1->vllm)\n", " Downloading nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufile-cu12==1.11.1.6 (from torch==2.7.1->vllm)\n", " Downloading nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)\n", "Collecting triton==3.3.1 (from torch==2.7.1->vllm)\n", " Downloading 
triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)\n", "Requirement already satisfied: setuptools>=40.8.0 in /usr/local/lib/python3.11/dist-packages (from triton==3.3.1->torch==2.7.1->vllm) (75.2.0)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (25.0)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", "Requirement already satisfied: starlette<0.48.0,>=0.40.0 in /usr/local/lib/python3.11/dist-packages (from fastapi>=0.115.0->fastapi[standard]>=0.115.0->vllm) (0.47.2)\n", "Collecting fastapi-cli>=0.0.8 (from fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n", " Downloading fastapi_cli-0.0.8-py3-none-any.whl.metadata (6.3 kB)\n", "Requirement already satisfied: httpx>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from fastapi[standard]>=0.115.0->vllm) (0.28.1)\n", "Requirement already satisfied: python-multipart>=0.0.18 in /usr/local/lib/python3.11/dist-packages (from fastapi[standard]>=0.115.0->vllm) (0.0.20)\n", "Collecting email-validator>=2.0.0 (from fastapi[standard]>=0.115.0->vllm)\n", " Downloading email_validator-2.2.0-py3-none-any.whl.metadata (25 kB)\n", "Requirement already satisfied: uvicorn>=0.12.0 in /usr/local/lib/python3.11/dist-packages (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (0.35.0)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.33.0->huggingface-hub[hf_xet]>=0.33.0->vllm) (1.1.5)\n", "Collecting interegular>=0.3.2 (from lm-format-enforcer<0.11,>=0.10.11->vllm)\n", " Downloading interegular-0.3.3-py37-none-any.whl.metadata (3.0 kB)\n", "Requirement already satisfied: jsonschema>=4.21.1 in /usr/local/lib/python3.11/dist-packages (from 
mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (4.25.0)\n", "Collecting pydantic-extra-types>=2.10.5 (from pydantic-extra-types[pycountry]>=2.10.5->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm)\n", " Downloading pydantic_extra_types-2.10.5-py3-none-any.whl.metadata (3.9 kB)\n", "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (4.10.0)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (1.9.0)\n", "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (0.10.0)\n", "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai<=1.90.0,>=1.87.0->vllm) (1.3.1)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.10->vllm) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.10->vllm) (2.33.2)\n", "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.10->vllm) (0.4.1)\n", "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.11/dist-packages (from ray!=2.44.*,>=2.43.0->ray[cgraph]!=2.44.*,>=2.43.0->vllm) (8.2.1)\n", "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from ray!=2.44.*,>=2.43.0->ray[cgraph]!=2.44.*,>=2.43.0->vllm) (1.1.1)\n", "Requirement already satisfied: cupy-cuda12x in /usr/local/lib/python3.11/dist-packages (from ray[cgraph]!=2.44.*,>=2.43.0->vllm) (13.3.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->vllm) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages 
(from requests>=2.26.0->vllm) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->vllm) (2.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->vllm) (2025.8.3)\n", "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (2.6.1)\n", "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (1.4.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (25.3.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (1.7.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (6.6.3)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (0.3.2)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->vllm) (1.20.1)\n", "Collecting dnspython>=2.0.0 (from email-validator>=2.0.0->fastapi[standard]>=0.115.0->vllm)\n", " Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)\n", "Requirement already satisfied: typer>=0.15.1 in /usr/local/lib/python3.11/dist-packages (from fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (0.16.0)\n", "Collecting rich-toolkit>=0.14.8 (from fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n", " Downloading rich_toolkit-0.14.9-py3-none-any.whl.metadata (999 bytes)\n", "Collecting fastapi-cloud-cli>=0.1.1 (from fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n", " Downloading 
fastapi_cloud_cli-0.1.5-py3-none-any.whl.metadata (3.2 kB)\n", "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.23.0->fastapi[standard]>=0.115.0->vllm) (1.0.9)\n", "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.23.0->fastapi[standard]>=0.115.0->vllm) (0.16.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch==2.7.1->vllm) (3.0.2)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=4.21.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (2025.4.1)\n", "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=4.21.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.36.2)\n", "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=4.21.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.26.0)\n", "Collecting pycountry>=23 (from pydantic-extra-types[pycountry]>=2.10.5->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm)\n", " Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy>=1.13.3->torch==2.7.1->vllm) (1.3.0)\n", "Collecting httptools>=0.6.3 (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n", " Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", "Collecting python-dotenv>=0.13 (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n", " Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n", "Collecting uvloop>=0.15.1 (from 
uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n", " Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", "Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.11/dist-packages (from uvicorn[standard]>=0.12.0; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (15.0.1)\n", "Requirement already satisfied: fastrlock>=0.5 in /usr/local/lib/python3.11/dist-packages (from cupy-cuda12x->ray[cgraph]!=2.44.*,>=2.43.0->vllm) (0.8.3)\n", "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.11/dist-packages (from mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.13.1)\n", "Requirement already satisfied: soxr>=0.5.0 in /usr/local/lib/python3.11/dist-packages (from mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (0.5.0.post1)\n", "Collecting rignore>=0.5.1 (from fastapi-cloud-cli>=0.1.1->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm)\n", " Downloading rignore-0.6.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", "Requirement already satisfied: sentry-sdk>=2.20.0 in /usr/local/lib/python3.11/dist-packages (from fastapi-cloud-cli>=0.1.1->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (2.34.1)\n", "Requirement already satisfied: rich>=13.7.1 in /usr/local/lib/python3.11/dist-packages (from rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (13.9.4)\n", "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.11/dist-packages (from soundfile>=0.12.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (1.17.1)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer>=0.15.1->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == 
\"standard\"->fastapi[standard]>=0.115.0->vllm) (1.5.4)\n", "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from cffi>=1.0->soundfile>=0.12.1->mistral_common>=1.8.2->mistral_common[audio,image]>=1.8.2->vllm) (2.22)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=13.7.1->rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=13.7.1->rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (2.19.2)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=13.7.1->rich-toolkit>=0.14.8->fastapi-cli>=0.0.8->fastapi-cli[standard]>=0.0.8; extra == \"standard\"->fastapi[standard]>=0.115.0->vllm) (0.1.2)\n", "Downloading vllm-0.10.0-cp38-abi3-manylinux1_x86_64.whl (386.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m386.6/386.6 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading compressed_tensors-0.10.2-py3-none-any.whl (169 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.0/169.0 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading depyf-0.19.0-py3-none-any.whl (39 kB)\n", "Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading lark-1.2.2-py3-none-any.whl (111 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.0/111.0 kB\u001b[0m 
\u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m105.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading outlines_core-0.2.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m89.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl (821.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m821.2/821.2 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading torchaudio-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl (3.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m111.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading torchvision-0.22.1-cp311-cp311-manylinux_2_28_x86_64.whl (7.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m133.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading xformers-0.0.31-cp39-abi3-manylinux_2_28_x86_64.whl (117.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.1/117.1 MB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading xgrammar-0.1.21-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m128.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (393.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.1/393.1 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m134.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl (23.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m99.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (897 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.7/897.7 kB\u001b[0m \u001b[31m52.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl (571.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m571.0/571.0 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (200.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.2/200.2 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (56.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (158.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.2/158.2 MB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (216.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.6/216.6 MB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (19.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.7/19.7 MB\u001b[0m \u001b[31m109.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.3/89.3 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (155.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m155.7/155.7 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading transformers-4.55.0-py3-none-any.whl (11.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m141.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
gguf-0.17.1-py3-none-any.whl (96 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m96.2/96.2 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.0/15.0 MB\u001b[0m \u001b[31m129.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading lm_format_enforcer-0.10.12-py3-none-any.whl (44 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.3/44.3 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading mistral_common-1.8.3-py3-none-any.whl (6.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.5/6.5 MB\u001b[0m \u001b[31m133.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading openai-1.90.0-py3-none-any.whl (734 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m734.6/734.6 kB\u001b[0m \u001b[31m53.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl (19 kB)\n", "Downloading ray-2.48.0-cp311-cp311-manylinux2014_x86_64.whl (70.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.1/70.1 MB\u001b[0m \u001b[31m37.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading blake3-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (385 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m385.5/385.5 kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading cbor2-5.6.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (249 kB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m249.2/249.2 kB\u001b[0m \u001b[31m25.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (210 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.7/210.7 kB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hUsing cached ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)\n", "Downloading partial_json_parser-0.2.1.1.post6-py3-none-any.whl (10 kB)\n", "Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl (71 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.4/71.4 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading python_json_logger-3.3.0-py3-none-any.whl (15 kB)\n", "Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (453 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m453.1/453.1 kB\u001b[0m \u001b[31m41.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading email_validator-2.2.0-py3-none-any.whl (33 kB)\n", "Downloading fastapi_cli-0.0.8-py3-none-any.whl (10 kB)\n", "Downloading interegular-0.3.3-py37-none-any.whl (23 kB)\n", "Downloading llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (42.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.4/42.4 MB\u001b[0m \u001b[31m61.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pydantic_extra_types-2.10.5-py3-none-any.whl (38 kB)\n", "Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)\n", "Downloading dnspython-2.7.0-py3-none-any.whl (313 kB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m313.6/313.6 kB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fastapi_cloud_cli-0.1.5-py3-none-any.whl (18 kB)\n", "Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (459 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.8/459.8 kB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m127.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n", "Downloading rich_toolkit-0.14.9-py3-none-any.whl (25 kB)\n", "Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m114.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading rignore-0.6.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (950 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m950.6/950.6 kB\u001b[0m \u001b[31m60.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: blake3, uvloop, triton, rignore, python-json-logger, python-dotenv, pycountry, pybase64, partial-json-parser, outlines_core, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufile-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, ninja, msgspec, llvmlite, llguidance, lark, interegular, httptools, gguf, dnspython, diskcache, cbor2, astor, watchfiles, nvidia-cusparse-cu12, nvidia-cufft-cu12, nvidia-cudnn-cu12, numba, 
email-validator, depyf, rich-toolkit, pydantic-extra-types, prometheus-fastapi-instrumentator, openai, nvidia-cusolver-cu12, lm-format-enforcer, transformers, torch, ray, fastapi-cloud-cli, fastapi-cli, xgrammar, xformers, torchvision, torchaudio, mistral_common, compressed-tensors, vllm\n", " Attempting uninstall: triton\n", " Found existing installation: triton 3.3.0\n", " Uninstalling triton-3.3.0:\n", " Successfully uninstalled triton-3.3.0\n", " Attempting uninstall: nvidia-nvtx-cu12\n", " Found existing installation: nvidia-nvtx-cu12 12.8.55\n", " Uninstalling nvidia-nvtx-cu12-12.8.55:\n", " Successfully uninstalled nvidia-nvtx-cu12-12.8.55\n", " Attempting uninstall: nvidia-nvjitlink-cu12\n", " Found existing installation: nvidia-nvjitlink-cu12 12.8.61\n", " Uninstalling nvidia-nvjitlink-cu12-12.8.61:\n", " Successfully uninstalled nvidia-nvjitlink-cu12-12.8.61\n", " Attempting uninstall: nvidia-curand-cu12\n", " Found existing installation: nvidia-curand-cu12 10.3.9.55\n", " Uninstalling nvidia-curand-cu12-10.3.9.55:\n", " Successfully uninstalled nvidia-curand-cu12-10.3.9.55\n", " Attempting uninstall: nvidia-cufile-cu12\n", " Found existing installation: nvidia-cufile-cu12 1.13.0.11\n", " Uninstalling nvidia-cufile-cu12-1.13.0.11:\n", " Successfully uninstalled nvidia-cufile-cu12-1.13.0.11\n", " Attempting uninstall: nvidia-cuda-runtime-cu12\n", " Found existing installation: nvidia-cuda-runtime-cu12 12.8.57\n", " Uninstalling nvidia-cuda-runtime-cu12-12.8.57:\n", " Successfully uninstalled nvidia-cuda-runtime-cu12-12.8.57\n", " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", " Found existing installation: nvidia-cuda-nvrtc-cu12 12.8.61\n", " Uninstalling nvidia-cuda-nvrtc-cu12-12.8.61:\n", " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.8.61\n", " Attempting uninstall: nvidia-cuda-cupti-cu12\n", " Found existing installation: nvidia-cuda-cupti-cu12 12.8.57\n", " Uninstalling nvidia-cuda-cupti-cu12-12.8.57:\n", " Successfully uninstalled 
nvidia-cuda-cupti-cu12-12.8.57\n", " Attempting uninstall: nvidia-cublas-cu12\n", " Found existing installation: nvidia-cublas-cu12 12.8.3.14\n", " Uninstalling nvidia-cublas-cu12-12.8.3.14:\n", " Successfully uninstalled nvidia-cublas-cu12-12.8.3.14\n", " Attempting uninstall: llvmlite\n", " Found existing installation: llvmlite 0.43.0\n", " Uninstalling llvmlite-0.43.0:\n", " Successfully uninstalled llvmlite-0.43.0\n", " Attempting uninstall: nvidia-cusparse-cu12\n", " Found existing installation: nvidia-cusparse-cu12 12.5.7.53\n", " Uninstalling nvidia-cusparse-cu12-12.5.7.53:\n", " Successfully uninstalled nvidia-cusparse-cu12-12.5.7.53\n", " Attempting uninstall: nvidia-cufft-cu12\n", " Found existing installation: nvidia-cufft-cu12 11.3.3.41\n", " Uninstalling nvidia-cufft-cu12-11.3.3.41:\n", " Successfully uninstalled nvidia-cufft-cu12-11.3.3.41\n", " Attempting uninstall: nvidia-cudnn-cu12\n", " Found existing installation: nvidia-cudnn-cu12 9.7.1.26\n", " Uninstalling nvidia-cudnn-cu12-9.7.1.26:\n", " Successfully uninstalled nvidia-cudnn-cu12-9.7.1.26\n", " Attempting uninstall: numba\n", " Found existing installation: numba 0.60.0\n", " Uninstalling numba-0.60.0:\n", " Successfully uninstalled numba-0.60.0\n", " Attempting uninstall: openai\n", " Found existing installation: openai 1.98.0\n", " Uninstalling openai-1.98.0:\n", " Successfully uninstalled openai-1.98.0\n", " Attempting uninstall: nvidia-cusolver-cu12\n", " Found existing installation: nvidia-cusolver-cu12 11.7.2.55\n", " Uninstalling nvidia-cusolver-cu12-11.7.2.55:\n", " Successfully uninstalled nvidia-cusolver-cu12-11.7.2.55\n", " Attempting uninstall: transformers\n", " Found existing installation: transformers 4.51.3\n", " Uninstalling transformers-4.51.3:\n", " Successfully uninstalled transformers-4.51.3\n", " Attempting uninstall: torch\n", " Found existing installation: torch 2.7.0+cu128\n", " Uninstalling torch-2.7.0+cu128:\n", " Successfully uninstalled torch-2.7.0+cu128\n", " 
Attempting uninstall: torchvision\n", " Found existing installation: torchvision 0.22.0+cu128\n", " Uninstalling torchvision-0.22.0+cu128:\n", " Successfully uninstalled torchvision-0.22.0+cu128\n", " Attempting uninstall: torchaudio\n", " Found existing installation: torchaudio 2.7.0+cu128\n", " Uninstalling torchaudio-2.7.0+cu128:\n", " Successfully uninstalled torchaudio-2.7.0+cu128\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompatible.\n", "dots-ocr 1.0 requires transformers==4.51.3, but you have transformers 4.55.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed astor-0.8.1 blake3-1.0.5 cbor2-5.6.5 compressed-tensors-0.10.2 depyf-0.19.0 diskcache-5.6.3 dnspython-2.7.0 email-validator-2.2.0 fastapi-cli-0.0.8 fastapi-cloud-cli-0.1.5 gguf-0.17.1 httptools-0.6.4 interegular-0.3.3 lark-1.2.2 llguidance-0.7.30 llvmlite-0.44.0 lm-format-enforcer-0.10.12 mistral_common-1.8.3 msgspec-0.19.0 ninja-1.11.1.4 numba-0.61.2 nvidia-cublas-cu12-12.6.4.1 nvidia-cuda-cupti-cu12-12.6.80 nvidia-cuda-nvrtc-cu12-12.6.77 nvidia-cuda-runtime-cu12-12.6.77 nvidia-cudnn-cu12-9.5.1.17 nvidia-cufft-cu12-11.3.0.4 nvidia-cufile-cu12-1.11.1.6 nvidia-curand-cu12-10.3.7.77 nvidia-cusolver-cu12-11.7.1.2 nvidia-cusparse-cu12-12.5.4.2 nvidia-nvjitlink-cu12-12.6.85 nvidia-nvtx-cu12-12.6.77 openai-1.90.0 outlines_core-0.2.10 partial-json-parser-0.2.1.1.post6 prometheus-fastapi-instrumentator-7.1.0 pybase64-1.4.2 pycountry-24.6.1 pydantic-extra-types-2.10.5 python-dotenv-1.1.1 python-json-logger-3.3.0 ray-2.48.0 rich-toolkit-0.14.9 rignore-0.6.4 torch-2.7.1 torchaudio-2.7.1 torchvision-0.22.1 transformers-4.55.0 triton-3.3.1 uvloop-0.21.0 vllm-0.10.0 watchfiles-1.1.0 xformers-0.0.31 xgrammar-0.1.21\n", "nohup: failed to run command 
'CUDA_VISIBLE_DEVICES=0': No such file or directory\n" ] } ] }, { "cell_type": "code", "source": [ "from pyngrok import ngrok\n", "\n", "# Open the tunnel before starting the server: the `vllm serve` cell below runs in the\n", "# foreground and blocks the kernel, so this cell cannot be executed once it is running.\n", "# The port must match the one vLLM listens on (see `--port` in the next cell).\n", "# NOTE(review): the tunnel makes the API reachable from the public internet without\n", "# authentication; consider starting vLLM with `--api-key` before sharing the URL.\n", "public_url = ngrok.connect(8000, bind_tls=True) # Adjust port if needed\n", "# `ngrok.connect` returns an NgrokTunnel object; print its `public_url` attribute so the\n", "# output is the bare https URL clients can paste, not the tunnel's repr.\n", "print(\"Public URL:\", public_url.public_url)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iNPRVOjmUxJb", "outputId": "66388365-796e-4489-9285-17ad6ccad0ed" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Public URL: NgrokTunnel: \"https://988ecbb0776c.ngrok-free.app\" -> \"http://localhost:8000\"\n" ] } ] }, { "cell_type": "code", "source": [ "# Serve the DotsOCR weights through vLLM's OpenAI-compatible API server.\n", "# The command runs in the foreground, so this cell keeps running until it is interrupted.\n", "# --port is spelled out (8000 is also vLLM's default) so it visibly matches the port\n", "# tunnelled by ngrok in the previous cell.\n", "!CUDA_VISIBLE_DEVICES=0 vllm serve ./weights/DotsOCR --port 8000 --tensor-parallel-size 1 --gpu-memory-utilization 0.95 --chat-template-content-format string --served-model-name model --trust-remote-code" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QbYEd_foT2QY", "outputId": "6c980927-042e-498a-e013-a575d4cf5132" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2025-08-07 20:57:52.107021: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2025-08-07 20:57:52.125111: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1754600272.146783 10516 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1754600272.153513 10516 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "W0000 00:00:1754600272.170115 10516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1754600272.170145 10516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1754600272.170148 10516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1754600272.170151 10516 computation_placer.cc:177] computation placer already registered. 
Please check linkage and avoid linking the same target more than once.\n", "2025-08-07 20:57:52.174913: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "INFO 08-07 20:57:57 [__init__.py:235] Automatically detected platform cuda.\n", "INFO 08-07 20:58:01 [api_server.py:1755] vLLM API server version 0.10.0\n", "INFO 08-07 20:58:01 [cli_args.py:261] non-default args: {'model_tag': './weights/DotsOCR', 'chat_template_content_format': 'string', 'model': './weights/DotsOCR', 'trust_remote_code': True, 'served_model_name': ['model'], 'gpu_memory_utilization': 0.95}\n", "INFO 08-07 20:58:01 [config.py:1604] Using max model len 131072\n", "INFO 08-07 20:58:01 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=2048.\n", "2025-08-07 20:58:05.950037: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1754600285.970806 10621 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1754600285.977110 10621 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "W0000 00:00:1754600285.992571 10621 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1754600285.992601 10621 computation_placer.cc:177] computation placer already registered. 
Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1754600285.992604 10621 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1754600285.992606 10621 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "INFO 08-07 20:58:11 [__init__.py:235] Automatically detected platform cuda.\n", "INFO 08-07 20:58:14 [core.py:572] Waiting for init message from front-end.\n", "INFO 08-07 20:58:14 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='./weights/DotsOCR', speculative_config=None, tokenizer='./weights/DotsOCR', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, pooler_config=None, 
compilation_config={\"level\":3,\"debug_dump_path\":\"\",\"cache_dir\":\"\",\"backend\":\"\",\"custom_ops\":[],\"splitting_ops\":[\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.mamba_mixer2\"],\"use_inductor\":true,\"compile_sizes\":[],\"inductor_compile_config\":{\"enable_auto_functionalized_v2\":false},\"inductor_passes\":{},\"use_cudagraph\":true,\"cudagraph_num_of_warmups\":1,\"cudagraph_capture_sizes\":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],\"cudagraph_copy_inputs\":false,\"full_cuda_graph\":false,\"max_capture_size\":512,\"local_cache_dir\":null}\n", "INFO 08-07 20:58:15 [parallel_state.py:1102] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0\n", "WARNING 08-07 20:58:15 [topk_topp_sampler.py:59] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. 
For the best performance, please install FlashInfer.\n", "INFO 08-07 20:58:15 [gpu_model_runner.py:1843] Starting to load model ./weights/DotsOCR...\n", "INFO 08-07 20:58:15 [gpu_model_runner.py:1875] Loading model from scratch...\n", "INFO 08-07 20:58:16 [cuda.py:290] Using Flash Attention backend on V1 engine.\n", "Loading safetensors checkpoint shards: 100% 2/2 [00:01<00:00, 1.06it/s]\n", "INFO 08-07 20:58:18 [default_loader.py:262] Loading weights took 1.99 seconds\n", "INFO 08-07 20:58:19 [gpu_model_runner.py:1892] Model loading took 5.7174 GiB and 2.253556 seconds\n", "INFO 08-07 20:58:19 [gpu_model_runner.py:2380] Encoder cache will be initialized with a budget of 14400 tokens, and profiled with 1 image items of the maximum feature size.\n", "The image processor of type `Qwen2VLImageProcessor` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. Note that this behavior will be extended to all models in a future release.\n", "You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. 
Loading from `preprocessor.json` will be removed in v5.0.\n", "INFO 08-07 20:58:49 [backends.py:530] Using cache directory: /root/.cache/vllm/torch_compile_cache/f40f68567f/rank_0_0/backbone for vLLM's torch.compile\n", "INFO 08-07 20:58:49 [backends.py:541] Dynamo bytecode transform time: 8.76 s\n", "INFO 08-07 20:58:56 [backends.py:161] Directly load the compiled graph(s) for dynamic shape from the cache, took 6.316 s\n", "INFO 08-07 20:58:56 [monitor.py:34] torch.compile takes 8.76 s in total\n", "INFO 08-07 20:58:58 [gpu_worker.py:255] Available KV cache memory: 12.20 GiB\n", "INFO 08-07 20:58:58 [kv_cache_utils.py:833] GPU KV cache size: 456,816 tokens\n", "INFO 08-07 20:58:58 [kv_cache_utils.py:837] Maximum concurrency for 131,072 tokens per request: 3.49x\n", "Capturing CUDA graph shapes: 100% 67/67 [00:02<00:00, 24.17it/s]\n", "INFO 08-07 20:59:01 [gpu_model_runner.py:2485] Graph capturing finished in 3 secs, took 0.44 GiB\n", "INFO 08-07 20:59:01 [core.py:193] init engine (profile, create kv cache, warmup model) took 42.75 seconds\n", "INFO 08-07 20:59:02 [loggers.py:141] Engine 000: vllm cache_config_info with initialization after num_gpu_blocks is: 28551\n", "INFO 08-07 20:59:02 [api_server.py:1818] Starting vLLM API server 0 on http://0.0.0.0:8000\n", "INFO 08-07 20:59:02 [launcher.py:29] Available routes are:\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /openapi.json, Methods: HEAD, GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /docs, Methods: HEAD, GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /docs/oauth2-redirect, Methods: HEAD, GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /redoc, Methods: HEAD, GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /health, Methods: GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /load, Methods: GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /ping, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /ping, Methods: GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: 
/tokenize, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /detokenize, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/models, Methods: GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /version, Methods: GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/responses, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/responses/{response_id}, Methods: GET\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/responses/{response_id}/cancel, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/chat/completions, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/completions, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/embeddings, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /pooling, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /classify, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /score, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/score, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/audio/transcriptions, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/audio/translations, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /rerank, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v1/rerank, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /v2/rerank, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /scale_elastic_ep, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /is_scaling_elastic_ep, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /invocations, Methods: POST\n", "INFO 08-07 20:59:02 [launcher.py:37] Route: /metrics, Methods: GET\n", "\u001b[32mINFO\u001b[0m: Started server process [\u001b[36m10516\u001b[0m]\n", "\u001b[32mINFO\u001b[0m: Waiting for application startup.\n", "\u001b[32mINFO\u001b[0m: Application startup complete.\n", "\u001b[32mINFO\u001b[0m: 
2001:818:c61b:b000:457e:d747:75e4:7263:0 - \"\u001b[1mGET / HTTP/1.1\u001b[0m\" \u001b[31m404 Not Found\u001b[0m\n", "\u001b[32mINFO\u001b[0m: 2001:818:c61b:b000:457e:d747:75e4:7263:0 - \"\u001b[1mGET /favicon.ico HTTP/1.1\u001b[0m\" \u001b[31m404 Not Found\u001b[0m\n", "INFO 08-07 21:00:28 [launcher.py:80] Shutting down FastAPI HTTP server.\n", "[rank0]:[W807 21:00:29.608158947 ProcessGroupNCCL.cpp:1479] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())\n", "\u001b[32mINFO\u001b[0m: Shutting down\n", "\u001b[31mERROR\u001b[0m: Traceback (most recent call last):\n", " File \"/usr/local/lib/python3.11/dist-packages/starlette/routing.py\", line 701, in lifespan\n", " await receive()\n", " File \"/usr/local/lib/python3.11/dist-packages/uvicorn/lifespan/on.py\", line 137, in receive\n", " return await self.receive_queue.get()\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"/usr/lib/python3.11/asyncio/queues.py\", line 158, in get\n", " await getter\n", "asyncio.exceptions.CancelledError\n", "\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "mbOe-1sxU1-r" }, "execution_count": null, "outputs": [] } ] }