
Merge pull request #1693 from shniubobo/feat/web_api

Update web_api
Xiaomeng Zhao 9 months ago
Parent
Current commit
84f84d2648

+ 47 - 65
projects/web_api/Dockerfile

@@ -1,85 +1,67 @@
-# Use the official Ubuntu base image
-FROM ubuntu:latest
+FROM python:3.10-slim-bookworm AS base
 
-# ENV http_proxy http://127.0.0.1:7890
-# ENV https_proxy http://127.0.0.1:7890
+WORKDIR /app
 
-# Set environment variables to non-interactive to avoid prompts during installation
-ENV DEBIAN_FRONTEND=noninteractive
-ENV LANG C.UTF-8
-
-# ADD sources.list /etc/apt
-# RUN apt-get clean
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=C.UTF-8 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_NO_CACHE_DIR=1
 
 
+FROM base AS build
 
 # Update the package list and install necessary packages
-RUN apt-get -q update \
-    && apt-get -q install -y --no-install-recommends \
-        apt-utils \
-        bats \
-        build-essential
-RUN apt-get update && apt-get install -y vim net-tools procps lsof curl wget iputils-ping telnet lrzsz git
-
 RUN apt-get update && \
-    apt-get install -y \
-        software-properties-common && \
-    add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get update && \
-    apt-get install -y \
-        python3.10 \
-        python3.10-venv \
-        python3.10-distutils \
-        python3-pip \
-        wget \
-        git \
-        libgl1 \
-        libglib2.0-0 \
-        && rm -rf /var/lib/apt/lists/*
-        
-# RUN unset http_proxy && unset https_proxy
-
-# Set Python 3.10 as the default python3
-RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
-
-# Create a virtual environment for MinerU
-RUN python3 -m venv /opt/mineru_venv
-RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple
-# Activate the virtual environment and install necessary Python packages
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    pip install --upgrade pip && \
-    pip install magic-pdf[full] --extra-index-url https://myhloli.github.io/wheels/ --no-cache-dir"
+    apt-get install -y --no-install-recommends \
+        build-essential && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
+# Build Python dependencies
+COPY requirements.txt .
+RUN python -m venv /app/venv && \
+    . /app/venv/bin/activate && \
+    pip install -r requirements.txt && \
+    pip uninstall -y paddlepaddle && \
+    pip install -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ \
+        paddlepaddle-gpu==3.0.0rc1
 
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    pip install fastapi uvicorn python-multipart --no-cache-dir"
+# Download models
+COPY download_models.py .
+RUN . /app/venv/bin/activate && \
+    ./download_models.py
 
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    pip uninstall  paddlepaddle -y"
 
-RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
-    python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ --no-cache-dir"
+FROM base AS prod
 
-# Copy the configuration file template and set up the model directory
-COPY magic-pdf.template.json /root/magic-pdf.json
-ADD models /opt/models
-ADD .paddleocr /root/.paddleocr 
-ADD app.py /root/app.py
+# Copy Python dependencies and models from the build stage
+COPY --from=build /app/venv /app/venv
+COPY --from=build /opt/models /opt/models
+COPY --from=build /opt/layoutreader /opt/layoutreader
 
-WORKDIR /root
-
-# Set the models directory in the configuration file (adjust the path as needed)
-RUN sed -i 's|/tmp/models|/opt/models|g' /root/magic-pdf.json
-
-# Create the models directory
-# RUN mkdir -p /opt/models
+# Update the package list and install necessary packages
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libgl1 \
+        libglib2.0-0 \
+        libgomp1 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
-# Set the entry point to activate the virtual environment and run the command line tool
-# ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\" && python3 app.py", "--"]
+# Create volume for paddleocr models
+RUN mkdir -p /root/.paddleocr
+VOLUME [ "/root/.paddleocr" ]
 
+# Copy the app and its configuration file
+COPY entrypoint.sh /app/entrypoint.sh
+COPY magic-pdf.json /root/magic-pdf.json
+COPY app.py /app/app.py
 
 # Expose the port that FastAPI will run on
 EXPOSE 8000
 
 # Command to run FastAPI using Uvicorn, pointing to app.py and binding to 0.0.0.0:8000
-CMD ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && uvicorn app:app --host 0.0.0.0 --port 8000"]
+ENTRYPOINT [ "/app/entrypoint.sh" ]
+CMD ["--host", "0.0.0.0", "--port", "8000"]

+ 36 - 20
projects/web_api/README.md

@@ -1,44 +1,60 @@
-PDF parsing API based on MinerU
+# PDF parsing API based on MinerU
 
-    - GPU image build for MinerU
-    - FastAPI-based PDF parsing API
+- GPU image build for MinerU
+- FastAPI-based PDF parsing API
 
-One-click startup: everything is packaged into the image with bundled model weights, and GPU-accelerated inference is supported; per-page parsing on GPU is tens of times faster than on CPU.
+## Build
 
+```
+docker build -t mineru-api .
+```
 
-## Startup command:
+Or build through a proxy
 
+```
+docker build --build-arg http_proxy=http://127.0.0.1:7890 --build-arg https_proxy=http://127.0.0.1:7890 -t mineru-api .
+```
 
-```docker run -itd --name=mineru_server --gpus=all -p 8888:8000 quincyqiang/mineru:0.1-models```
+## Startup command
 
-![](https://i-blog.csdnimg.cn/direct/bcff4f524ea5400db14421ba7cec4989.png)
+```
+docker run --rm -it --gpus=all -v ./paddleocr:/root/.paddleocr -p 8000:8000 mineru-api
+```
 
-For detailed screenshots, see the blog post: https://blog.csdn.net/yanqianglifei/article/details/141979684
+On the first API call, the paddleocr models (a few tens of MB) are downloaded automatically; all other models are already included in the image.
 
+## Test parameters
 
-## Startup logs
+Access URLs
 
-![](https://i-blog.csdnimg.cn/direct/4eb5657567e4415eba912179dca5c8aa.png)
+```
+http://localhost:8000/docs
+http://127.0.0.1:8000/docs
+```
 
-## Input parameters:
+## Legacy image addresses
 
-Access URLs:
+> Alibaba Cloud: docker pull registry.cn-beijing.aliyuncs.com/quincyqiang/mineru:0.1-models
+>
+> Docker Hub: docker pull quincyqiang/mineru:0.1-models
 
-    http://localhost:8888/docs
 
-    http://127.0.01:8888/docs
+## Legacy screenshots
 
-![](https://i-blog.csdnimg.cn/direct/8b3a2bc5908042268e8cc69756e331a2.png)
+### Startup command
 
-## Parsing results:
+![](https://i-blog.csdnimg.cn/direct/bcff4f524ea5400db14421ba7cec4989.png)
 
-![](https://i-blog.csdnimg.cn/direct/a54dcae834ae48d498fb595aca4212c3.png)
+For detailed screenshots, see the blog post: https://blog.csdn.net/yanqianglifei/article/details/141979684
 
+### Startup logs
 
+![](https://i-blog.csdnimg.cn/direct/4eb5657567e4415eba912179dca5c8aa.png)
 
-## Image addresses:
+### Test parameters
 
-> Alibaba Cloud: docker pull registry.cn-beijing.aliyuncs.com/quincyqiang/mineru:0.1-models
+![](https://i-blog.csdnimg.cn/direct/8b3a2bc5908042268e8cc69756e331a2.png)
 
-> Docker Hub: docker pull quincyqiang/mineru:0.1-models
+### Parsing results
 
+![](https://i-blog.csdnimg.cn/direct/a54dcae834ae48d498fb595aca4212c3.png)
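
Beyond the Swagger UI above, the endpoint can be exercised from a script. The following is a minimal client sketch using `requests`; the address matches the startup command above, while `demo.pdf` is a placeholder filename.

```python
# Client sketch for POST /pdf_parse (defined in app.py below).
import requests

with open("demo.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/pdf_parse",
        # Scalar options are query parameters: parse_method is auto/ocr/txt,
        # and the return_* flags select which extra fields come back.
        params={"parse_method": "auto", "return_content_list": True},
        files={"pdf_file": ("demo.pdf", f, "application/pdf")},
        timeout=600,  # parsing large PDFs can take a while
    )
resp.raise_for_status()
result = resp.json()
print(result["md_content"][:500])  # md_content is always returned
```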

+ 237 - 122
projects/web_api/app.py

@@ -1,163 +1,278 @@
-import copy
 import json
 import os
-from tempfile import NamedTemporaryFile
+from base64 import b64encode
+from glob import glob
+from io import StringIO
+from typing import Tuple, Union
 
 import uvicorn
-from fastapi import FastAPI, File, UploadFile
+from fastapi import FastAPI, HTTPException, UploadFile
 from fastapi.responses import JSONResponse
 from loguru import logger
 
 import magic_pdf.model as model_config
 from magic_pdf.config.enums import SupportedPdfParseMethod
-from magic_pdf.data.data_reader_writer import FileBasedDataWriter
+from magic_pdf.data.data_reader_writer import DataWriter, FileBasedDataWriter
+from magic_pdf.data.data_reader_writer.s3 import S3DataReader, S3DataWriter
 from magic_pdf.data.dataset import PymuDocDataset
+from magic_pdf.libs.config_reader import get_bucket_name, get_s3_config
 from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
 from magic_pdf.operators.models import InferenceResult
+from magic_pdf.operators.pipes import PipeResult
 
 model_config.__use_inside_model__ = True
 
 app = FastAPI()
 
 
-def json_md_dump(
-    model_json,
-    middle_json,
-    md_writer,
-    pdf_name,
-    content_list,
-    md_content,
-):
-    # Write model results to model.json
-    orig_model_list = copy.deepcopy(model_json)
-    md_writer.write_string(
-        f'{pdf_name}_model.json',
-        json.dumps(orig_model_list, ensure_ascii=False, indent=4),
-    )
-
-    # Write intermediate results to middle.json
-    md_writer.write_string(
-        f'{pdf_name}_middle.json',
-        json.dumps(middle_json, ensure_ascii=False, indent=4),
-    )
-
-    # Write text content results to content_list.json
-    md_writer.write_string(
-        f'{pdf_name}_content_list.json',
-        json.dumps(content_list, ensure_ascii=False, indent=4),
-    )
-
-    # Write results to .md file
-    md_writer.write_string(
-        f'{pdf_name}.md',
-        md_content,
-    )
-
-
-@app.post('/pdf_parse', tags=['projects'], summary='Parse PDF file')
-async def pdf_parse_main(
-    pdf_file: UploadFile = File(...),
-    parse_method: str = 'auto',
-    model_json_path: str = None,
-    is_json_md_dump: bool = True,
-    output_dir: str = 'output',
-):
-    """Execute the process of converting PDF to JSON and MD, outputting MD and
-    JSON files to the specified directory.
-
-    :param pdf_file: The PDF file to be parsed
-    :param parse_method: Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr
-    :param model_json_path: Path to existing model data file. If empty, use built-in model. PDF and model_json must correspond
-    :param is_json_md_dump: Whether to write parsed data to .json and .md files. Default is True. Different stages of data will be written to different .json files (3 in total), md content will be saved to .md file  # noqa E501
-    :param output_dir: Output directory for results. A folder named after the PDF file will be created to store all results
+class MemoryDataWriter(DataWriter):
+    def __init__(self):
+        self.buffer = StringIO()
+
+    def write(self, path: str, data: bytes) -> None:
+        if isinstance(data, str):
+            self.buffer.write(data)
+        else:
+            self.buffer.write(data.decode("utf-8"))
+
+    def write_string(self, path: str, data: str) -> None:
+        self.buffer.write(data)
+
+    def get_value(self) -> str:
+        return self.buffer.getvalue()
+
+    def close(self):
+        self.buffer.close()
+
+
+def init_writers(
+    pdf_path: str = None,
+    pdf_file: UploadFile = None,
+    output_path: str = None,
+    output_image_path: str = None,
+) -> Tuple[
+    Union[S3DataWriter, FileBasedDataWriter],
+    Union[S3DataWriter, FileBasedDataWriter],
+    bytes,
+]:
     """
-    try:
-        # Create a temporary file to store the uploaded PDF
-        with NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
-            temp_pdf.write(await pdf_file.read())
-            temp_pdf_path = temp_pdf.name
+    Initialize writers based on path type
 
-        pdf_name = os.path.basename(pdf_file.filename).split('.')[0]
+    Args:
+        pdf_path: PDF file path (local path or S3 path)
+        pdf_file: Uploaded PDF file object
+        output_path: Output directory path
+        output_image_path: Image output directory path
 
-        if output_dir:
-            output_path = os.path.join(output_dir, pdf_name)
+    Returns:
+        Tuple[writer, image_writer, pdf_bytes]: Returns initialized writer tuple and PDF
+        file content
+    """
+    if pdf_path:
+        is_s3_path = pdf_path.startswith("s3://")
+        if is_s3_path:
+            bucket = get_bucket_name(pdf_path)
+            ak, sk, endpoint = get_s3_config(bucket)
+
+            writer = S3DataWriter(
+                output_path, bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint
+            )
+            image_writer = S3DataWriter(
+                output_image_path, bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint
+            )
+            # Create a temporary reader to fetch the file content
+            temp_reader = S3DataReader(
+                "", bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint
+            )
+            pdf_bytes = temp_reader.read(pdf_path)
         else:
-            output_path = os.path.join(os.path.dirname(temp_pdf_path), pdf_name)
+            writer = FileBasedDataWriter(output_path)
+            image_writer = FileBasedDataWriter(output_image_path)
+            os.makedirs(output_image_path, exist_ok=True)
+            with open(pdf_path, "rb") as f:
+                pdf_bytes = f.read()
+    else:
+        # Handle the uploaded file
+        pdf_bytes = pdf_file.file.read()
+        writer = FileBasedDataWriter(output_path)
+        image_writer = FileBasedDataWriter(output_image_path)
+        os.makedirs(output_image_path, exist_ok=True)
 
-        output_image_path = os.path.join(output_path, 'images')
+    return writer, image_writer, pdf_bytes
 
-        # Get parent path of images for relative path in .md and content_list.json
-        image_path_parent = os.path.basename(output_image_path)
 
-        pdf_bytes = open(temp_pdf_path, 'rb').read()  # Read binary data of PDF file
+def process_pdf(
+    pdf_bytes: bytes,
+    parse_method: str,
+    image_writer: Union[S3DataWriter, FileBasedDataWriter],
+) -> Tuple[InferenceResult, PipeResult]:
+    """
+    Process PDF file content
+
+    Args:
+        pdf_bytes: Binary content of PDF file
+        parse_method: Parse method ('ocr', 'txt', 'auto')
+        image_writer: Image writer
 
-        if model_json_path:
-            # Read original JSON data of PDF file parsed by model, list type
-            model_json = json.loads(open(model_json_path, 'r', encoding='utf-8').read())
+    Returns:
+        Tuple[InferenceResult, PipeResult]: Returns inference result and pipeline result
+    """
+    ds = PymuDocDataset(pdf_bytes)
+    infer_result: InferenceResult = None
+    pipe_result: PipeResult = None
+
+    if parse_method == "ocr":
+        infer_result = ds.apply(doc_analyze, ocr=True)
+        pipe_result = infer_result.pipe_ocr_mode(image_writer)
+    elif parse_method == "txt":
+        infer_result = ds.apply(doc_analyze, ocr=False)
+        pipe_result = infer_result.pipe_txt_mode(image_writer)
+    else:  # auto
+        if ds.classify() == SupportedPdfParseMethod.OCR:
+            infer_result = ds.apply(doc_analyze, ocr=True)
+            pipe_result = infer_result.pipe_ocr_mode(image_writer)
         else:
-            model_json = []
-
-        # Execute parsing steps
-        image_writer, md_writer = FileBasedDataWriter(
-            output_image_path
-        ), FileBasedDataWriter(output_path)
-
-        ds = PymuDocDataset(pdf_bytes)
-        # Choose parsing method
-        if parse_method == 'auto':
-            if ds.classify() == SupportedPdfParseMethod.OCR:
-                parse_method = 'ocr'
-            else:
-                parse_method = 'txt'
-
-        if parse_method not in ['txt', 'ocr']:
-            logger.error('Unknown parse method, only auto, ocr, txt allowed')
+            infer_result = ds.apply(doc_analyze, ocr=False)
+            pipe_result = infer_result.pipe_txt_mode(image_writer)
+
+    return infer_result, pipe_result
+
+
+def encode_image(image_path: str) -> str:
+    """Encode image using base64"""
+    with open(image_path, "rb") as f:
+        return b64encode(f.read()).decode()
+
+
+@app.post(
+    "/pdf_parse",
+    tags=["projects"],
+    summary="Parse PDF files (supports local files and S3)",
+)
+async def pdf_parse(
+    pdf_file: UploadFile = None,
+    pdf_path: str = None,
+    parse_method: str = "auto",
+    is_json_md_dump: bool = False,
+    output_dir: str = "output",
+    return_layout: bool = False,
+    return_info: bool = False,
+    return_content_list: bool = False,
+    return_images: bool = False,
+):
+    """
+    Execute the process of converting PDF to JSON and MD, outputting MD and JSON files
+    to the specified directory.
+
+    Args:
+        pdf_file: The PDF file to be parsed. Must not be specified together with
+            `pdf_path`
+        pdf_path: The path to the PDF file to be parsed. Must not be specified together
+            with `pdf_file`
+        parse_method: Parsing method, can be auto, ocr, or txt. Default is auto. If
+            results are not satisfactory, try ocr
+        is_json_md_dump: Whether to write parsed data to .json and .md files. Default
+            to False. Different stages of data will be written to different .json files
+            (3 in total), md content will be saved to .md file
+        output_dir: Output directory for results. A folder named after the PDF file
+            will be created to store all results
+        return_layout: Whether to return parsed PDF layout. Default to False
+        return_info: Whether to return parsed PDF info. Default to False
+        return_content_list: Whether to return parsed PDF content list. Default to False
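+        return_images: Whether to return images extracted from the PDF as base64-encoded
+            data URLs. Default to False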
+    """
+    try:
+        if (pdf_file is None and pdf_path is None) or (
+            pdf_file is not None and pdf_path is not None
+        ):
             return JSONResponse(
-                content={'error': 'Invalid parse method'}, status_code=400
+                content={"error": "Must provide either pdf_file or pdf_path"},
+                status_code=400,
             )
 
-        if len(model_json) == 0:
-            if parse_method == 'ocr':
-                infer_result = ds.apply(doc_analyze, ocr=True)
-            else:
-                infer_result = ds.apply(doc_analyze, ocr=False)
+        # Get PDF filename
+        pdf_name = os.path.basename(pdf_path if pdf_path else pdf_file.filename).split(
+            "."
+        )[0]
+        output_path = f"{output_dir}/{pdf_name}"
+        output_image_path = f"{output_path}/images"
 
-        else:
-            infer_result = InferenceResult(model_json, ds)
-
-        if len(model_json) == 0 and not model_config.__use_inside_model__:
-                logger.error('Need model list input')
-                return JSONResponse(
-                    content={'error': 'Model list input required'}, status_code=400
-                )
-        if parse_method == 'ocr':
-            pipe_res = infer_result.pipe_ocr_mode(image_writer)
-        else:
-            pipe_res = infer_result.pipe_txt_mode(image_writer)
+        # Initialize readers/writers and get PDF content
+        writer, image_writer, pdf_bytes = init_writers(
+            pdf_path=pdf_path,
+            pdf_file=pdf_file,
+            output_path=output_path,
+            output_image_path=output_image_path,
+        )
+
+        # Process PDF
+        infer_result, pipe_result = process_pdf(pdf_bytes, parse_method, image_writer)
+
+        # Use MemoryDataWriter to get results
+        content_list_writer = MemoryDataWriter()
+        md_content_writer = MemoryDataWriter()
+        middle_json_writer = MemoryDataWriter()
 
+        # Use PipeResult's dump method to get data
+        pipe_result.dump_content_list(content_list_writer, "", "images")
+        pipe_result.dump_md(md_content_writer, "", "images")
+        pipe_result.dump_middle_json(middle_json_writer, "")
 
-        # Save results in text and md format
-        content_list = pipe_res.get_content_list(image_path_parent, drop_mode='none')
-        md_content = pipe_res.get_markdown(image_path_parent, drop_mode='none')
+        # Get content
+        content_list = json.loads(content_list_writer.get_value())
+        md_content = md_content_writer.get_value()
+        middle_json = json.loads(middle_json_writer.get_value())
+        model_json = infer_result.get_infer_res()
 
+        # If results need to be saved
         if is_json_md_dump:
-            json_md_dump(infer_result._infer_res, pipe_res._pipe_res, md_writer, pdf_name, content_list, md_content)
-        data = {
-            'layout': copy.deepcopy(infer_result._infer_res),
-            'info': pipe_res._pipe_res,
-            'content_list': content_list,
-            'md_content': md_content,
-        }
+            writer.write_string(
+                f"{pdf_name}_content_list.json", content_list_writer.get_value()
+            )
+            writer.write_string(f"{pdf_name}.md", md_content)
+            writer.write_string(
+                f"{pdf_name}_middle.json", middle_json_writer.get_value()
+            )
+            writer.write_string(
+                f"{pdf_name}_model.json",
+                json.dumps(model_json, indent=4, ensure_ascii=False),
+            )
+            # Save visualization results
+            pipe_result.draw_layout(os.path.join(output_path, f"{pdf_name}_layout.pdf"))
+            pipe_result.draw_span(os.path.join(output_path, f"{pdf_name}_spans.pdf"))
+            pipe_result.draw_line_sort(
+                os.path.join(output_path, f"{pdf_name}_line_sort.pdf")
+            )
+            infer_result.draw_model(os.path.join(output_path, f"{pdf_name}_model.pdf"))
+
+        # Build return data
+        data = {}
+        if return_layout:
+            data["layout"] = model_json
+        if return_info:
+            data["info"] = middle_json
+        if return_content_list:
+            data["content_list"] = content_list
+        if return_images:
+            image_paths = glob(f"{output_image_path}/*.jpg")
+            data["images"] = {
+                os.path.basename(
+                    image_path
+                ): f"data:image/jpeg;base64,{encode_image(image_path)}"
+                for image_path in image_paths
+            }
+        data["md_content"] = md_content  # md_content is always returned
+
+        # Clean up memory writers
+        content_list_writer.close()
+        md_content_writer.close()
+        middle_json_writer.close()
+
         return JSONResponse(data, status_code=200)
 
     except Exception as e:
         logger.exception(e)
-        return JSONResponse(content={'error': str(e)}, status_code=500)
-    finally:
-        # Clean up the temporary file
-        if 'temp_pdf_path' in locals():
-            os.unlink(temp_pdf_path)
+        return JSONResponse(content={"error": str(e)}, status_code=500)
 
 
-if __name__ == '__main__':
-    uvicorn.run(app, host='0.0.0.0', port=8888)
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8888)
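
When `return_images=True`, the `images` field maps each filename to a `data:image/jpeg;base64,...` URL produced by `encode_image` above. Below is a small client-side sketch for writing those images back to disk; `result` stands for the decoded JSON response and the output directory name is arbitrary.

```python
# Sketch: persist the base64-encoded images returned by /pdf_parse.
import base64
import os

def save_returned_images(result: dict, out_dir: str = "returned_images") -> None:
    os.makedirs(out_dir, exist_ok=True)
    for name, data_url in result.get("images", {}).items():
        # Values look like "data:image/jpeg;base64,<payload>"; keep only the payload.
        payload = data_url.split(",", 1)[1]
        with open(os.path.join(out_dir, name), "wb") as fh:
            fh.write(base64.b64decode(payload))
```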

+ 32 - 0
projects/web_api/download_models.py

@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+from huggingface_hub import snapshot_download
+
+if __name__ == "__main__":
+
+    mineru_patterns = [
+        "models/Layout/LayoutLMv3/*",
+        "models/Layout/YOLO/*",
+        "models/MFD/YOLO/*",
+        "models/MFR/unimernet_small_2501/*",
+        "models/TabRec/TableMaster/*",
+        "models/TabRec/StructEqTable/*",
+    ]
+    model_dir = snapshot_download(
+        "opendatalab/PDF-Extract-Kit-1.0",
+        allow_patterns=mineru_patterns,
+        local_dir="/opt/",
+    )
+
+    layoutreader_pattern = [
+        "*.json",
+        "*.safetensors",
+    ]
+    layoutreader_model_dir = snapshot_download(
+        "hantian/layoutreader",
+        allow_patterns=layoutreader_pattern,
+        local_dir="/opt/layoutreader/",
+    )
+
+    model_dir = model_dir + "/models"
+    print(f"model_dir is: {model_dir}")
+    print(f"layoutreader_model_dir is: {layoutreader_model_dir}")

+ 5 - 0
projects/web_api/entrypoint.sh

@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+. /app/venv/bin/activate
+exec uvicorn app:app "$@"

+ 34 - 3
projects/web_api/magic-pdf.json

@@ -4,10 +4,41 @@
         "bucket-name-2":["ak", "sk", "endpoint"]
     },
     "models-dir":"/opt/models",
+    "layoutreader-model-dir":"/opt/layoutreader",
     "device-mode":"cuda",
+    "layout-config": {
+        "model": "doclayout_yolo"
+    },
+    "formula-config": {
+        "mfd_model": "yolo_v8_mfd",
+        "mfr_model": "unimernet_small",
+        "enable": true
+    },
     "table-config": {
-        "model": "TableMaster",
-        "is_table_recog_enable": false,
+        "model": "rapid_table",
+        "sub_model": "slanet_plus",
+        "enable": true,
         "max_time": 400
-    }
+    },
+    "llm-aided-config": {
+        "formula_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-7b-instruct",
+            "enable": false
+        },
+        "text_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-7b-instruct",
+            "enable": false
+        },
+        "title_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-32b-instruct",
+            "enable": false
+        }
+    },
+    "config_version": "1.1.1"
 }
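
A short sketch (plain stdlib, no magic_pdf APIs assumed) that reads the config from the path the Dockerfile copies it to and prints the switches most relevant to this image:

```python
# Sketch: inspect the key settings in /root/magic-pdf.json.
import json

with open("/root/magic-pdf.json", encoding="utf-8") as fh:
    cfg = json.load(fh)

print("device-mode:   ", cfg["device-mode"])
print("models-dir:    ", cfg["models-dir"])
print("layout model:  ", cfg["layout-config"]["model"])
print("table model:   ", cfg["table-config"]["model"], "/", cfg["table-config"]["sub_model"])
print("formula enable:", cfg["formula-config"]["enable"])
```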

+ 0 - 13
projects/web_api/magic-pdf.template.json

@@ -1,13 +0,0 @@
-{
-    "bucket_info":{
-        "bucket-name-1":["ak", "sk", "endpoint"],
-        "bucket-name-2":["ak", "sk", "endpoint"]
-    },
-    "models-dir":"/tmp/models",
-    "device-mode":"cuda",
-    "table-config": {
-        "model": "TableMaster",
-        "is_table_recog_enable": false,
-        "max_time": 400
-    }
-}

+ 7 - 0
projects/web_api/requirements.txt

@@ -0,0 +1,7 @@
+--extra-index-url https://myhloli.github.io/wheels/
+
+magic-pdf[full]
+
+fastapi
+uvicorn
+python-multipart

Binary
projects/web_api/small_ocr.pdf


+ 0 - 10
projects/web_api/sources.list

@@ -1,10 +0,0 @@
-deb http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
-deb-src http://mirrors.aliyun.com/ubuntu/ focal main restricted universe multiverse
-deb http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
-deb-src http://mirrors.aliyun.com/ubuntu/ focal-security main restricted universe multiverse
-deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
-deb-src http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted universe multiverse
-deb http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
-deb-src http://mirrors.aliyun.com/ubuntu/ focal-proposed main restricted universe multiverse
-deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse
-deb-src http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse

+ 0 - 7
projects/web_api/start_mineru.sh

@@ -1,7 +0,0 @@
-docker run -itd --name=mineru_server --gpus=all -p 8888:8000 quincyqiang/mineru:0.1-models /bin/bash
-
-docker run -itd --name=mineru_server --gpus=all -p 8888:8000 quincyqiang/mineru:0.3-models
-
-docker login --username=1185918903@qq.com registry.cn-beijing.aliyuncs.com
-docker tag quincyqiang/mineru:0.3-models registry.cn-beijing.aliyuncs.com/quincyqiang/gomate:0.3-models
-docker push registry.cn-beijing.aliyuncs.com/quincyqiang/gomate:0.3-models