---
# ============================================================
# model_doctor model inventory (maintained by hand)
# ------------------------------------------------------------
# Covers four kinds of model source:
#   hf         -- HuggingFace repository (auto-downloaded, cached under
#                 defaults.hf_hub_dir)
#   local_file -- a single local weight file (.onnx/.gguf/.pth/...) or a
#                 directory
#   daemon     -- a model reached through an HTTP service
#                 (llama-server / vllm), optionally with local GGUF assets
#   mineru     -- MinerU built-in models (checks the package version plus an
#                 optional model-root fingerprint)
#
# Fingerprint policy:
#   Local files/directories use size + mtime by default (fast, works
#   offline); pass --hash to compute a fast sha256 instead.
#   hf reads only the local snapshot revision by default; pass --online to
#   compare against the latest remote commit.
#   daemon by default probes whether /v1/models contains served_model, plus
#   the fingerprints of the local assets.
#
# Maintenance: add or remove entries here when adding/upgrading a model, then
# run `model_doctor.py update-lock` to freeze the baseline; use
# `model_doctor.py check` for routine health checks.
# ============================================================

defaults:
  hf_hub_dir: "/Users/zhch158/models/hf_home/hub"  # HF cache hub root (= $HF_HOME/hub)
  hash: false  # local files use size+mtime only by default; true computes a fast sha256
  online: false  # remote comparison for hf is off by default (intranet/offline setups)
  daemon_timeout: 3  # daemon probe timeout (seconds)

models:
  # ===== 1. HF repositories (auto-downloaded, cached under hf_hub_dir) =====
  - name: docling-layout-old
    kind: hf
    repo_id: ds4sd/docling-layout-old
    used_by: ["layout/docling（bank_statement_* 默认布局）"]
    enabled: true

  - name: pp-doclayoutv3
    kind: hf
    repo_id: PaddlePaddle/PP-DocLayoutV3_safetensors
    used_by: ["layout/paddle", "seal_supplement"]
    enabled: true

  - name: paddleocr-vl-1.6-hf
    kind: hf
    repo_id: PaddlePaddle/PaddleOCR-VL-1.6
    used_by: ["PaddleOCR-VL transformers 权重（GGUF 转换源）"]
    enabled: false

  - name: mineru-2.5-pro-2604-1.2b-hf
    kind: hf
    repo_id: opendatalab/MinerU2.5-Pro-2604-1.2B
    used_by: ["MinerU2.5-Pro-2604-1.2B transformers 权重（GGUF 转换源）"]
    enabled: false

  - name: glm-ocr-hf
    kind: hf
    repo_id: zai-org/GLM-OCR
    used_by: ["GLM-OCR transformers 权重（GGUF 转换源）"]
    enabled: false

  - name: rtdetr-wired-cell-hf
    kind: hf
    repo_id: PaddlePaddle/RT-DETR-L_wired_table_cell_det
    used_by: ["table_recognition_wired/cell_fusion paddle格式 pdiparams"]
    enabled: false

  # NOTE(review): "rdetr" looks like a typo for "rtdetr" (compare the
  # rtdetr-wired-cell-hf and rtdetr-h-layout-17cls entries). Left unchanged
  # because the name may be referenced by the lock baseline -- confirm
  # before renaming.
  - name: rdetr-h-layout-17cls-hf
    kind: hf
    repo_id: PaddlePaddle/RT-DETR-H_layout_17cls
    used_by: ["layout/paddle paddle格式 pdiparams"]
    enabled: false

  # ===== 2. Local weight files =====
  - name: rtdetr-wired-cell
    kind: local_file
    path: /Users/zhch158/models/pytorch_models/Table/RT-DETR-L_wired_table_cell_det.onnx
    used_by: ["table_recognition_wired/cell_fusion（有线表格单元格融合）"]
    enabled: true

  - name: rtdetr-h-layout-17cls
    kind: local_file
    path: /Users/zhch158/models/pytorch_models/Layout/RT-DETR-H_layout_17cls.onnx
    used_by: ["layout/paddle（可选，默认走 HF 路径）"]
    enabled: false

  # ===== 3. Daemon services (HTTP + associated local GGUF assets) =====
  - name: paddleocr-vl-1.6-daemon
    kind: daemon
    server_url: http://localhost:8102
    served_model: PaddleOCR-VL-1.6  # /v1/models is expected to list this id
    assets:
      - /Users/zhch158/models/PaddleOCR-VL-1.6-GGUF/PaddleOCR-VL-1.6-F16.gguf
      - /Users/zhch158/models/PaddleOCR-VL-1.6-GGUF/PaddleOCR-VL-1.6-F16-mmproj.gguf
    used_by: ["vl_recognition/paddle（bank_statement_paddle_vl_local）"]
    enabled: true

  - name: glm-ocr-daemon-local
    kind: daemon
    server_url: http://localhost:8101
    served_model: glm-ocr
    assets:
      - /Users/zhch158/models/hf_home/hub/models--ggml-org--GLM-OCR-GGUF/snapshots/65a42de1148dbed2297e922b5dbc7d9b70c36578/GLM-OCR-Q8_0.gguf
      - /Users/zhch158/models/hf_home/hub/models--ggml-org--GLM-OCR-GGUF/snapshots/65a42de1148dbed2297e922b5dbc7d9b70c36578/mmproj-GLM-OCR-Q8_0.gguf
    used_by: ["vl_recognition/glmocr（本地）"]
    enabled: false

  - name: mineru-2.5-pro-daemon-local
    kind: daemon
    server_url: http://localhost:8103
    served_model: MinerU2.5-Pro-2604-1.2B
    assets:
      - /Users/zhch158/models/hf_home/hub/models--mradermacher--MinerU2.5-Pro-2604-1.2B-GGUF/snapshots/70429e9c728b6a5e904f358a9936c17bd3f5f4b8/MinerU2.5-Pro-2604-1.2B.Q8_0.gguf
    used_by: ["MinerU2.5 本地 VLM"]
    enabled: false

  - name: mineru-vl-remote
    kind: daemon
    server_url: http://10.192.72.11:20006
    served_model: MinerU2.5
    used_by: ["vl_recognition/mineru_vl（远程 vllm）"]
    enabled: false

  - name: paddleocr-vl-remote
    kind: daemon
    server_url: http://10.192.72.11:20016
    served_model: PaddleOCR-VL-0.9B
    used_by: ["vl_recognition/paddle（远程 vllm）"]
    enabled: false

  # ===== 4. MinerU built-in models (package version + model-root fingerprint) =====
  - name: mineru-builtin
    kind: mineru
    package: mineru
    # Download root of the MinerU pipeline's built-in models
    # (layout/ocr/formula/table, etc.); leave it empty (null) to check only
    # the package version. Common locations: modelscope_cache or hf_home.
    model_root: /Users/zhch158/models/modelscope_cache
    used_by: ["preprocessor/mineru", "ocr_recognition/mineru", "table_classification/paddle"]
    enabled: true