| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
- # ============================================================
- # model_doctor 模型清单(手工维护)
- # ------------------------------------------------------------
- # 覆盖四类模型来源:
- # hf —— HuggingFace 仓库(自动下载,缓存在 defaults.hf_hub_dir)
- # local_file —— 本地单个权重文件(.onnx/.gguf/.pth/...)或目录
- # daemon —— 通过 HTTP 服务访问的模型(llama-server / vllm),可附带本地 GGUF 资产
- # mineru —— MinerU 内置模型(校验包版本 + 可选模型根目录指纹)
- #
- # 指纹策略:
- # 本地文件/目录默认用 size + mtime(快、可离线);加 --hash 才算快速 sha256。
- # hf 默认只读本地快照 revision;加 --online 才查远端最新 commit 比对。
- # daemon 默认探测 /v1/models 是否包含 served_model + 本地 assets 指纹。
- #
- # 维护说明:新增/升级模型时在此增删条目,再运行 `model_doctor.py update-lock`
- # 固化基线;日常用 `model_doctor.py check` 体检。
- # ============================================================
- defaults: # Default settings; the file header describes the --hash / --online flags that enable the non-default behaviour
- hf_hub_dir: "/Users/zhch158/models/hf_home/hub" # Root of the HF cache hub (= $HF_HOME/hub)
- hash: false # Local files are fingerprinted by size+mtime only by default; true computes a fast sha256
- online: false # Remote comparison for hf entries is off by default (intranet/offline scenarios)
- daemon_timeout: 3 # Daemon probe timeout (seconds)
- models:
- # ===== ① HF repositories (downloaded automatically, cached under hf_hub_dir) =====
- - name: docling-layout-old
- kind: hf
- repo_id: ds4sd/docling-layout-old
- used_by: ["layout/docling(bank_statement_* 默认布局)"] # presumably the pipeline components that load this model — confirm against the checker
- enabled: true
- - name: pp-doclayoutv3
- kind: hf
- repo_id: PaddlePaddle/PP-DocLayoutV3_safetensors
- used_by: ["layout/paddle", "seal_supplement"]
- enabled: true
- - name: paddleocr-vl-1.6-hf
- kind: hf
- repo_id: PaddlePaddle/PaddleOCR-VL-1.6
- used_by: ["PaddleOCR-VL transformers 权重(GGUF 转换源)"]
- enabled: false
- - name: mineru-2.5-pro-2604-1.2b-hf
- kind: hf
- repo_id: opendatalab/MinerU2.5-Pro-2604-1.2B
- used_by: ["MinerU2.5-Pro-2604-1.2B transformers 权重(GGUF 转换源)"]
- enabled: false
- - name: glm-ocr-hf
- kind: hf
- repo_id: zai-org/GLM-OCR
- used_by: ["GLM-OCR transformers 权重(GGUF 转换源)"]
- enabled: false
- - name: rtdetr-wired-cell-hf
- kind: hf
- repo_id: PaddlePaddle/RT-DETR-L_wired_table_cell_det
- used_by: ["table_recognition_wired/cell_fusion paddle格式 pdiparams"]
- enabled: false
- - name: rdetr-h-layout-17cls-hf # NOTE(review): "rdetr" looks like a typo for "rtdetr" (cf. rtdetr-wired-cell-hf and rtdetr-h-layout-17cls) — confirm whether the lock baseline is keyed by name before renaming
- kind: hf
- repo_id: PaddlePaddle/RT-DETR-H_layout_17cls
- used_by: ["layout/paddle paddle格式 pdiparams"]
- enabled: false
- # ===== ② Local weight files =====
- - name: rtdetr-wired-cell
- kind: local_file
- path: /Users/zhch158/models/pytorch_models/Table/RT-DETR-L_wired_table_cell_det.onnx # absolute, machine-specific path
- used_by: ["table_recognition_wired/cell_fusion(有线表格单元格融合)"]
- enabled: true
- - name: rtdetr-h-layout-17cls
- kind: local_file
- path: /Users/zhch158/models/pytorch_models/Layout/RT-DETR-H_layout_17cls.onnx # absolute, machine-specific path
- used_by: ["layout/paddle(可选,默认走 HF 路径)"]
- enabled: false
- # ===== ③ daemon services (HTTP + associated local GGUF assets) =====
- - name: paddleocr-vl-1.6-daemon
- kind: daemon
- server_url: http://localhost:8102
- served_model: PaddleOCR-VL-1.6 # /v1/models is expected to return a list containing this id
- assets:
- - /Users/zhch158/models/PaddleOCR-VL-1.6-GGUF/PaddleOCR-VL-1.6-F16.gguf
- - /Users/zhch158/models/PaddleOCR-VL-1.6-GGUF/PaddleOCR-VL-1.6-F16-mmproj.gguf
- used_by: ["vl_recognition/paddle(bank_statement_paddle_vl_local)"]
- enabled: true
- - name: glm-ocr-daemon-local
- kind: daemon
- server_url: http://localhost:8101
- served_model: glm-ocr
- assets: # NOTE(review): these paths are pinned to one HF snapshot revision and repeat defaults.hf_hub_dir — update them if the cache root moves or the revision changes
- - /Users/zhch158/models/hf_home/hub/models--ggml-org--GLM-OCR-GGUF/snapshots/65a42de1148dbed2297e922b5dbc7d9b70c36578/GLM-OCR-Q8_0.gguf
- - /Users/zhch158/models/hf_home/hub/models--ggml-org--GLM-OCR-GGUF/snapshots/65a42de1148dbed2297e922b5dbc7d9b70c36578/mmproj-GLM-OCR-Q8_0.gguf
- used_by: ["vl_recognition/glmocr(本地)"]
- enabled: false
- - name: mineru-2.5-pro-daemon-local
- kind: daemon
- server_url: http://localhost:8103
- served_model: MinerU2.5-Pro-2604-1.2B
- assets: # NOTE(review): path is pinned to one HF snapshot revision and repeats defaults.hf_hub_dir — update it if the cache root moves or the revision changes
- - /Users/zhch158/models/hf_home/hub/models--mradermacher--MinerU2.5-Pro-2604-1.2B-GGUF/snapshots/70429e9c728b6a5e904f358a9936c17bd3f5f4b8/MinerU2.5-Pro-2604-1.2B.Q8_0.gguf
- used_by: ["MinerU2.5 本地 VLM"]
- enabled: false
- - name: mineru-vl-remote
- kind: daemon # no assets listed — presumably only the HTTP probe applies to this remote server; confirm against the checker
- server_url: http://10.192.72.11:20006
- served_model: MinerU2.5
- used_by: ["vl_recognition/mineru_vl(远程 vllm)"]
- enabled: false
- - name: paddleocr-vl-remote
- kind: daemon # no assets listed — presumably only the HTTP probe applies to this remote server; confirm against the checker
- server_url: http://10.192.72.11:20016
- served_model: PaddleOCR-VL-0.9B
- used_by: ["vl_recognition/paddle(远程 vllm)"]
- enabled: false
- # ===== ④ MinerU built-in models (package version + model root directory fingerprint) =====
- - name: mineru-builtin
- kind: mineru
- package: mineru
- # Download root directory of the MinerU pipeline's built-in models (layout/ocr/formula/table, etc.);
- # leave empty (null) to validate only the package version. Common locations: modelscope_cache or hf_home.
- model_root: /Users/zhch158/models/modelscope_cache
- used_by: ["preprocessor/mineru", "ocr_recognition/mineru", "table_classification/paddle"]
- enabled: true
|