# model_registry.yaml
  # ============================================================
  # model_doctor 模型清单(手工维护)
  # ------------------------------------------------------------
  # 覆盖四类模型来源:
  #   hf         —— HuggingFace 仓库(自动下载,缓存在 defaults.hf_hub_dir)
  #   local_file —— 本地单个权重文件(.onnx/.gguf/.pth/...)或目录
  #   daemon     —— 通过 HTTP 服务访问的模型(llama-server / vllm),可附带本地 GGUF 资产
  #   mineru     —— MinerU 内置模型(校验包版本 + 可选模型根目录指纹)
  #
  # 指纹策略:
  #   本地文件/目录默认用 size + mtime(快、可离线);加 --hash 才算快速 sha256。
  #   hf 默认只读本地快照 revision;加 --online 才查远端最新 commit 比对。
  #   daemon 默认探测 /v1/models 是否包含 served_model + 本地 assets 指纹。
  #
  # 维护说明:新增/升级模型时在此增删条目,再运行 `model_doctor.py update-lock`
  #   固化基线;日常用 `model_doctor.py check` 体检。
  # ============================================================
  # Default settings for model_doctor. The four keys below are nested under
  # `defaults`; the pasted copy had them flattened to the same level.
  defaults:
    # Root of the HuggingFace hub cache (= $HF_HOME/hub).
    hf_hub_dir: "/Users/zhch158/models/hf_home/hub"
    # Local files are fingerprinted by size + mtime only by default;
    # set to true to compute the quick sha256.
    hash: false
    # Remote comparison for hf entries is off by default
    # (intranet / offline scenarios).
    online: false
    # Timeout for probing a daemon, in seconds.
    daemon_timeout: 3
  models:
  # ===== ① HF repositories (auto-downloaded, cached under hf_hub_dir) =====
  - name: docling-layout-old
    kind: hf
    repo_id: ds4sd/docling-layout-old
    used_by: ["layout/docling(bank_statement_* 默认布局)"]
    enabled: true
  - name: pp-doclayoutv3
    kind: hf
    repo_id: PaddlePaddle/PP-DocLayoutV3_safetensors
    used_by: ["layout/paddle", "seal_supplement"]
    enabled: true
  - name: paddleocr-vl-1.6-hf
    kind: hf
    repo_id: PaddlePaddle/PaddleOCR-VL-1.6
    used_by: ["PaddleOCR-VL transformers 权重(GGUF 转换源)"]
    enabled: false
  - name: mineru-2.5-pro-2604-1.2b-hf
    kind: hf
    repo_id: opendatalab/MinerU2.5-Pro-2604-1.2B
    used_by: ["MinerU2.5-Pro-2604-1.2B transformers 权重(GGUF 转换源)"]
    enabled: false
  - name: glm-ocr-hf
    kind: hf
    repo_id: zai-org/GLM-OCR
    used_by: ["GLM-OCR transformers 权重(GGUF 转换源)"]
    enabled: false
  - name: rtdetr-wired-cell-hf
    kind: hf
    repo_id: PaddlePaddle/RT-DETR-L_wired_table_cell_det
    used_by: ["table_recognition_wired/cell_fusion paddle格式 pdiparams"]
    enabled: false
  # NOTE(review): "rdetr" looks like a typo for "rtdetr" (compare
  # rtdetr-wired-cell-hf above). The name is left unchanged in case a lock
  # file or script already references it; confirm before renaming.
  - name: rdetr-h-layout-17cls-hf
    kind: hf
    repo_id: PaddlePaddle/RT-DETR-H_layout_17cls
    used_by: ["layout/paddle paddle格式 pdiparams"]
    enabled: false
  # ===== ② Local weight files =====
  - name: rtdetr-wired-cell
    kind: local_file
    path: /Users/zhch158/models/pytorch_models/Table/RT-DETR-L_wired_table_cell_det.onnx
    used_by: ["table_recognition_wired/cell_fusion(有线表格单元格融合)"]
    enabled: true
  - name: rtdetr-h-layout-17cls
    kind: local_file
    path: /Users/zhch158/models/pytorch_models/Layout/RT-DETR-H_layout_17cls.onnx
    used_by: ["layout/paddle(可选,默认走 HF 路径)"]
    enabled: false
  # ===== ③ Daemon services (HTTP + associated local GGUF assets) =====
  - name: paddleocr-vl-1.6-daemon
    kind: daemon
    server_url: http://localhost:8102
    # /v1/models is expected to return a list containing this id.
    served_model: PaddleOCR-VL-1.6
    assets:
    - /Users/zhch158/models/PaddleOCR-VL-1.6-GGUF/PaddleOCR-VL-1.6-F16.gguf
    - /Users/zhch158/models/PaddleOCR-VL-1.6-GGUF/PaddleOCR-VL-1.6-F16-mmproj.gguf
    used_by: ["vl_recognition/paddle(bank_statement_paddle_vl_local)"]
    enabled: true
  - name: glm-ocr-daemon-local
    kind: daemon
    server_url: http://localhost:8101
    served_model: glm-ocr
    assets:
    - /Users/zhch158/models/hf_home/hub/models--ggml-org--GLM-OCR-GGUF/snapshots/65a42de1148dbed2297e922b5dbc7d9b70c36578/GLM-OCR-Q8_0.gguf
    - /Users/zhch158/models/hf_home/hub/models--ggml-org--GLM-OCR-GGUF/snapshots/65a42de1148dbed2297e922b5dbc7d9b70c36578/mmproj-GLM-OCR-Q8_0.gguf
    used_by: ["vl_recognition/glmocr(本地)"]
    enabled: false
  - name: mineru-2.5-pro-daemon-local
    kind: daemon
    server_url: http://localhost:8103
    served_model: MinerU2.5-Pro-2604-1.2B
    assets:
    - /Users/zhch158/models/hf_home/hub/models--mradermacher--MinerU2.5-Pro-2604-1.2B-GGUF/snapshots/70429e9c728b6a5e904f358a9936c17bd3f5f4b8/MinerU2.5-Pro-2604-1.2B.Q8_0.gguf
    used_by: ["MinerU2.5 本地 VLM"]
    enabled: false
  - name: mineru-vl-remote
    kind: daemon
    server_url: http://10.192.72.11:20006
    served_model: MinerU2.5
    used_by: ["vl_recognition/mineru_vl(远程 vllm)"]
    enabled: false
  - name: paddleocr-vl-remote
    kind: daemon
    server_url: http://10.192.72.11:20016
    served_model: PaddleOCR-VL-0.9B
    used_by: ["vl_recognition/paddle(远程 vllm)"]
    enabled: false
  # ===== ④ MinerU built-in models (package version + model root fingerprint) =====
  - name: mineru-builtin
    kind: mineru
    package: mineru
    # Download root of the models built into the MinerU pipeline
    # (layout/ocr/formula/table, etc.); leave empty (null) to check only the
    # package version. Common locations: modelscope_cache or hf_home.
    model_root: /Users/zhch158/models/modelscope_cache
    used_by: ["preprocessor/mineru", "ocr_recognition/mineru", "table_classification/paddle"]
    enabled: true