|
@@ -8,7 +8,22 @@ processors:
|
|
|
# MinerU vLLM 处理器
|
|
# MinerU vLLM 处理器
|
|
|
# 基于 MinerU 的多线程批量处理(支持 PDF 和图片)
|
|
# 基于 MinerU 的多线程批量处理(支持 PDF 和图片)
|
|
|
# -------------------------------------------------------------------------
|
|
# -------------------------------------------------------------------------
|
|
|
- yusys_ocr:
|
|
|
|
|
|
|
+ yusys_ocr_v4:
|
|
|
|
|
+ script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
|
|
|
|
|
+ input_arg: "--input"
|
|
|
|
|
+ output_arg: "--output_dir"
|
|
|
|
|
+ extra_args:
|
|
|
|
|
+ - "--config=/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml"
|
|
|
|
|
+ - "--pages=1-35"
|
|
|
|
|
+ - "--streaming"
|
|
|
|
|
+ - "--debug"
|
|
|
|
|
+ - "--log_level=DEBUG"
|
|
|
|
|
+ output_subdir: "bank_statement_yusys_v4"
|
|
|
|
|
+ log_subdir: "logs/bank_statement_yusys_v4"
|
|
|
|
|
+ venv: "conda activate mineru2"
|
|
|
|
|
+ description: "YUSYS Wired UNET OCR 框架 GLM-OCR"
|
|
|
|
|
+
|
|
|
|
|
+ yusys_ocr_v3:
|
|
|
script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
|
|
script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
|
|
|
input_arg: "--input"
|
|
input_arg: "--input"
|
|
|
output_arg: "--output_dir"
|
|
output_arg: "--output_dir"
|