Explorar el Código

feat: 修改 README 文档，将输出格式参数名称从 format 更改为 output-type

zhch158_admin hace 3 semanas
padre
commit
f0d3236884
Se ha modificado 1 fichero con 18 adiciones y 18 borrados
  1. 18 18
      merger/README.md

+ 18 - 18
merger/README.md

@@ -251,14 +251,14 @@ python merger/merge_mineru_paddle_ocr.py \
   --mineru-file /path/to/mineru_page_001.json \
   --paddle-file /path/to/paddle_page_001.json \
   --output-dir /path/to/output \
-  --format both
+  --output-type both
 
 # 批量处理
 python merger/merge_mineru_paddle_ocr.py \
   --mineru-dir /path/to/mineru_results \
   --paddle-dir /path/to/paddle_results \
   --output-dir /path/to/output \
-  --format both \
+  --output-type both \
   --window 15 \
   --threshold 85
 ```
@@ -272,7 +272,7 @@ python merger/merge_mineru_paddle_ocr.py \
 | `--mineru-dir` | MinerU 结果目录(批量模式) | - |
 | `--paddle-dir` | PaddleOCR 结果目录(批量模式) | - |
 | `-o, --output-dir` | 输出目录(必需) | - |
-| `-f, --format` | 输出格式:json/markdown/both | both |
+| `-f, --output-type` | 输出格式:json/markdown/both | both |
 | `-w, --window` | 向前查找窗口大小 | 15 |
 | `-t, --threshold` | 文本相似度阈值(0-100) | 80 |
 
@@ -287,14 +287,14 @@ python merger/merge_paddleocr_vl_paddleocr.py \
   --paddleocr-vl-file /path/to/paddleocr_vl_page_001.json \
   --paddle-file /path/to/paddle_page_001.json \
   --output-dir /path/to/output \
-  --format both
+  --output-type both
 
 # 批量处理
 python merger/merge_paddleocr_vl_paddleocr.py \
   --paddleocr-vl-dir /path/to/paddleocr_vl_results \
   --paddle-dir /path/to/paddle_results \
   --output-dir /path/to/output \
-  --format both \
+  --output-type both \
   --window 15 \
   --threshold 85
 ```
@@ -308,7 +308,7 @@ python merger/merge_paddleocr_vl_paddleocr.py \
 | `--paddleocr-vl-dir` | PaddleOCR_VL 结果目录(批量模式) | - |
 | `--paddle-dir` | PaddleOCR 结果目录(批量模式) | - |
 | `-o, --output-dir` | 输出目录(必需) | - |
-| `-f, --format` | 输出格式:json/markdown/both | both |
+| `-f, --output-type` | 输出格式:json/markdown/both | both |
 | `-w, --window` | 向前查找窗口大小 | 15 |
 | `-t, --threshold` | 文本相似度阈值(0-100) | 80 |
 
@@ -571,80 +571,80 @@ def test_table_processing():
 ---
 
 ### 运行试验数据
-1. mineru-vlm-2.5.3
+#### 1. mineru-vlm-2.5.3
 ```bash
 echo "A用户_单元格扫描流水"
 python merge_mineru_paddle_ocr.py \
   --mineru-dir "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/mineru-vlm-2.5.3_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/merged_results" \
-  --format "both"
+  --output-type "both"
 
 echo "B用户_扫描流水"
 python merge_mineru_paddle_ocr.py \
   --mineru-dir "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/mineru-vlm-2.5.3_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/merged_results" \
-  --format "both"
+  --output-type "both"
 
 echo "德_内蒙古银行照"
 python merge_mineru_paddle_ocr.py \
   --mineru-dir "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/merged_results" \
-  --format "both"
+  --output-type "both"
 
 echo "对公_招商银行图"
 python merge_mineru_paddle_ocr.py \
   --mineru-dir "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/mineru-vlm-2.5.3_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/merged_results" \
-  --format "both"
+  --output-type "both"
 
 echo "至远彩色印刷工业有限公司"
 python merge_mineru_paddle_ocr.py \
   --mineru-dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/mineru-vlm-2.5.3_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/merged_results" \
-  --format "both"
+  --output-type "both"
 
 ```
 
-2. PaddleOCR_VL_Results
+#### 2. PaddleOCR_VL_Results
 ```bash
 echo "A用户_单元格扫描流水"
 python merge_paddleocr_vl_paddleocr.py \
   --paddleocr-vl-dir "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/PaddleOCR_VL_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/PaddleOCR_VL_Results_cell_bbox" \
-  --format "both"
+  --output-type "both"
 
 echo "B用户_扫描流水"
 python merge_paddleocr_vl_paddleocr.py \
   --paddleocr-vl-dir "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/PaddleOCR_VL_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/PaddleOCR_VL_Results_cell_bbox" \
-  --format "both"
+  --output-type "both"
 
 echo "德_内蒙古银行照"
 python merge_paddleocr_vl_paddleocr.py \
   --paddleocr-vl-dir "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/PaddleOCR_VL_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/PaddleOCR_VL_Results_cell_bbox" \
-  --format "both"
+  --output-type "both"
 
 echo "对公_招商银行图"
 python merge_paddleocr_vl_paddleocr.py \
   --paddleocr-vl-dir "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/PaddleOCR_VL_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/PaddleOCR_VL_Results_cell_bbox" \
-  --format "both"
+  --output-type "both"
 
 echo "至远彩色印刷工业有限公司"
 python merge_paddleocr_vl_paddleocr.py \
   --paddleocr-vl-dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/PaddleOCR_VL_Results" \
   --paddle-dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results" \
   --output-dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/PaddleOCR_VL_Results_cell_bbox" \
-  --format "both"
+  --output-type "both"
 
 ```