Преглед изворни кода

feat: 更新示例输入输出路径,添加新的测试图像以增强文档解析功能的测试覆盖率

zhch158_admin пре 6 дана
родитељ
комит
bd17ca00f4
1 измењен фајл са 14 додато и 7 уклоњено
  1. 14 7
      ocr_tools/universal_doc_parser/main_v2.py

+ 14 - 7
ocr_tools/universal_doc_parser/main_v2.py

@@ -402,25 +402,31 @@ if __name__ == "__main__":
             # "input": "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行.pdf",
             # "output_dir": "./output/康强_北京农村商业银行_bank_statement_v2",
 
-            # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/mineru_vllm_results/2023年度报告母公司/2023年度报告母公司_page_003.png",
-            # "output_dir": "./output/2023年度报告母公司_bank_statement_v2",
+            "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/A用户_单元格扫描流水_page_002.png",
+            "output_dir": "./output/A用户_单元格扫描流水_bank_statement_wired_unet",
             
             # "input": "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/bank_statement_yusys_v2",
 
-            # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_006_270.png",
+            # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_005.png",
+            # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270.png",
+            # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270_skew(-0.4).png",
             # "output_dir": "./output/2023年度报告母公司/bank_statement_wired_unet",
+
             # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
+            # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_wired_unet",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v2",
 
-            "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/600916_中国黄金_2022年报_page_096.png",
-            "output_dir": "./output/600916_中国黄金_2022年报/bank_statement_wired_unet",
+            # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/600916_中国黄金_2022年报_page_096.png",
+            # "output_dir": "./output/600916_中国黄金_2022年报/bank_statement_wired_unet",
+            # "input": "/Users/zhch158/workspace/data/流水分析/600916_中国黄金_2022年报.pdf",
+            # "output_dir": "./output/600916_中国黄金_2022年报/bank_statement_wired_unet",
 
             # "input": "/Users/zhch158/workspace/data/流水分析/施博深.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/施博深/bank_statement_yusys_v2",
 
-            # "input": "/Users/zhch158/workspace/data/流水分析/施博深.wiredtable/施博深_page_001.png",
-            # "output_dir": "./output/施博深_page_001_bank_statement_wired_unet",
+            # "input": "/Users/zhch158/workspace/data/流水分析/施博深.wiredtable/施博深_page_020.png",
+            # "output_dir": "./output/施博深/bank_statement_wired_unet",
 
             # "input": "/Users/zhch158/workspace/data/流水分析/施博深.wiredtable",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/施博深/bank_statement_wired_unet",
@@ -436,6 +442,7 @@ if __name__ == "__main__":
             # 页面范围(可选)
             # "pages": "6",  # 处理指定页面
             # "pages": "1-3,5,7-10",  # 处理指定页面
+            # "pages": "83-109",  # 处理指定页面
 
             "streaming": True,