Jelajahi Sumber

feat(process_single_input): 添加场景名称设置功能并更新场景参数

zhch158_admin 1 Minggu lalu
induk
komit
4ede25dc86
1 berkas diubah dengan 13 penambahan dan 8 penghapusan
  1. ocr_tools/universal_doc_parser/main_v2.py (+13 −8)

+ 13 - 8
ocr_tools/universal_doc_parser/main_v2.py

@@ -179,6 +179,8 @@ def process_single_input(
         try:
             if scene:
                 pipeline.scene_name = scene
+                if hasattr(pipeline, 'set_scene_name'):
+                    pipeline.set_scene_name(scene)
                 logger.info(f"🔄 Scene overridden to: {scene}")
             
             logger.info(f"🚀 开始处理: {input_path}")
@@ -349,6 +351,7 @@ def main():
     )
     parser.add_argument(
         "--scene", "-s",
+        required=True,
         choices=["bank_statement", "financial_report"],
         help="场景类型(覆盖配置文件设置)"
     )
@@ -436,10 +439,10 @@ if __name__ == "__main__":
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270_skew(-0.4).png",
-            "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
+            # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
             # "output_dir": "./output/2023年度报告母公司/bank_statement_yusys_v3",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v3",
-            "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_glm_vl",
+            # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_glm_vl",
 
             # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v2",
@@ -458,8 +461,9 @@ if __name__ == "__main__":
 
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/提取自赤峰黄金2023年报.pdf",
             # "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
-            # "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
-            # "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
+            "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
+            "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v4",
+            # "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v4",
 
             # "input": "/Users/zhch158/workspace/data/流水分析/施博深.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/施博深/bank_statement_yusys_v3",
@@ -475,7 +479,7 @@ if __name__ == "__main__":
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/山西云集科技有限公司/bank_statement_yusys_v3",
 
             # 配置文件
-            "config": "./config/bank_statement_glm_vl.yaml",
+            "config": "./config/bank_statement_yusys_v4.yaml",
             # "config": "./config/bank_statement_yusys_v3.yaml",
             # "config": "./config/bank_statement_smart_router.yaml",
             # "config": "./config/bank_statement_mineru_vl.yaml",
@@ -483,10 +487,11 @@ if __name__ == "__main__":
             # "config": "./config/bank_statement_paddle_vl.yaml",
             
             # 场景
-            "scene": "bank_statement",
+            # "scene": "bank_statement",
+            "scene": "financial_report",
             
             # 页面范围(可选)
-            "pages": "3-7",  # 只处理前1页
+            "pages": "11",  # 只处理第11页
             # "pages": "1-3,5,7-10",  # 处理指定页面
             # "pages": "83-109",  # 处理指定页面
 
@@ -499,7 +504,7 @@ if __name__ == "__main__":
             "log_level": "DEBUG",
 
             # 日志文件
-            "log_file": "./output/logs/bank_statement_glm_vl/process.log",
+            "log_file": "./output/logs/bank_statement_yusys_v4/process.log",
         }
         
         # 构造参数