|
|
@@ -171,6 +171,13 @@ def process_single_input(
|
|
|
# 创建流水线
|
|
|
pipeline = _create_pipeline(streaming, str(config_path), str(output_dir))
|
|
|
output_config = pipeline.config.get('output', {}) or _get_default_output_config(debug)
|
|
|
+
|
|
|
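+ # The command-line --debug flag takes precedence over yaml: debug_mode is forced on, while save_layout_image / save_ocr_image are only defaulted to True (an explicit yaml value is kept)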
|
|
|
+ if debug:
|
|
|
+ pipeline.debug_mode = True
|
|
|
+ output_config['debug_mode'] = True
|
|
|
+ output_config.setdefault('save_layout_image', True)
|
|
|
+ output_config.setdefault('save_ocr_image', True)
|
|
|
|
|
|
use_context = not streaming and hasattr(pipeline, '__enter__')
|
|
|
if use_context:
|
|
|
@@ -420,8 +427,8 @@ if __name__ == "__main__":
|
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/湛_平安银行图.pdf",
|
|
|
# "output_dir": "./output/湛_平安银行图/bank_statement_yusys_v3",
|
|
|
|
|
|
- # "input": "/Users/zhch158/workspace/data/流水分析/张_微信图.pdf",
|
|
|
- # "output_dir": "./output/张_微信图/bank_statement_yusys_v3",
|
|
|
+ "input": "/Users/zhch158/workspace/data/流水分析/张_微信图.pdf",
|
|
|
+ "output_dir": "./output/张_微信图/bank_statement_yusys_v4",
|
|
|
|
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/许_民生银行图.pdf",
|
|
|
# "output_dir": "./output/许_民生银行图/bank_statement_yusys_v3",
|
|
|
@@ -461,8 +468,8 @@ if __name__ == "__main__":
|
|
|
|
|
|
# "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/提取自赤峰黄金2023年报.pdf",
|
|
|
# "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
|
|
|
- "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
|
|
|
- "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v4",
|
|
|
+ # "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
|
|
|
+ # "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v4",
|
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v4",
|
|
|
|
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/施博深.pdf",
|
|
|
@@ -487,11 +494,11 @@ if __name__ == "__main__":
|
|
|
# "config": "./config/bank_statement_paddle_vl.yaml",
|
|
|
|
|
|
# 场景
|
|
|
- # "scene": "bank_statement",
|
|
|
- "scene": "financial_report",
|
|
|
+ "scene": "bank_statement",
|
|
|
+ # "scene": "financial_report",
|
|
|
|
|
|
# 页面范围(可选)
|
|
|
- "pages": "11", # 只处理前1页
|
|
|
+ "pages": "1", # 只处理前1页
|
|
|
# "pages": "1-3,5,7-10", # 处理指定页面
|
|
|
# "pages": "83-109", # 处理指定页面
|
|
|
|