|
@@ -414,6 +414,15 @@ if __name__ == "__main__":
|
|
|
# 默认配置(用于开发测试)
|
|
# 默认配置(用于开发测试)
|
|
|
default_config = {
|
|
default_config = {
|
|
|
# 测试输入
|
|
# 测试输入
|
|
|
|
|
+ # "input": "/Users/zhch158/workspace/data/流水分析/湛_平安银行图.pdf",
|
|
|
|
|
+ # "output_dir": "./output/湛_平安银行图/bank_statement_yusys_v3",
|
|
|
|
|
+
|
|
|
|
|
+ # "input": "/Users/zhch158/workspace/data/流水分析/张_微信图.pdf",
|
|
|
|
|
+ # "output_dir": "./output/张_微信图/bank_statement_yusys_v3",
|
|
|
|
|
+
|
|
|
|
|
+ # "input": "/Users/zhch158/workspace/data/流水分析/许_民生银行图.pdf",
|
|
|
|
|
+ # "output_dir": "./output/许_民生银行图/bank_statement_yusys_v3",
|
|
|
|
|
+
|
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行.pdf",
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行.pdf",
|
|
|
# "output_dir": "./output/康强_北京农村商业银行/bank_statement_mineru_vl",
|
|
# "output_dir": "./output/康强_北京农村商业银行/bank_statement_mineru_vl",
|
|
|
|
|
|
|
@@ -425,9 +434,12 @@ if __name__ == "__main__":
|
|
|
|
|
|
|
|
# "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_005.png",
|
|
# "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_005.png",
|
|
|
# "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
|
|
# "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
|
|
|
|
|
+ # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
|
|
|
# "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270_skew(-0.4).png",
|
|
# "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270_skew(-0.4).png",
|
|
|
- # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
|
|
|
|
|
|
|
+ "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
|
|
|
|
|
+ # "output_dir": "./output/2023年度报告母公司/bank_statement_yusys_v3",
|
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v3",
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v3",
|
|
|
|
|
+ "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_glm_vl",
|
|
|
|
|
|
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
|
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v2",
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v2",
|
|
@@ -444,8 +456,8 @@ if __name__ == "__main__":
|
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照.pdf",
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照.pdf",
|
|
|
# "output_dir": "./output/德_内蒙古银行照/bank_statement_yusys_v3",
|
|
# "output_dir": "./output/德_内蒙古银行照/bank_statement_yusys_v3",
|
|
|
|
|
|
|
|
- "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/提取自赤峰黄金2023年报.pdf",
|
|
|
|
|
- "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
|
|
|
|
|
|
|
+ # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/提取自赤峰黄金2023年报.pdf",
|
|
|
|
|
+ # "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
|
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
|
|
# "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
|
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
|
|
|
|
|
|
|
@@ -463,7 +475,8 @@ if __name__ == "__main__":
|
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/山西云集科技有限公司/bank_statement_yusys_v3",
|
|
# "output_dir": "/Users/zhch158/workspace/data/流水分析/山西云集科技有限公司/bank_statement_yusys_v3",
|
|
|
|
|
|
|
|
# 配置文件
|
|
# 配置文件
|
|
|
- "config": "./config/bank_statement_yusys_v3.yaml",
|
|
|
|
|
|
|
+ "config": "./config/bank_statement_glm_vl.yaml",
|
|
|
|
|
+ # "config": "./config/bank_statement_yusys_v3.yaml",
|
|
|
# "config": "./config/bank_statement_smart_router.yaml",
|
|
# "config": "./config/bank_statement_smart_router.yaml",
|
|
|
# "config": "./config/bank_statement_mineru_vl.yaml",
|
|
# "config": "./config/bank_statement_mineru_vl.yaml",
|
|
|
# "config": "./config/bank_statement_yusys_v2.yaml",
|
|
# "config": "./config/bank_statement_yusys_v2.yaml",
|
|
@@ -473,7 +486,7 @@ if __name__ == "__main__":
|
|
|
"scene": "bank_statement",
|
|
"scene": "bank_statement",
|
|
|
|
|
|
|
|
# 页面范围(可选)
|
|
# 页面范围(可选)
|
|
|
- "pages": "7", # 只处理前1页
|
|
|
|
|
|
|
+ "pages": "3-7", # 只处理第3-7页
|
|
|
# "pages": "1-3,5,7-10", # 处理指定页面
|
|
# "pages": "1-3,5,7-10", # 处理指定页面
|
|
|
# "pages": "83-109", # 处理指定页面
|
|
# "pages": "83-109", # 处理指定页面
|
|
|
|
|
|
|
@@ -486,7 +499,7 @@ if __name__ == "__main__":
|
|
|
"log_level": "DEBUG",
|
|
"log_level": "DEBUG",
|
|
|
|
|
|
|
|
# 日志文件
|
|
# 日志文件
|
|
|
- "log_file": "./output/logs/bank_statement_yusys_v3/process.log",
|
|
|
|
|
|
|
+ "log_file": "./output/logs/bank_statement_glm_vl/process.log",
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
# 构造参数
|
|
# 构造参数
|