|
|
@@ -0,0 +1,28 @@
|
|
|
"""Run a single image through a PaddleX pipeline and save every result artifact."""

import time
from pathlib import Path

from paddlex import create_pipeline

# Image to process. Alternative sample input kept for quick switching:
# input_path = "./sample_data/300674-母公司现金流量表-扫描.png"
input_path = "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/2023年度报告母公司.img/2023年度报告母公司_page_006.png"

# The pipeline is built from a local YAML config; results are written to a
# directory named after the config file's stem so that runs with different
# configs do not overwrite each other.
pipeline_path = "./my_config/PaddleOCR-VL-Client.yaml"
pipeline_name = Path(pipeline_path).stem
output_path = Path(f"./sample_data/single_pipeline_output/{pipeline_name}/")

pipeline = create_pipeline(pipeline=pipeline_path)

# NOTE(review): the Python predict() API takes snake_case option names; the
# camelCase spellings (useLayoutDetection, ...) are the field names of the
# serving/HTTP API and are presumably ignored or rejected here -- confirm
# against the installed PaddleX version.
output = pipeline.predict(
    input=input_path,
    use_layout_detection=True,
    use_doc_orientation_classify=False,  # document orientation classification off
    use_doc_unwarping=False,  # document unwarping off
)

# Create the output directory once, before consuming results, rather than on
# every loop iteration.
output_path.mkdir(parents=True, exist_ok=True)

# Print each result and save all of its artifacts. Individual formats were
# previously saved via res.save_to_json / res.save_to_markdown; save_all is
# used instead.
for res in output:
    res.print()
    res.save_all(save_path=output_path.as_posix())
|