# test_table_recognition_v2.py (1.3 KB)
  1. from paddlex import create_pipeline
  2. import time
  3. from pathlib import Path
  4. # input_path = "./sample_data/300674-母公司现金流量表-扫描.png"
  5. input_path = "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/table_recognition_v2_Results/对公_招商银行图/对公_招商银行图_page_001.png"
  6. pipeline_path = "./my_config/table_recognition_v2.yaml"
  7. pipeline_name = Path(pipeline_path).stem
  8. output_path = Path(f"./sample_data/single_pipeline_output/{pipeline_name}/")
  9. pipeline = create_pipeline(pipeline=pipeline_path)
  10. # For Image
  11. output = pipeline.predict(
  12. input=input_path,
  13. device="gpu", # 或者 "gpu" 如果你有 GPU 支持
  14. use_doc_orientation_classify=True, # 开启文档方向分类
  15. use_doc_unwarping=False, # 开启文档去畸变
  16. # use_e2e_wireless_table_rec_model=True, # 开启端到端无线表格识别
  17. use_wireless_table_cells_trans_to_html=True, # 开启无线表格单元格转 HTML
  18. )
  19. # 可视化结果并保存 json 结果
  20. for res in output:
  21. res.print()
  22. # res.save_to_json(save_path="sample_data/output")
  23. # res.save_to_markdown(save_path="sample_data/output")
  24. output_path.mkdir(parents=True, exist_ok=True)
  25. res.save_all(save_path=output_path.as_posix()) # 保存所有结果到指定路径