test_ocr.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from paddlex import create_pipeline
  15. pipeline = create_pipeline(pipeline="OCR", limit_side_len=320)
  16. output = pipeline.predict(
  17. "./test_samples/general_ocr_002.png",
  18. use_doc_orientation_classify=True,
  19. use_doc_unwarping=False,
  20. use_textline_orientation=False,
  21. unclip_ratio=3.0,
  22. limit_side_len=1920,
  23. )
  24. # output = pipeline.predict(
  25. # "./test_samples/general_ocr_002.png",
  26. # use_doc_orientation_classify=True,
  27. # use_doc_unwarping=True,
  28. # use_textline_orientation=False,
  29. # )
  30. # output = pipeline.predict(
  31. # "./test_samples/general_ocr_002.png",
  32. # use_doc_orientation_classify=True,
  33. # use_doc_unwarping=False,
  34. # use_textline_orientation=True,
  35. # )
  36. # output = pipeline.predict(
  37. # "./test_samples/general_ocr_002.png",
  38. # use_doc_orientation_classify=True,
  39. # use_doc_unwarping=False,
  40. # use_textline_orientation=False,
  41. # )
  42. # output = pipeline.predict(
  43. # "./test_samples/general_ocr_002.png",
  44. # use_doc_orientation_classify=False,
  45. # use_doc_unwarping=True,
  46. # use_textline_orientation=True,
  47. # )
  48. # output = pipeline.predict(
  49. # "./test_samples/general_ocr_002.png",
  50. # use_doc_orientation_classify=False,
  51. # use_doc_unwarping=True,
  52. # use_textline_orientation=False,
  53. # )
  54. # output = pipeline.predict(
  55. # "./test_samples/general_ocr_002.png",
  56. # use_doc_orientation_classify=False,
  57. # use_doc_unwarping=False,
  58. # use_textline_orientation=True,
  59. # )
  60. # output = pipeline.predict(
  61. # "./test_samples/general_ocr_002.png",
  62. # use_doc_orientation_classify=False,
  63. # use_doc_unwarping=False,
  64. # use_textline_orientation=False,
  65. # )
  66. # output = pipeline.predict("./test_samples/财报1.pdf")
  67. for res in output:
  68. print(res)
  69. res.save_to_img("./output")
  70. res.save_to_json("./output/res.json")