table_structure.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import time
  15. from typing import Any, Dict
  16. import numpy as np
  17. from .table_stucture_utils import OrtInferSession, TableLabelDecode, TablePreprocess
  18. class TableStructurer:
  19. def __init__(self, config: Dict[str, Any]):
  20. self.preprocess_op = TablePreprocess()
  21. self.session = OrtInferSession(config)
  22. self.character = self.session.get_metadata()
  23. self.postprocess_op = TableLabelDecode(self.character)
  24. def __call__(self, img):
  25. starttime = time.time()
  26. data = {"image": img}
  27. data = self.preprocess_op(data)
  28. img = data[0]
  29. if img is None:
  30. return None, 0
  31. img = np.expand_dims(img, axis=0)
  32. img = img.copy()
  33. outputs = self.session([img])
  34. preds = {"loc_preds": outputs[0], "structure_probs": outputs[1]}
  35. shape_list = np.expand_dims(data[-1], axis=0)
  36. post_result = self.postprocess_op(preds, [shape_list])
  37. bbox_list = post_result["bbox_batch_list"][0]
  38. structure_str_list = post_result["structure_batch_list"][0]
  39. structure_str_list = structure_str_list[0]
  40. structure_str_list = (
  41. ["<html>", "<body>", "<table>"]
  42. + structure_str_list
  43. + ["</table>", "</body>", "</html>"]
  44. )
  45. elapse = time.time() - starttime
  46. return structure_str_list, bbox_list, elapse