post_process.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536
  1. import re
  2. def layout_rm_equation(layout_res):
  3. rm_idxs = []
  4. for idx, ele in enumerate(layout_res['layout_dets']):
  5. if ele['category_id'] == 10:
  6. rm_idxs.append(idx)
  7. for idx in rm_idxs[::-1]:
  8. del layout_res['layout_dets'][idx]
  9. return layout_res
  10. def get_croped_image(image_pil, bbox):
  11. x_min, y_min, x_max, y_max = bbox
  12. croped_img = image_pil.crop((x_min, y_min, x_max, y_max))
  13. return croped_img
  14. def latex_rm_whitespace(s: str):
  15. """Remove unnecessary whitespace from LaTeX code.
  16. """
  17. text_reg = r'(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})'
  18. letter = '[a-zA-Z]'
  19. noletter = '[\W_^\d]'
  20. names = [x[0].replace(' ', '') for x in re.findall(text_reg, s)]
  21. s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
  22. news = s
  23. while True:
  24. s = news
  25. news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, noletter), r'\1\2', s)
  26. news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, letter), r'\1\2', news)
  27. news = re.sub(r'(%s)\s+?(%s)' % (letter, noletter), r'\1\2', news)
  28. if news == s:
  29. break
  30. return s