| 123456789101112131415161718192021222324252627282930313233343536 |
- import re
def layout_rm_equation(layout_res):
    """Drop every detection whose ``category_id`` is 10 from a layout result.

    The list stored under ``layout_res['layout_dets']`` is modified in place
    and the same ``layout_res`` object is handed back to the caller.
    """
    detections = layout_res['layout_dets']
    doomed = [
        pos for pos, det in enumerate(detections)
        if det['category_id'] == 10
    ]

    # Delete from the back so the positions still to be removed stay valid.
    for pos in reversed(doomed):
        del detections[pos]
    return layout_res
def get_croped_image(image_pil, bbox):
    """Return the result of ``image_pil.crop`` for the box described by *bbox*.

    *bbox* must unpack into exactly four values, in the order
    ``(x_min, y_min, x_max, y_max)``; they are forwarded to ``crop`` as a
    single 4-tuple.
    """
    left, top, right, bottom = bbox
    return image_pil.crop((left, top, right, bottom))
def latex_rm_whitespace(s: str) -> str:
    r"""Remove unnecessary whitespace from LaTeX code.

    The clean-up runs in two stages:

    1. Every ``\operatorname``, ``\mathrm``, ``\text`` or ``\mathbf`` command
       that is written with a space before its opening brace
       (``\mathrm { d }``) has all space characters inside the matched
       span removed (``\mathrm{d}``).
    2. Whitespace between two non-letter characters, or between an ASCII
       letter and a non-letter character, is removed. Whitespace that
       separates two ASCII letters is kept. A match never starts at a
       backslash that is directly followed by a space, so the LaTeX control
       space ``\ `` survives.

    Stage 2 is repeated until the string stops changing.

    Args:
        s: LaTeX source to compact.

    Returns:
        The compacted LaTeX string.
    """
    text_reg = r'(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})'
    letter = r'[a-zA-Z]'
    # Raw string: '\W' and '\d' are invalid escapes in a normal literal.
    noletter = r'[\W_^\d]'

    # Strip the spaces out of each text-like command directly from its match.
    s = re.sub(text_reg, lambda match: match.group(1).replace(' ', ''), s)

    # Build the three whitespace rules once instead of on every iteration.
    rules = (
        re.compile(r'(?!\\ )(%s)\s+?(%s)' % (noletter, noletter)),
        re.compile(r'(?!\\ )(%s)\s+?(%s)' % (noletter, letter)),
        re.compile(r'(%s)\s+?(%s)' % (letter, noletter)),
    )

    # Matches of one pass cannot overlap, so a single sweep can leave
    # removable whitespace behind; iterate to a fixed point.
    while True:
        news = s
        for rule in rules:
            news = rule.sub(r'\1\2', news)
        if news == s:
            return s
        s = news
|