fix_image.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. import re
  2. from libs.boxbase import _is_in_or_part_overlap, _is_part_overlap, _is_in, find_bottom_nearest_text_bbox, find_left_nearest_text_bbox, find_right_nearest_text_bbox, find_top_nearest_text_bbox
  3. from loguru import logger
  4. from libs.textbase import get_text_block_base_info
  5. def fix_image_vertical(image_bboxes:list, text_blocks:list):
  6. """
  7. 修正图片的位置
  8. 如果图片与文字block发生一定重叠(也就是图片切到了一部分文字),那么减少图片边缘,让文字和图片不再重叠。
  9. 只对垂直方向进行。
  10. """
  11. for image_bbox in image_bboxes:
  12. for text_block in text_blocks:
  13. text_bbox = text_block["bbox"]
  14. if _is_part_overlap(text_bbox, image_bbox) and any([text_bbox[0]>=image_bbox[0] and text_bbox[2]<=image_bbox[2], text_bbox[0]<=image_bbox[0] and text_bbox[2]>=image_bbox[2]]):
  15. if text_bbox[1] < image_bbox[1]:#在图片上方
  16. image_bbox[1] = text_bbox[3]+1
  17. elif text_bbox[3]>image_bbox[3]:#在图片下方
  18. image_bbox[3] = text_bbox[1]-1
  19. return image_bboxes
  20. def __merge_if_common_edge(bbox1, bbox2):
  21. x_min_1, y_min_1, x_max_1, y_max_1 = bbox1
  22. x_min_2, y_min_2, x_max_2, y_max_2 = bbox2
  23. # 检查是否有公共的水平边
  24. if y_min_1 == y_min_2 or y_max_1 == y_max_2:
  25. # 确保一个框的x范围在另一个框的x范围内
  26. if max(x_min_1, x_min_2) <= min(x_max_1, x_max_2):
  27. return [min(x_min_1, x_min_2), min(y_min_1, y_min_2), max(x_max_1, x_max_2), max(y_max_1, y_max_2)]
  28. # 检查是否有公共的垂直边
  29. if x_min_1 == x_min_2 or x_max_1 == x_max_2:
  30. # 确保一个框的y范围在另一个框的y范围内
  31. if max(y_min_1, y_min_2) <= min(y_max_1, y_max_2):
  32. return [min(x_min_1, x_min_2), min(y_min_1, y_min_2), max(x_max_1, x_max_2), max(y_max_1, y_max_2)]
  33. # 如果没有公共边
  34. return None
  35. def fix_seperated_image(image_bboxes:list):
  36. """
  37. 如果2个图片有一个边重叠,那么合并2个图片
  38. """
  39. new_images = []
  40. droped_img_idx = []
  41. for i in range(0, len(image_bboxes)):
  42. for j in range(i+1, len(image_bboxes)):
  43. new_img = __merge_if_common_edge(image_bboxes[i], image_bboxes[j])
  44. if new_img is not None:
  45. new_images.append(new_img)
  46. droped_img_idx.append(i)
  47. droped_img_idx.append(j)
  48. break
  49. for i in range(0, len(image_bboxes)):
  50. if i not in droped_img_idx:
  51. new_images.append(image_bboxes[i])
  52. return new_images
  53. def __check_img_title_pattern(text):
  54. """
  55. 检查文本段是否是表格的标题
  56. """
  57. patterns = [r"^(fig|figure).*", r"^(scheme).*"]
  58. text = text.strip()
  59. for pattern in patterns:
  60. match = re.match(pattern, text, re.IGNORECASE)
  61. if match:
  62. return True
  63. return False
  64. def __get_fig_caption_text(text_block):
  65. txt = " ".join(span['text'] for line in text_block['lines'] for span in line['spans'])
  66. line_cnt = len(text_block['lines'])
  67. txt = txt.replace("Ž . ", '')
  68. return txt, line_cnt
  69. def __find_and_extend_bottom_caption(text_block, pymu_blocks, image_box):
  70. """
  71. 继续向下方寻找和图片caption字号,字体,颜色一样的文字框,合并入caption。
  72. text_block是已经找到的图片catpion(这个caption可能不全,多行被划分到多个pymu block里了)
  73. """
  74. combined_image_caption_text_block = list(text_block.copy()['bbox'])
  75. base_font_color, base_font_size, base_font_type = get_text_block_base_info(text_block)
  76. while True:
  77. tb_add = find_bottom_nearest_text_bbox(pymu_blocks, combined_image_caption_text_block)
  78. if not tb_add:
  79. break
  80. tb_font_color, tb_font_size, tb_font_type = get_text_block_base_info(tb_add)
  81. if tb_font_color==base_font_color and tb_font_size==base_font_size and tb_font_type==base_font_type:
  82. combined_image_caption_text_block[0] = min(combined_image_caption_text_block[0], tb_add['bbox'][0])
  83. combined_image_caption_text_block[2] = max(combined_image_caption_text_block[2], tb_add['bbox'][2])
  84. combined_image_caption_text_block[3] = tb_add['bbox'][3]
  85. else:
  86. break
  87. image_box[0] = min(image_box[0], combined_image_caption_text_block[0])
  88. image_box[1] = min(image_box[1], combined_image_caption_text_block[1])
  89. image_box[2] = max(image_box[2], combined_image_caption_text_block[2])
  90. image_box[3] = max(image_box[3], combined_image_caption_text_block[3])
  91. text_block['_image_caption'] = True
  92. def include_img_title(pymu_blocks, image_bboxes: list):
  93. """
  94. 向上方和下方寻找符合图片title的文本block,合并到图片里
  95. 如果图片上下都有fig的情况怎么办?寻找标题距离最近的那个。
  96. ---
  97. 增加对左侧和右侧图片标题的寻找
  98. """
  99. for tb in image_bboxes:
  100. # 优先找下方的
  101. max_find_cnt = 3 # 向上,向下最多找3个就停止
  102. temp_box = tb.copy()
  103. while max_find_cnt>0:
  104. text_block_btn = find_bottom_nearest_text_bbox(pymu_blocks, temp_box)
  105. if text_block_btn:
  106. txt, line_cnt = __get_fig_caption_text(text_block_btn)
  107. if len(txt.strip())>0:
  108. if not __check_img_title_pattern(txt) and max_find_cnt>0 and line_cnt<3: # 设置line_cnt<=2目的是为了跳过子标题,或者有时候图片下方文字没有被图片识别模型放入图片里
  109. max_find_cnt = max_find_cnt - 1
  110. temp_box[3] = text_block_btn['bbox'][3]
  111. continue
  112. else:
  113. break
  114. else:
  115. temp_box[3] = text_block_btn['bbox'][3] # 宽度不变,扩大
  116. max_find_cnt = max_find_cnt - 1
  117. else:
  118. break
  119. max_find_cnt = 3 # 向上,向下最多找3个就停止
  120. temp_box = tb.copy()
  121. while max_find_cnt>0:
  122. text_block_top = find_top_nearest_text_bbox(pymu_blocks, temp_box)
  123. if text_block_top:
  124. txt, line_cnt = __get_fig_caption_text(text_block_top)
  125. if len(txt.strip())>0:
  126. if not __check_img_title_pattern(txt) and max_find_cnt>0 and line_cnt <3:
  127. max_find_cnt = max_find_cnt - 1
  128. temp_box[1] = text_block_top['bbox'][1]
  129. continue
  130. else:
  131. break
  132. else:
  133. b = text_block_top['bbox']
  134. temp_box[1] = b[1] # 宽度不变,扩大
  135. max_find_cnt = max_find_cnt - 1
  136. else:
  137. break
  138. if text_block_btn and text_block_top and text_block_btn.get("_image_caption", False) is False and text_block_top.get("_image_caption", False) is False :
  139. btn_text, _ = __get_fig_caption_text(text_block_btn)
  140. top_text, _ = __get_fig_caption_text(text_block_top)
  141. if __check_img_title_pattern(btn_text) and __check_img_title_pattern(top_text):
  142. # 取距离图片最近的
  143. btn_text_distance = text_block_btn['bbox'][1] - tb[3]
  144. top_text_distance = tb[1] - text_block_top['bbox'][3]
  145. if btn_text_distance<top_text_distance: # caption在下方
  146. __find_and_extend_bottom_caption(text_block_btn, pymu_blocks, tb)
  147. else:
  148. text_block = text_block_top
  149. tb[0] = min(tb[0], text_block['bbox'][0])
  150. tb[1] = min(tb[1], text_block['bbox'][1])
  151. tb[2] = max(tb[2], text_block['bbox'][2])
  152. tb[3] = max(tb[3], text_block['bbox'][3])
  153. text_block_btn['_image_caption'] = True
  154. continue
  155. text_block = text_block_btn # find_bottom_nearest_text_bbox(pymu_blocks, tb)
  156. if text_block and text_block.get("_image_caption", False) is False:
  157. first_text_line, _ = __get_fig_caption_text(text_block)
  158. if __check_img_title_pattern(first_text_line):
  159. # 发现特征之后,继续向相同方向寻找(想同颜色,想同大小,想同字体)的textblock
  160. __find_and_extend_bottom_caption(text_block, pymu_blocks, tb)
  161. continue
  162. text_block = text_block_top # find_top_nearest_text_bbox(pymu_blocks, tb)
  163. if text_block and text_block.get("_image_caption", False) is False:
  164. first_text_line, _ = __get_fig_caption_text(text_block)
  165. if __check_img_title_pattern(first_text_line):
  166. tb[0] = min(tb[0], text_block['bbox'][0])
  167. tb[1] = min(tb[1], text_block['bbox'][1])
  168. tb[2] = max(tb[2], text_block['bbox'][2])
  169. tb[3] = max(tb[3], text_block['bbox'][3])
  170. text_block['_image_caption'] = True
  171. continue
  172. """向左、向右寻找,暂时只寻找一次"""
  173. left_text_block = find_left_nearest_text_bbox(pymu_blocks, tb)
  174. if left_text_block and left_text_block.get("_image_caption", False) is False:
  175. first_text_line, _ = __get_fig_caption_text(left_text_block)
  176. if __check_img_title_pattern(first_text_line):
  177. tb[0] = min(tb[0], left_text_block['bbox'][0])
  178. tb[1] = min(tb[1], left_text_block['bbox'][1])
  179. tb[2] = max(tb[2], left_text_block['bbox'][2])
  180. tb[3] = max(tb[3], left_text_block['bbox'][3])
  181. left_text_block['_image_caption'] = True
  182. continue
  183. right_text_block = find_right_nearest_text_bbox(pymu_blocks, tb)
  184. if right_text_block and right_text_block.get("_image_caption", False) is False:
  185. first_text_line, _ = __get_fig_caption_text(right_text_block)
  186. if __check_img_title_pattern(first_text_line):
  187. tb[0] = min(tb[0], right_text_block['bbox'][0])
  188. tb[1] = min(tb[1], right_text_block['bbox'][1])
  189. tb[2] = max(tb[2], right_text_block['bbox'][2])
  190. tb[3] = max(tb[3], right_text_block['bbox'][3])
  191. right_text_block['_image_caption'] = True
  192. continue
  193. return image_bboxes
  194. def combine_images(image_bboxes:list):
  195. """
  196. 合并图片,如果图片有重叠,那么合并
  197. """
  198. new_images = []
  199. droped_img_idx = []
  200. for i in range(0, len(image_bboxes)):
  201. for j in range(i+1, len(image_bboxes)):
  202. if j not in droped_img_idx and _is_in_or_part_overlap(image_bboxes[i], image_bboxes[j]):
  203. # 合并
  204. image_bboxes[i][0], image_bboxes[i][1],image_bboxes[i][2],image_bboxes[i][3] = min(image_bboxes[i][0], image_bboxes[j][0]), min(image_bboxes[i][1], image_bboxes[j][1]), max(image_bboxes[i][2], image_bboxes[j][2]), max(image_bboxes[i][3], image_bboxes[j][3])
  205. droped_img_idx.append(j)
  206. for i in range(0, len(image_bboxes)):
  207. if i not in droped_img_idx:
  208. new_images.append(image_bboxes[i])
  209. return new_images