extract_caption.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. from magic_pdf.libs.boxbase import _is_in
  2. def extract_caption_bbox(outer: list, inner: list) -> list:
  3. """
  4. ret: list of {
  5. "bbox": [1,2,3,4],
  6. "caption": [5,6,7,8] # may existed
  7. }
  8. """
  9. found_count = 0 # for debug
  10. print(outer, inner)
  11. def is_float_equal(a, b):
  12. if 0.01 > abs(a - b): # non strict float equal compare
  13. return True
  14. return False
  15. outer_h = {i: outer[i] for i in range(len(outer))}
  16. ret = []
  17. for v in inner:
  18. ix0, iy0, ix1, iy1 = v
  19. found_idx = None
  20. d = {"bbox": v[:4]}
  21. for k in outer_h:
  22. ox0, oy0, ox1, oy1 = outer_h[k]
  23. equal_float_flags = [
  24. is_float_equal(ix0, ox0),
  25. is_float_equal(iy0, oy0),
  26. is_float_equal(ix1, ox1),
  27. is_float_equal(iy1, oy1),
  28. ]
  29. if _is_in(v, outer_h[k]) and not all(equal_float_flags):
  30. found_idx = k
  31. break
  32. if found_idx is not None:
  33. found_count += 1
  34. captions: list[list] = []
  35. ox0, oy0, ox1, oy1 = outer_h[found_idx]
  36. captions = [
  37. [ox0, oy0, ix0, oy1],
  38. [ox0, oy0, ox1, iy0],
  39. [ox0, iy1, ox1, oy1],
  40. [ix1, oy0, ox1, oy1],
  41. ]
  42. captions = sorted(
  43. captions,
  44. key=lambda rect: abs(rect[0] - rect[2]) * abs(rect[1] - rect[3]),
  45. ) # 面积最大的框就是caption
  46. d["caption"] = captions[-1]
  47. outer_h.pop(
  48. found_idx
  49. ) # 同一个 outer box 只能用于确定一个 inner box 的 caption 位置。
  50. ret.append(d)
  51. print("found_count: ", found_count)
  52. return ret