remove_bbox_overlap.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in
  2. def _remove_overlap_between_bbox(spans):
  3. res = []
  4. for v in spans:
  5. for i in range(len(res)):
  6. if _is_in(res[i]["bbox"], v["bbox"]) or _is_in(v["bbox"], res[i]["bbox"]):
  7. continue
  8. if _is_in_or_part_overlap(res[i]["bbox"], v["bbox"]):
  9. ix0, iy0, ix1, iy1 = res[i]["bbox"]
  10. x0, y0, x1, y1 = v["bbox"]
  11. diff_x = min(x1, ix1) - max(x0, ix0)
  12. diff_y = min(y1, iy1) - max(y0, iy0)
  13. if diff_y > diff_x:
  14. if x1 >= ix1:
  15. mid = (x0 + ix1) // 2
  16. ix1 = min(mid - 0.25, ix1)
  17. x0 = max(mid + 0.25, x0)
  18. else:
  19. mid = (ix0 + x1) // 2
  20. ix0 = max(mid + 0.25, ix0)
  21. x1 = min(mid -0.25, x1)
  22. else:
  23. if y1 >= iy1:
  24. mid = (y0 + iy1) // 2
  25. y0 = max(mid + 0.25, y0)
  26. iy1 = min(iy1, mid-0.25)
  27. else:
  28. mid = (iy0 + y1) // 2
  29. y1 = min(y1, mid-0.25)
  30. iy0 = max(mid + 0.25, iy0)
  31. res[i]["bbox"] = [ix0, iy0, ix1, iy1]
  32. v["bbox"] = [x0, y0, x1, y1]
  33. res.append(v)
  34. return res
  35. def remove_overlap_between_bbox(spans):
  36. return _remove_overlap_between_bbox(spans)