| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
- from magic_pdf.libs.boxbase import _is_in
def extract_caption_bbox(outer: list, inner: list) -> list:
    """Pair inner boxes with enclosing outer boxes and derive caption boxes.

    For each box in ``inner`` (in order), the first still-unused box in
    ``outer`` is selected that contains it according to ``_is_in`` and is not
    the same rectangle (all four coordinates within 0.01 of each other).
    Removing the inner box from the matched outer box leaves four candidate
    strips, one per side; the strip with the largest area is reported as the
    caption. Each outer box is consumed by at most one inner box.

    Args:
        outer: Sequence of boxes ``[x0, y0, x1, y1, ...]``. Only the first
            four values of each box are used.
        inner: Sequence of boxes ``[x0, y0, x1, y1, ...]``. Only the first
            four values of each box are used, so trailing fields do not
            break coordinate unpacking.

    Returns:
        A list with one dict per inner box, in input order::

            {
                "bbox": [x0, y0, x1, y1],
                "caption": [x0, y0, x1, y1],  # only when an outer box matched
            }
    """
    tolerance = 0.01  # absolute slack for treating two coordinates as equal

    def is_float_equal(a, b):
        # Deliberately non-strict float comparison.
        return abs(a - b) < tolerance

    def area(rect):
        return abs(rect[0] - rect[2]) * abs(rect[1] - rect[3])

    # Keyed by original position so a matched outer box can be removed
    # without disturbing the lookup of the remaining ones.
    available = dict(enumerate(outer))
    ret = []
    for box in inner:
        inner_bbox = box[:4]
        ix0, iy0, ix1, iy1 = inner_bbox
        entry = {"bbox": inner_bbox}

        found_idx = None
        for idx, candidate in available.items():
            outer_bbox = candidate[:4]
            ox0, oy0, ox1, oy1 = outer_bbox
            same_rect = (
                is_float_equal(ix0, ox0)
                and is_float_equal(iy0, oy0)
                and is_float_equal(ix1, ox1)
                and is_float_equal(iy1, oy1)
            )
            # An outer box that coincides with the inner box leaves no room
            # for a caption, so it is not accepted as a match.
            if _is_in(inner_bbox, outer_bbox) and not same_rect:
                found_idx = idx
                break

        if found_idx is not None:
            # Each outer box may determine the caption of only one inner box.
            ox0, oy0, ox1, oy1 = available.pop(found_idx)[:4]
            strips = [
                [ox0, oy0, ix0, oy1],  # between outer x0 and inner x0
                [ox0, oy0, ox1, iy0],  # between outer y0 and inner y0
                [ox0, iy1, ox1, oy1],  # between inner y1 and outer y1
                [ix1, oy0, ox1, oy1],  # between inner x1 and outer x1
            ]
            # The strip with the largest area is taken as the caption.
            # Iterating in reverse makes ties resolve to the later strip,
            # matching the previous "stable sort, take last" selection.
            entry["caption"] = max(reversed(strips), key=area)

        ret.append(entry)
    return ret
|