|
|
@@ -1,4 +1,5 @@
|
|
|
-from magic_pdf.libs.boxbase import calculate_overlap_area_in_bbox1_area_ratio, get_minbox_if_overlap_by_ratio
|
|
|
+from magic_pdf.libs.boxbase import calculate_overlap_area_in_bbox1_area_ratio, get_minbox_if_overlap_by_ratio, \
|
|
|
+ __is_overlaps_y_exceeds_threshold
|
|
|
|
|
|
|
|
|
def remove_overlaps_min_spans(spans):
|
|
|
@@ -58,7 +59,7 @@ def modify_y_axis(spans: list, displayed_list: list, text_inline_lines: list):
|
|
|
|
|
|
line_first_y0 = spans[0]["bbox"][1]
|
|
|
line_first_y = spans[0]["bbox"][3]
|
|
|
- #用于给行间公式搜索
|
|
|
+ # 用于给行间公式搜索
|
|
|
# text_inline_lines = []
|
|
|
for span in spans[1:]:
|
|
|
# if span.get("content","") == "78.":
|
|
|
@@ -67,7 +68,7 @@ def modify_y_axis(spans: list, displayed_list: list, text_inline_lines: list):
|
|
|
# image和table类型,同上
|
|
|
if span['type'] in ["displayed_equation", "image", "table"] or any(
|
|
|
s['type'] in ["displayed_equation", "image", "table"] for s in current_line):
|
|
|
- #传入
|
|
|
+ # 传入
|
|
|
if span["type"] in ["displayed_equation", "image", "table"]:
|
|
|
displayed_list.append(span)
|
|
|
# 则开始新行
|
|
|
@@ -129,7 +130,7 @@ def modify_inline_equation(spans: list, displayed_list: list, text_inline_lines:
|
|
|
if (span_y0 < y0 and span_y > y0 or span_y0 < y1 and span_y > y1 or span_y0 < y0 and span_y > y1) and __is_overlaps_y_exceeds_threshold(span['bbox'], (0, y0, 0, y1)):
|
|
|
span["bbox"][1] = y0
|
|
|
# span["bbox"][3] = y1
|
|
|
- #调整公式类型
|
|
|
+ # 调整公式类型
|
|
|
if span["type"] == "displayed_equation":
|
|
|
if j+1 >= len(text_inline_lines):
|
|
|
span["type"] = "inline_equation"
|