소스 검색

Refactor fix_two_layer_blocks function to improve handling of captions and footnotes in table blocks

myhloli 1 개월 전
부모
커밋
7c689e33b8
1개의 변경된 파일, 114개의 추가 그리고 1개의 삭제
  1. 114 1
      mineru/backend/vlm/vlm_magic_model.py

+ 114 - 1
mineru/backend/vlm/vlm_magic_model.py

@@ -361,7 +361,7 @@ def get_type_blocks(blocks, block_type: Literal["image", "table", "code"]):
     return ret
 
 
-def fix_two_layer_blocks(blocks, fix_type: Literal["image", "table", "code"]):
+def fix_two_layer_blocks_back(blocks, fix_type: Literal["image", "table", "code"]):
     need_fix_blocks = get_type_blocks(blocks, fix_type)
     fixed_blocks = []
     not_include_blocks = []
@@ -404,6 +404,119 @@ def fix_two_layer_blocks(blocks, fix_type: Literal["image", "table", "code"]):
     return fixed_blocks, not_include_blocks
 
 
+def _nearest_body_idx(need_fix_blocks, fix_type, item_index, skip_idx, before):
+    """Return the position in ``need_fix_blocks`` of the closest eligible body.
+
+    When ``before`` is true the body must satisfy ``body index <= item_index``
+    (footnote rule); otherwise ``body index >= item_index`` (caption rule).
+    The block at ``skip_idx`` (the item's original owner) is never selected,
+    ties keep the earliest candidate, and ``None`` means nothing qualified.
+    """
+    best_idx, min_distance = None, float("inf")
+    for idx, block in enumerate(need_fix_blocks):
+        delta = block[f"{fix_type}_body"]["index"] - item_index
+        distance = -delta if before else delta
+        if idx != skip_idx and 0 <= distance < min_distance:
+            best_idx, min_distance = idx, distance
+    return best_idx
+
+
+def fix_two_layer_blocks(blocks, fix_type: Literal["image", "table", "code"]):
+    """Wrap every ``fix_type`` body with its captions and footnotes.
+
+    For ``fix_type == "table"`` a caption may not have a larger ``"index"``
+    than its body and a footnote may not have a smaller one. Offending items
+    are moved to the nearest other table that satisfies the rule, or are
+    returned as ordinary blocks when no such table exists.
+
+    Args:
+        blocks: Flat list of block dicts, each carrying an ``"index"`` key.
+        fix_type: Kind of two-layer block to build.
+
+    Returns:
+        A ``(fixed_blocks, not_include_blocks)`` tuple: the two-layer blocks
+        (``type``, ``bbox``, ``blocks``, ``index``) and the blocks that were
+        not absorbed into any of them, each listed once.
+    """
+    need_fix_blocks = get_type_blocks(blocks, fix_type)
+    fixed_blocks = []
+    not_include_blocks = []
+    processed_indices = set()
+    body_key = f"{fix_type}_body"
+    caption_key = f"{fix_type}_caption_list"
+    footnote_key = f"{fix_type}_footnote_list"
+
+    # Tables only: captions belong before the body, footnotes after it.
+    if fix_type == "table":
+        misplaced_captions = []  # (caption, position of owning block)
+        misplaced_footnotes = []  # (footnote, position of owning block)
+
+        # Step 1: detach wrongly positioned items from every table before any
+        # reassignment, so moved items are not checked a second time.
+        for block_idx, block in enumerate(need_fix_blocks):
+            body_index = block[body_key]["index"]
+            valid_captions = []
+            for caption in block[caption_key]:
+                if caption["index"] <= body_index:
+                    valid_captions.append(caption)
+                else:
+                    misplaced_captions.append((caption, block_idx))
+            block[caption_key] = valid_captions
+
+            valid_footnotes = []
+            for footnote in block[footnote_key]:
+                if footnote["index"] >= body_index:
+                    valid_footnotes.append(footnote)
+                else:
+                    misplaced_footnotes.append((footnote, block_idx))
+            block[footnote_key] = valid_footnotes
+
+        # Step 2: give every detached caption, then every detached footnote,
+        # to the nearest other table on the correct side of it.
+        for items, list_key, before in (
+            (misplaced_captions, caption_key, False),
+            (misplaced_footnotes, footnote_key, True),
+        ):
+            for item, owner_idx in items:
+                target_idx = _nearest_body_idx(
+                    need_fix_blocks, fix_type, item["index"], owner_idx, before
+                )
+                if target_idx is not None:
+                    need_fix_blocks[target_idx][list_key].append(item)
+                else:
+                    # No table can take it: return it as an ordinary block
+                    # and record its index so the final sweep over `blocks`
+                    # does not append the same block a second time.
+                    not_include_blocks.append(item)
+                    processed_indices.add(item["index"])
+
+    # Build the two-layer blocks.
+    for block in need_fix_blocks:
+        body = block[body_key]
+        caption_list = block[caption_key]
+        footnote_list = block[footnote_key]
+
+        body["type"] = f"{fix_type}_body"
+        for caption in caption_list:
+            caption["type"] = f"{fix_type}_caption"
+            processed_indices.add(caption["index"])
+        for footnote in footnote_list:
+            footnote["type"] = f"{fix_type}_footnote"
+            processed_indices.add(footnote["index"])
+        processed_indices.add(body["index"])
+        fixed_blocks.append({
+            "type": fix_type,
+            "bbox": body["bbox"],
+            "blocks": [body, *caption_list, *footnote_list],
+            "index": body["index"],
+        })
+
+    # Everything not consumed above is returned unchanged, in input order.
+    not_include_blocks.extend(b for b in blocks if b["index"] not in processed_indices)
+
+    return fixed_blocks, not_include_blocks
+
+
 def fix_list_blocks(list_blocks, text_blocks, ref_text_blocks):
     for list_block in list_blocks:
         list_block["blocks"] = []