1 개월 전 · 4c8bb038ce
--- a/mineru/cli/fast_api.py
+++ b/mineru/cli/fast_api.py
@@ -177,10 +177,7 @@ async def parse_pdf(
 
				                             zf.write(path, arcname=os.path.join(safe_pdf_name, f"{safe_pdf_name}_middle.json"))
			
 
				 
			
 
				                     if return_model_output:
			
 
				-                        if backend.startswith("pipeline"):
			
 
				-                            path = os.path.join(parse_dir, f"{pdf_name}_model.json")
			
 
				-                        else:
			
 
				-                            path = os.path.join(parse_dir, f"{pdf_name}_model_output.txt")
			
 
				+                        path = os.path.join(parse_dir, f"{pdf_name}_model.json")
			
 
				                         if os.path.exists(path): 
			
 
				                             zf.write(path, arcname=os.path.join(safe_pdf_name, os.path.basename(path)))
			
 
				 
			
@@ -220,10 +217,7 @@ async def parse_pdf(
 
				                     if return_middle_json:
			
 
				                         data["middle_json"] = get_infer_result("_middle.json", pdf_name, parse_dir)
			
 
				                     if return_model_output:
			
 
				-                        if backend.startswith("pipeline"):
			
 
				-                            data["model_output"] = get_infer_result("_model.json", pdf_name, parse_dir)
			
 
				-                        else:
			
 
				-                            data["model_output"] = get_infer_result("_model_output.txt", pdf_name, parse_dir)
			
 
				+                        data["model_output"] = get_infer_result("_model.json", pdf_name, parse_dir)
			
 
				                     if return_content_list:
			
 
				                         data["content_list"] = get_infer_result("_content_list.json", pdf_name, parse_dir)
			
 
				                     if return_images:
			
--- a/mineru/utils/table_merge.py
+++ b/mineru/utils/table_merge.py
@@ -3,6 +3,7 @@
 
				 from loguru import logger
			
 
				 from bs4 import BeautifulSoup
			
 
				 
			
 
				+from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
			
 
				 from mineru.utils.enum_class import BlockType, SplitFlag
			
 
				 
			
 
				 
			
@@ -169,7 +170,10 @@ def detect_table_headers(soup1, soup2, max_header_rows=5):
 
				 def can_merge_tables(current_table_block, previous_table_block):
			
 
				     """判断两个表格是否可以合并"""
			
 
				     # 检查表格是否有caption和footnote
			
 
				-    if any(block["type"] == BlockType.TABLE_CAPTION for block in current_table_block["blocks"]):
			
 
				+    # if any(block["type"] == BlockType.TABLE_CAPTION for block in current_table_block["blocks"]):
			
 
				+    #     return False, None, None, None, None
			
 
				+    # current_table_block["blocks"]中有任何TABLE_CAPTION类型的块，且任意caption块内不以"（续）"结尾，则不合并
			
 
				+    if any(block["type"] == BlockType.TABLE_CAPTION and not full_to_half(merge_para_with_text(block).strip()).endswith("(续)") for block in current_table_block["blocks"]):
			
 
				         return False, None, None, None, None
			
 
				 
			
 
				     if any(block["type"] == BlockType.TABLE_FOOTNOTE for block in previous_table_block["blocks"]):
			
@@ -263,17 +267,84 @@ def perform_table_merge(soup1, soup2, previous_table_block, wait_merge_table_foo
 
				     # 找到第一个表格的tbody，如果没有则查找table元素
			
 
				     tbody1 = soup1.find("tbody") or soup1.find("table")
			
 
				 
			
 
				-    # 找到第二个表格的tbody，如果没有则查找table元素
			
 
				-    tbody2 = soup2.find("tbody") or soup2.find("table")
			
 
				+    # 获取表1和表2的所有行
			
 
				+    rows1 = soup1.find_all("tr")
			
 
				+    rows2 = soup2.find_all("tr")
			
 
				+
			
 
				+
			
 
				+    if rows1 and rows2 and header_count < len(rows2):
			
 
				+        # 获取表1最后一行
			
 
				+        last_row1 = rows1[-1]
			
 
				+        # 获取表2第一个非表头行
			
 
				+        first_data_row2 = rows2[header_count]
			
 
				+
			
 
				+        # 分析两行的colspan结构
			
 
				+        last_row1_structure = []
			
 
				+        has_colspan_last_row1 = False
			
 
				+        for cell in last_row1.find_all(["td", "th"]):
			
 
				+            colspan = int(cell.get("colspan", 1))
			
 
				+            last_row1_structure.append(colspan)
			
 
				+            if colspan > 1:
			
 
				+                has_colspan_last_row1 = True
			
 
				+
			
 
				+        first_row2_structure = []
			
 
				+        has_colspan_first_row2 = False
			
 
				+        for cell in first_data_row2.find_all(["td", "th"]):
			
 
				+            colspan = int(cell.get("colspan", 1))
			
 
				+            first_row2_structure.append(colspan)
			
 
				+            if colspan > 1:
			
 
				+                has_colspan_first_row2 = True
			
 
				+
			
 
				+        # 确定基准结构（优先使用有colspan的行）
			
 
				+        if has_colspan_last_row1:
			
 
				+            reference_structure = last_row1_structure
			
 
				+            reference_visual_cols = calculate_visual_columns(last_row1)
			
 
				+        elif has_colspan_first_row2:
			
 
				+            reference_structure = first_row2_structure
			
 
				+            reference_visual_cols = calculate_visual_columns(first_data_row2)
			
 
				+        else:
			
 
				+            # 都没有colspan时使用表1最后一行作为默认基准
			
 
				+            reference_structure = last_row1_structure
			
 
				+            reference_visual_cols = calculate_visual_columns(last_row1)
			
 
				+
			
 
				+        # 如果表1最后一行没有colspan但表2首行有，则调整表1相关行
			
 
				+        if not has_colspan_last_row1 and has_colspan_first_row2:
			
 
				+            # 找到表1中所有具有相同视觉列数的行
			
 
				+            rows_to_adjust = []
			
 
				+            for i in range(len(rows1) - 1, -1, -1):
			
 
				+                if calculate_visual_columns(rows1[i]) == reference_visual_cols:
			
 
				+                    rows_to_adjust.append(rows1[i])
			
 
				+                else:
			
 
				+                    break
			
 
				+
			
 
				+            # 应用参考结构到这些行
			
 
				+            for row in rows_to_adjust:
			
 
				+                cells = row.find_all(["td", "th"])
			
 
				+                if cells and len(cells) <= len(reference_structure):
			
 
				+                    for j, cell in enumerate(cells):
			
 
				+                        if j < len(reference_structure) and reference_structure[j] > 1:
			
 
				+                            cell["colspan"] = str(reference_structure[j])
			
 
				+
			
 
				+        # 如果表2首行没有colspan但表1最后一行有，则调整表2相关行
			
 
				+        elif has_colspan_last_row1 and not has_colspan_first_row2:
			
 
				+            # 调整表2中所有具有相同视觉列数的行
			
 
				+            for i in range(header_count, len(rows2)):
			
 
				+                row = rows2[i]
			
 
				+                if calculate_visual_columns(row) == reference_visual_cols:
			
 
				+                    cells = row.find_all(["td", "th"])
			
 
				+                    if cells and len(cells) <= len(reference_structure):
			
 
				+                        for j, cell in enumerate(cells):
			
 
				+                            if j < len(reference_structure) and reference_structure[j] > 1:
			
 
				+                                cell["colspan"] = str(reference_structure[j])
			
 
				 
			
 
				     # 将第二个表格的行添加到第一个表格中
			
 
				-    if tbody1 and tbody2:
			
 
				-        rows2 = soup2.find_all("tr")
			
 
				-        # 将第二个表格的行添加到第一个表格中（跳过表头行）
			
 
				-        for row in rows2[header_count:]:
			
 
				-            # 从原来的位置移除行，并添加到第一个表格中
			
 
				-            row.extract()
			
 
				-            tbody1.append(row)
			
 
				+    if tbody1:
			
 
				+        tbody2 = soup2.find("tbody") or soup2.find("table")
			
 
				+        if tbody2:
			
 
				+            # 将第二个表格的行添加到第一个表格中（跳过表头行）
			
 
				+            for row in rows2[header_count:]:
			
 
				+                row.extract()
			
 
				+                tbody1.append(row)
			
 
				 
			
 
				     # 添加待合并表格的footnote到前一个表格中
			
 
				     for table_footnote in wait_merge_table_footnotes: