1 lună în urmă · 2108019698
--- a/mineru/utils/table_merge.py
+++ b/mineru/utils/table_merge.py
@@ -263,17 +263,84 @@ def perform_table_merge(soup1, soup2, previous_table_block, wait_merge_table_foo
 
				     # 找到第一个表格的tbody，如果没有则查找table元素
			
 
				     tbody1 = soup1.find("tbody") or soup1.find("table")
			
 
				 
			
 
				-    # 找到第二个表格的tbody，如果没有则查找table元素
			
 
				-    tbody2 = soup2.find("tbody") or soup2.find("table")
			
 
				+    # 获取表1和表2的所有行
			
 
				+    rows1 = soup1.find_all("tr")
			
 
				+    rows2 = soup2.find_all("tr")
			
 
				+
			
 
				+
			
 
				+    if rows1 and rows2 and header_count < len(rows2):
			
 
				+        # 获取表1最后一行
			
 
				+        last_row1 = rows1[-1]
			
 
				+        # 获取表2第一个非表头行
			
 
				+        first_data_row2 = rows2[header_count]
			
 
				+
			
 
				+        # 分析两行的colspan结构
			
 
				+        last_row1_structure = []
			
 
				+        has_colspan_last_row1 = False
			
 
				+        for cell in last_row1.find_all(["td", "th"]):
			
 
				+            colspan = int(cell.get("colspan", 1))
			
 
				+            last_row1_structure.append(colspan)
			
 
				+            if colspan > 1:
			
 
				+                has_colspan_last_row1 = True
			
 
				+
			
 
				+        first_row2_structure = []
			
 
				+        has_colspan_first_row2 = False
			
 
				+        for cell in first_data_row2.find_all(["td", "th"]):
			
 
				+            colspan = int(cell.get("colspan", 1))
			
 
				+            first_row2_structure.append(colspan)
			
 
				+            if colspan > 1:
			
 
				+                has_colspan_first_row2 = True
			
 
				+
			
 
				+        # 确定基准结构（优先使用有colspan的行）
			
 
				+        if has_colspan_last_row1:
			
 
				+            reference_structure = last_row1_structure
			
 
				+            reference_visual_cols = calculate_visual_columns(last_row1)
			
 
				+        elif has_colspan_first_row2:
			
 
				+            reference_structure = first_row2_structure
			
 
				+            reference_visual_cols = calculate_visual_columns(first_data_row2)
			
 
				+        else:
			
 
				+            # 都没有colspan时使用表1最后一行作为默认基准
			
 
				+            reference_structure = last_row1_structure
			
 
				+            reference_visual_cols = calculate_visual_columns(last_row1)
			
 
				+
			
 
				+        # 如果表1最后一行没有colspan但表2首行有，则调整表1相关行
			
 
				+        if not has_colspan_last_row1 and has_colspan_first_row2:
			
 
				+            # 找到表1中所有具有相同视觉列数的行
			
 
				+            rows_to_adjust = []
			
 
				+            for i in range(len(rows1) - 1, -1, -1):
			
 
				+                if calculate_visual_columns(rows1[i]) == reference_visual_cols:
			
 
				+                    rows_to_adjust.append(rows1[i])
			
 
				+                else:
			
 
				+                    break
			
 
				+
			
 
				+            # 应用参考结构到这些行
			
 
				+            for row in rows_to_adjust:
			
 
				+                cells = row.find_all(["td", "th"])
			
 
				+                if cells and len(cells) <= len(reference_structure):
			
 
				+                    for j, cell in enumerate(cells):
			
 
				+                        if j < len(reference_structure) and reference_structure[j] > 1:
			
 
				+                            cell["colspan"] = str(reference_structure[j])
			
 
				+
			
 
				+        # 如果表2首行没有colspan但表1最后一行有，则调整表2相关行
			
 
				+        elif has_colspan_last_row1 and not has_colspan_first_row2:
			
 
				+            # 调整表2中所有具有相同视觉列数的行
			
 
				+            for i in range(header_count, len(rows2)):
			
 
				+                row = rows2[i]
			
 
				+                if calculate_visual_columns(row) == reference_visual_cols:
			
 
				+                    cells = row.find_all(["td", "th"])
			
 
				+                    if cells and len(cells) <= len(reference_structure):
			
 
				+                        for j, cell in enumerate(cells):
			
 
				+                            if j < len(reference_structure) and reference_structure[j] > 1:
			
 
				+                                cell["colspan"] = str(reference_structure[j])
			
 
				 
			
 
				     # 将第二个表格的行添加到第一个表格中
			
 
				-    if tbody1 and tbody2:
			
 
				-        rows2 = soup2.find_all("tr")
			
 
				-        # 将第二个表格的行添加到第一个表格中（跳过表头行）
			
 
				-        for row in rows2[header_count:]:
			
 
				-            # 从原来的位置移除行，并添加到第一个表格中
			
 
				-            row.extract()
			
 
				-            tbody1.append(row)
			
 
				+    if tbody1:
			
 
				+        tbody2 = soup2.find("tbody") or soup2.find("table")
			
 
				+        if tbody2:
			
 
				+            # 将第二个表格的行添加到第一个表格中（跳过表头行）
			
 
				+            for row in rows2[header_count:]:
			
 
				+                row.extract()
			
 
				+                tbody1.append(row)
			
 
				 
			
 
				     # 添加待合并表格的footnote到前一个表格中
			
 
				     for table_footnote in wait_merge_table_footnotes: