浏览代码

Merge pull request #99 from papayalove/master

解决分段span丢失问题
myhloli 1 年之前
父节点
当前提交
6a993c1671
共有 1 个文件被更改,包括 4 次插入和 1 次删除
  1. 4 1
      magic_pdf/para/para_split_v2.py

+ 4 - 1
magic_pdf/para/para_split_v2.py

@@ -489,7 +489,10 @@ def __connect_para_inter_layoutbox(blocks_group, new_layout_bbox, lang):
     connected_layout_blocks.append(blocks_group[0])
     for i in range(1, len(blocks_group)):
         try:
-            if len(blocks_group[i]) == 0 or len(blocks_group[i - 1]) == 0:  # TODO 考虑连接问题,
+            if len(blocks_group[i]) == 0:
+                continue
+            if  len(blocks_group[i - 1]) == 0:  # TODO 考虑连接问题,
+                connected_layout_blocks.append(blocks_group[i])
                 continue
             # text类型的段才需要考虑layout间的合并
             if blocks_group[i - 1][-1]["type"] != BlockType.Text or blocks_group[i][0]["type"] != BlockType.Text: