浏览代码

修复边界问题(修复list拼接和reference分行问题)

liukaiwen 1 年之前
父节点
当前提交
4ff09a2fbc
共有 1 个文件被更改,包括 2 次插入和 1 次删除
  1. 2 1
      magic_pdf/para/para_split_v2.py

+ 2 - 1
magic_pdf/para/para_split_v2.py

@@ -106,7 +106,8 @@ def __detect_list_lines(lines, new_layout_bboxes, lang):
         3. 如果非顶格,首字符大写,编码为2
         3. 如果非顶格,首字符大写,编码为2
         4. 如果非顶格,首字符非大写编码为3
         4. 如果非顶格,首字符非大写编码为3
         """
         """
-        x_map_tag_dict, min_x_tag = cluster_line_x(lines)
+        if len(lines) > 0:
+            x_map_tag_dict, min_x_tag = cluster_line_x(lines)
         for l in lines:
         for l in lines:
             span_text = __get_span_text(l['spans'][0])
             span_text = __get_span_text(l['spans'][0])
             first_char = span_text[0]
             first_char = span_text[0]