Explorar o código

更新了para_split

liukaiwen hai 1 ano
pai
achega
37483f0af0
Modificouse 1 ficheiro con 7 adicións e 1 borrado
  1. 7 1
      magic_pdf/para/para_split_v2.py

+ 7 - 1
magic_pdf/para/para_split_v2.py

@@ -696,4 +696,10 @@ def para_split(pdf_info_dict, debug_mode, lang="en"):
         page_paras = page['para_blocks']
         new_layout_bbox = new_layout_of_pages[page_num]
         __connect_middle_align_text(page_paras, new_layout_bbox, page_num, lang, debug_mode=debug_mode)
-        __merge_signle_list_text(page_paras, new_layout_bbox, page_num, lang)
+        __merge_signle_list_text(page_paras, new_layout_bbox, page_num, lang)
+
+    # layout展平
+    for page_num, page in enumerate(pdf_info_dict.values()):
+        page_paras = page['para_blocks']
+        page_blocks = [block for layout in page_paras for block in layout]
+        page["para_blocks"] = page_blocks