Преглед на файлове

Merge pull request #2852 from myhloli/dev

fix: remove unnecessary fields from block data in para_split.py
Xiaomeng Zhao преди 4 месеца
родител
ревизия
a76e3b60d6
променени са 1 файла, в които са добавени 6 реда и са изтрити 2 реда
  1. +6 -2
      mineru/backend/pipeline/para_split.py

+ 6 - 2
mineru/backend/pipeline/para_split.py

@@ -365,8 +365,12 @@ def para_split(page_info_list):
     for page_info in page_info_list:
         page_info['para_blocks'] = []
         for block in all_blocks:
-            if block['page_num'] == page_info['page_idx']:
-                page_info['para_blocks'].append(block)
+            if 'page_num' in block:
+                if block['page_num'] == page_info['page_idx']:
+                    page_info['para_blocks'].append(block)
+                    # 从block中删除不需要的page_num和page_size字段
+                    del block['page_num']
+                    del block['page_size']
 
 
 if __name__ == '__main__':