Ver código fonte

Merge pull request #2852 from myhloli/dev

fix: remove unnecessary fields from block data in para_split.py
Xiaomeng Zhao 4 meses atrás
pai
commit
a76e3b60d6
1 arquivo alterado com 6 adições e 2 exclusões
  1. 6 2
      mineru/backend/pipeline/para_split.py

+ 6 - 2
mineru/backend/pipeline/para_split.py

@@ -365,8 +365,12 @@ def para_split(page_info_list):
     for page_info in page_info_list:
         page_info['para_blocks'] = []
         for block in all_blocks:
-            if block['page_num'] == page_info['page_idx']:
-                page_info['para_blocks'].append(block)
+            if 'page_num' in block:
+                if block['page_num'] == page_info['page_idx']:
+                    page_info['para_blocks'].append(block)
+                    # 从block中删除不需要的page_num和page_size字段
+                    del block['page_num']
+                    del block['page_size']
 
 
 if __name__ == '__main__':