Browse Source

fix: remove unnecessary fields from block data in para_split.py

myhloli 4 months ago
parent
commit
fefe2d36d4
1 changed file with 4 additions and 0 deletions
  1. 4 0
      mineru/backend/pipeline/para_split.py

+ 4 - 0
mineru/backend/pipeline/para_split.py

@@ -368,6 +368,10 @@ def para_split(page_info_list):
             if block['page_num'] == page_info['page_idx']:
             if block['page_num'] == page_info['page_idx']:
                 page_info['para_blocks'].append(block)
                 page_info['para_blocks'].append(block)
 
 
+            # 从block中删除不需要的page_num和page_size字段
+            del block['page_num']
+            del block['page_size']
+
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     input_blocks = []
     input_blocks = []