# construct_paras.py

  1. def construct_page_component(page_id, image_info, table_info, text_blocks_preproc, layout_bboxes, inline_eq_info, interline_eq_info, raw_pymu_blocks,
  2. removed_text_blocks, removed_image_blocks, images_backup, droped_table_block, table_backup,layout_tree,
  3. page_w, page_h, footnote_bboxes_tmp):
  4. """
  5. """
  6. return_dict = {}
  7. return_dict['para_blocks'] = {}
  8. return_dict['preproc_blocks'] = text_blocks_preproc
  9. return_dict['images'] = image_info
  10. return_dict['tables'] = table_info
  11. return_dict['interline_equations'] = interline_eq_info
  12. return_dict['inline_equations'] = inline_eq_info
  13. return_dict['layout_bboxes'] = layout_bboxes
  14. return_dict['pymu_raw_blocks'] = raw_pymu_blocks
  15. return_dict['global_statistic'] = {}
  16. return_dict['droped_text_block'] = removed_text_blocks
  17. return_dict['droped_image_block'] = removed_image_blocks
  18. return_dict['droped_table_block'] = []
  19. return_dict['image_backup'] = images_backup
  20. return_dict['table_backup'] = []
  21. return_dict['page_idx'] = page_id
  22. return_dict['page_size'] = [page_w, page_h]
  23. return_dict['_layout_tree'] = layout_tree # 辅助分析layout作用
  24. return_dict['footnote_bboxes_tmp'] = footnote_bboxes_tmp
  25. return return_dict