Explorar el Código

feat: backup footnote_bboxes_tmp

xu rui hace 1 año
padre
commit
05161c6e62

+ 1 - 0
magic_pdf/pdf_parse_for_train.py

@@ -531,6 +531,7 @@ def parse_pdf_for_train(
         page_info["bak_page_no_bboxes"] = page_no_bboxs
         page_info["bak_header_bboxes"] = header_bboxs
         page_info["bak_footer_bboxes"] = footer_bboxs
+        page_info["bak_footer_note_bboxes"] = footnote_bboxes_tmp
 
         pdf_info_dict[f"page_{page_id}"] = page_info
 

+ 1 - 1
magic_pdf/train_utils/convert_to_train_format.py

@@ -54,7 +54,7 @@ def convert_to_train_format(jso: dict) -> []:
             n_bbox = {"category_id": 10, "bbox": inter_equation["bbox"]}
             bboxes.append(n_bbox)
 
-        for footnote in v['footnote_bboxes_tmp']:
+        for footnote in v['bak_footer_note_bboxes']:
             n_bbox = {"category_id": 5, "bbox": footnote["bbox"]}
             bboxes.append(n_bbox)