فهرست منبع

Merge remote-tracking branch 'origin/master'

赵小蒙 1 سال پیش
والد
کامیت
671ce1d97c
2 فایل تغییر یافته به همراه 3 افزوده شده و 3 حذف شده
  1. 1 1
      magic_pdf/pdf_parse_for_train.py
  2. 2 2
      magic_pdf/train_utils/convert_to_train_format.py

+ 1 - 1
magic_pdf/pdf_parse_for_train.py

@@ -220,7 +220,7 @@ def parse_pdf_for_train(
         # 解析表格并对table_bboxes进行位置的微调,防止表格周围的文字被截断
         table_bboxes = parse_tables(page_id, page, model_output_json)
         table_bboxes = fix_tables(
-            page, table_bboxes, include_table_title=True, scan_line_num=2
+            page, table_bboxes, include_table_title=False, scan_line_num=2
         )  # 修正
         table_bboxes = fix_table_text_block(
             text_raw_blocks, table_bboxes

+ 2 - 2
magic_pdf/train_utils/convert_to_train_format.py

@@ -54,8 +54,8 @@ def convert_to_train_format(jso: dict) -> []:
             n_bbox = {"category_id": 10, "bbox": inter_equation["bbox"]}
             bboxes.append(n_bbox)
 
-        for footnote in v['bak_footer_note_bboxes']:
-            n_bbox = {"category_id": 5, "bbox": footnote["bbox"]}
+        for footnote_bbox in v["bak_footer_note_bboxes"]:
+            n_bbox = {"category_id": 5, "bbox": list(footnote_bbox)}
             bboxes.append(n_bbox)
 
         info["bboxes"] = bboxes