Sfoglia il codice sorgente

fix(pdf-parse-union-core): #492 decrease span threshold for block filling (#500)

Reduce the span threshold used in fill_spans_in_blocks from 0.6 to 0.3 to
improve the accuracy of block filling based on layout analysis.
Xiaomeng Zhao 1 anno fa
parent
commit
58bfcc9ca4
1 file modificato con 1 aggiunta e 1 eliminazione
  1. 1 1
      magic_pdf/pdf_parse_union_core.py

+ 1 - 1
magic_pdf/pdf_parse_union_core.py

@@ -175,7 +175,7 @@ def parse_page_core(pdf_docs, magic_model, page_id, pdf_bytes_md5, imageWriter,
     sorted_blocks = sort_blocks_by_layout(all_bboxes, layout_bboxes)
 
     '''将span填入排好序的blocks中'''
-    block_with_spans, spans = fill_spans_in_blocks(sorted_blocks, spans, 0.6)
+    block_with_spans, spans = fill_spans_in_blocks(sorted_blocks, spans, 0.3)
 
     '''对block进行fix操作'''
     fix_blocks = fix_block_spans(block_with_spans, img_blocks, table_blocks)