Sfoglia il codice sorgente

Update mineru/utils/span_block_fix.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Xiaomeng Zhao 4 mesi fa
parent
commit
0508df994a
1 file modificato con 2 aggiunte e 1 eliminazione
  1. 2 1
      mineru/utils/span_block_fix.py

+ 2 - 1
mineru/utils/span_block_fix.py

@@ -73,9 +73,10 @@ def fix_text_block(block):
             span['type'] = ContentType.INLINE_EQUATION
 
     # 假设block中的span超过80%的数量高度是宽度的两倍以上,则认为是纵向文本块
+    VERTICAL_TEXT_RATIO_THRESHOLD = 2  # Threshold for determining vertical text blocks
     vertical_span_count = sum(
         1 for span in block['spans']
-        if (span['bbox'][3] - span['bbox'][1]) / (span['bbox'][2] - span['bbox'][0]) > 2
+        if (span['bbox'][3] - span['bbox'][1]) / (span['bbox'][2] - span['bbox'][0]) > VERTICAL_TEXT_RATIO_THRESHOLD
     )
     total_span_count = len(block['spans'])
     if total_span_count == 0: