Explorar o código

refactor(pre_proc): allow interline equations to be associated with text blocks

- Update OCR dictionary merge logic to include text blocks when processing interline equations
- This change improves the handling of equations that may be embedded within text content
myhloli hai 8 meses
pai
achega
083b787c15
Modificouse 1 ficheiro con 1 adición e 1 borrado
  1. 1 1
      magic_pdf/pre_proc/ocr_dict_merge.py

+ 1 - 1
magic_pdf/pre_proc/ocr_dict_merge.py

@@ -64,7 +64,7 @@ def span_block_type_compatible(span_type, block_type):
     if span_type in [ContentType.Text, ContentType.InlineEquation]:
         return block_type in [BlockType.Text, BlockType.Title, BlockType.ImageCaption, BlockType.ImageFootnote, BlockType.TableCaption, BlockType.TableFootnote]
     elif span_type == ContentType.InterlineEquation:
-        return block_type in [BlockType.InterlineEquation]
+        return block_type in [BlockType.InterlineEquation, BlockType.Text]
     elif span_type == ContentType.Image:
         return block_type in [BlockType.ImageBody]
     elif span_type == ContentType.Table: