Sfoglia il codice sorgente

mk_mm_markdown2中span_type分类更新

赵小蒙 1 anno fa
parent
commit
195998a07f
1 file modificato con 4 aggiunte e 4 eliminazioni
  1. 4 4
      magic_pdf/dict2md/ocr_mkcontent.py

+ 4 - 4
magic_pdf/dict2md/ocr_mkcontent.py

@@ -66,13 +66,13 @@ def mk_mm_markdown2(pdf_info_dict:dict):
             for line in para:
                 for span in line['spans']:
                     span_type = span.get('type')
-                    if span_type == 'text':
+                    if span_type == ContentType.Text:
                         para_text += span['content']
-                    elif span_type == 'inline_equation':
+                    elif span_type == ContentType.InlineEquation:
                         para_text += f" ${span['content']}$ "
-                    elif span_type == 'displayed_equation':
+                    elif span_type == ContentType.InterlineEquation:
                         para_text += f"$$\n{span['content']}\n$$ "
-                    elif span_type == 'image':
+                    elif span_type == ContentType.Image:
                         para_text += f"![](s3://mllm-raw-media/pdf2md_img/{span['image_path']}) "
             markdown.append(para_text)