|
@@ -106,29 +106,30 @@ def ocr_mk_markdown_with_para_core_v2(paras_of_layout, mode, img_buket_path=""):
|
|
|
if mode == 'nlp':
|
|
if mode == 'nlp':
|
|
|
continue
|
|
continue
|
|
|
elif mode == 'mm':
|
|
elif mode == 'mm':
|
|
|
- for block in para_block['blocks']:
|
|
|
|
|
|
|
+ for block in para_block['blocks']: # 1st.拼image_body
|
|
|
if block['type'] == BlockType.ImageBody:
|
|
if block['type'] == BlockType.ImageBody:
|
|
|
for line in block['lines']:
|
|
for line in block['lines']:
|
|
|
for span in line['spans']:
|
|
for span in line['spans']:
|
|
|
if span['type'] == ContentType.Image:
|
|
if span['type'] == ContentType.Image:
|
|
|
- para_text = f"\n})\n"
|
|
|
|
|
- for block in para_block['blocks']:
|
|
|
|
|
|
|
+ para_text += f"\n})\n"
|
|
|
|
|
+ for block in para_block['blocks']: # 2nd.拼image_caption
|
|
|
if block['type'] == BlockType.ImageCaption:
|
|
if block['type'] == BlockType.ImageCaption:
|
|
|
para_text += merge_para_with_text(block)
|
|
para_text += merge_para_with_text(block)
|
|
|
elif para_type == BlockType.Table:
|
|
elif para_type == BlockType.Table:
|
|
|
if mode == 'nlp':
|
|
if mode == 'nlp':
|
|
|
continue
|
|
continue
|
|
|
elif mode == 'mm':
|
|
elif mode == 'mm':
|
|
|
- for block in para_block['blocks']:
|
|
|
|
|
|
|
+ for block in para_block['blocks']: # 1st.拼table_caption
|
|
|
|
|
+ if block['type'] == BlockType.TableCaption:
|
|
|
|
|
+ para_text += merge_para_with_text(block)
|
|
|
|
|
+ for block in para_block['blocks']: # 2nd.拼table_body
|
|
|
if block['type'] == BlockType.TableBody:
|
|
if block['type'] == BlockType.TableBody:
|
|
|
for line in block['lines']:
|
|
for line in block['lines']:
|
|
|
for span in line['spans']:
|
|
for span in line['spans']:
|
|
|
if span['type'] == ContentType.Table:
|
|
if span['type'] == ContentType.Table:
|
|
|
- para_text = f"\n})\n"
|
|
|
|
|
- for block in para_block['blocks']:
|
|
|
|
|
- if block['type'] == BlockType.TableCaption:
|
|
|
|
|
- para_text += merge_para_with_text(block)
|
|
|
|
|
- elif block['type'] == BlockType.TableFootnote:
|
|
|
|
|
|
|
+ para_text += f"\n})\n"
|
|
|
|
|
+ for block in para_block['blocks']: # 3rd.拼table_footnote
|
|
|
|
|
+ if block['type'] == BlockType.TableFootnote:
|
|
|
para_text += merge_para_with_text(block)
|
|
para_text += merge_para_with_text(block)
|
|
|
|
|
|
|
|
if para_text.strip() == '':
|
|
if para_text.strip() == '':
|