gaotingquan 6 місяців тому
батько
коміт
255886f827
1 змінений файл: 31 додано та 24 видалено
  1. 31 24
      paddlex/inference/pipelines/layout_parsing/result_v2.py

+ 31 - 24
paddlex/inference/pipelines/layout_parsing/result_v2.py

@@ -296,29 +296,29 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
                     " ",
                 )
 
-            # def format_centered_text():
-            #     return (
-            #         f'<div style="text-align: center;">{block.content}</div>'.replace(
-            #             "-\n",
-            #             "",
-            #         ).replace("\n", " ")
-            #         + "\n"
-            #     )
-
-            def format_centered_text():
+            def format_text_centered_by_html():
+                return (
+                    f'<div style="text-align: center;">{block.content}</div>'.replace(
+                        "-\n",
+                        "",
+                    ).replace("\n", " ")
+                    + "\n"
+                )
+
+            def format_text_plain():
                 return block.content
 
-            # def format_image():
-            #     img_tags = []
-            #     image_path = "".join(block.image.keys())
-            #     img_tags.append(
-            #         '<div style="text-align: center;"><img src="{}" alt="Image" /></div>'.format(
-            #             image_path.replace("-\n", "").replace("\n", " "),
-            #         ),
-            #     )
-            #     return "\n".join(img_tags)
-
-            def format_image():
+            def format_image_centered_by_html():
+                img_tags = []
+                image_path = "".join(block.image.keys())
+                img_tags.append(
+                    '<div style="text-align: center;"><img src="{}" alt="Image" style="width: auto; height: auto;" /></div>'.format(
+                        image_path.replace("-\n", "").replace("\n", " "),
+                    ),
+                )
+                return "\n".join(img_tags)
+
+            def format_image_plain():
                 img_tags = []
                 image_path = "".join(block.image.keys())
                 img_tags.append(
@@ -407,6 +407,13 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
                 return seg_start_flag, seg_end_flag
 
+            if self["model_settings"].get("is_pretty_markdown", True):
+                format_text = format_text_centered_by_html
+                format_image = format_image_centered_by_html
+            else:
+                format_text = format_text_plain
+                format_image = format_image_plain
+
             handlers = {
                 "paragraph_title": lambda: format_title(block.content),
                 "abstract_title": lambda: format_title(block.content),
@@ -416,9 +423,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
                     "-\n",
                     "",
                 ).replace("\n", " "),
-                "table_title": lambda: format_centered_text(),
-                "figure_title": lambda: format_centered_text(),
-                "chart_title": lambda: format_centered_text(),
+                "table_title": lambda: format_text(),
+                "figure_title": lambda: format_text(),
+                "chart_title": lambda: format_text(),
                 "text": lambda: block.content.replace("\n\n", "\n").replace(
                     "\n", "\n\n"
                 ),