Răsfoiți Sursa

move pretty_markdown arg from predict to save_to_markdown

gaotingquan 6 luni în urmă
părinte
comite
6780905719

+ 0 - 1
paddlex/configs/pipelines/PP-StructureV3.yaml

@@ -8,7 +8,6 @@ use_table_recognition: True
 use_formula_recognition: True
 use_chart_recognition: False
 use_region_detection: False
-pretty_markdown: True
 
 SubModules:
   LayoutDetection:

+ 10 - 3
paddlex/inference/common/result/mixin.py

@@ -609,10 +609,15 @@ class MarkdownMixin:
         self._save_funcs.append(self.save_to_markdown)
 
     @abstractmethod
-    def _to_markdown(self) -> Dict[str, Union[str, Dict[str, Any]]]:
+    def _to_markdown(
+        self, pretty_markdown=True
+    ) -> Dict[str, Union[str, Dict[str, Any]]]:
         """
         Convert the result to markdown format.
 
+        Args:
+            pretty_markdown (Optional[bool]): Whether to prettify the markdown with HTML. Defaults to True.
+
         Returns:
             Dict[str, Union[str, Dict[str, Any]]]: A dictionary containing markdown text and image data.
         """
@@ -627,7 +632,9 @@ class MarkdownMixin:
         """
         return self._to_markdown()
 
-    def save_to_markdown(self, save_path, *args, **kwargs) -> None:
+    def save_to_markdown(
+        self, save_path, pretty_markdown=True, *args, **kwargs
+    ) -> None:
         """Save the markdown data to a file.
 
         Args:
@@ -665,7 +672,7 @@ class MarkdownMixin:
             self._markdown_writer.write,
             self._img_writer.write,
             self.save_path,
-            self._to_markdown(),
+            self._to_markdown(pretty_markdown=pretty_markdown),
             *args,
             **kwargs,
         )

+ 2 - 15
paddlex/inference/pipelines/layout_parsing/pipeline_v2.py

@@ -114,11 +114,6 @@ class LayoutParsingPipelineV2(BasePipeline):
             False,
         )
 
-        self.pretty_markdown = config.get(
-            "pretty_markdown",
-            True,
-        )
-
         if self.use_doc_preprocessor:
             doc_preprocessor_config = config.get("SubPipelines", {}).get(
                 "DocPreprocessor",
@@ -910,7 +905,6 @@ class LayoutParsingPipelineV2(BasePipeline):
         use_formula_recognition: Union[bool, None],
         use_chart_recognition: Union[bool, None],
         use_region_detection: Union[bool, None],
-        pretty_markdown: Union[bool, None],
     ) -> dict:
         """
         Get the model settings based on the provided parameters or default values.
@@ -953,9 +947,6 @@ class LayoutParsingPipelineV2(BasePipeline):
         if use_chart_recognition is None:
             use_chart_recognition = self.use_chart_recognition
 
-        if pretty_markdown is None:
-            pretty_markdown = self.pretty_markdown
-
         return dict(
             use_doc_preprocessor=use_doc_preprocessor,
             use_general_ocr=use_general_ocr,
@@ -964,7 +955,6 @@ class LayoutParsingPipelineV2(BasePipeline):
             use_formula_recognition=use_formula_recognition,
             use_chart_recognition=use_chart_recognition,
             use_region_detection=use_region_detection,
-            pretty_markdown=pretty_markdown,
         )
 
     def predict(
@@ -1000,7 +990,6 @@ class LayoutParsingPipelineV2(BasePipeline):
         use_e2e_wireless_table_rec_model: bool = True,
         max_new_tokens: int = 1024,
         no_repeat_ngram_size: int = 20,
-        pretty_markdown: Union[bool, None] = None,
         **kwargs,
     ) -> LayoutParsingResultV2:
         """
@@ -1038,9 +1027,8 @@ class LayoutParsingPipelineV2(BasePipeline):
             use_table_cells_ocr_results (bool): whether to use OCR results with cells.
             use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
             use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
-            max_new_tokens: int = 1024,
-            no_repeat_ngram_size: int = 20,
-            pretty_markdown,
+            max_new_tokens (int): Argument for the chart-to-table model. Defaults to 1024.
+            no_repeat_ngram_size (int): Argument for the chart-to-table model. Defaults to 20.
             **kwargs (Any): Additional settings to extend functionality.
 
         Returns:
@@ -1056,7 +1044,6 @@ class LayoutParsingPipelineV2(BasePipeline):
             use_formula_recognition,
             use_chart_recognition,
             use_region_detection,
-            pretty_markdown,
         )
 
         if not self.check_model_settings_valid(model_settings):

+ 5 - 2
paddlex/inference/pipelines/layout_parsing/result_v2.py

@@ -255,10 +255,13 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
                 res_xlsx_dict[key] = table_res.xlsx["pred"]
         return res_xlsx_dict
 
-    def _to_markdown(self) -> dict:
+    def _to_markdown(self, pretty_markdown=True) -> dict:
         """
         Save the parsing result to a Markdown file.
 
+        Args:
+            pretty_markdown (Optional[bool]): Whether to prettify the markdown with HTML. Defaults to True.
+
         Returns:
             Dict
         """
@@ -414,7 +417,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
                     "</html>", ""
                 ).replace("<body>", "").replace("</body>", "")
 
-            if self["model_settings"].get("pretty_markdown", True):
+            if pretty_markdown:
                 format_text = format_text_centered_by_html
                 format_image = format_image_centered_by_html
                 format_table = format_table_with_html_body