9 月之前 · 0a93e36d38
--- a/api_examples/pipelines/test_layout_parsing_v2.py
+++ b/api_examples/pipelines/test_layout_parsing_v2.py
@@ -32,4 +32,3 @@ for res in output:
 
															     res.save_to_xlsx("./output")
														
 
															     res.save_to_html("./output")
														
 
															     res.save_to_markdown("./output")
														
 
															-    res.save_to_pdf_order("./output")
														
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing_v2.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing_v2.md
--- a/paddlex/configs/pipelines/layout_parsing_v2.yaml
+++ b/paddlex/configs/pipelines/layout_parsing_v2.yaml
@@ -51,7 +51,7 @@ SubPipelines:
 
															       TextRecognition:
														
 
															         module_name: text_recognition
														
 
															-        model_name: PP-OCRv4_server_rec
														
 
															+        model_name: PP-OCRv4_server_rec_doc
														
 
															         model_dir: null
														
 
															         batch_size: 1
														
 
															         score_thresh: 0.0
														
@@ -87,17 +87,6 @@ SubPipelines:
 
															         model_name: RT-DETR-L_wireless_table_cell_det
														
 
															         model_dir: null
														
 
															-  # TableRecognition:
														
 
															-  #   pipeline_name: table_recognition
														
 
															-  #   use_layout_detection: False
														
 
															-  #   use_doc_preprocessor: False
														
 
															-  #   use_ocr_model: False
														
 
															-  #   SubModules:
														
 
															-  #     TableStructureRecognition:
														
 
															-  #       module_name: table_structure_recognition
														
 
															-  #       model_name: SLANet_plus
														
 
															-  #       model_dir: null
														
 
															-
														
 
															   SealRecognition:
														
 
															     pipeline_name: seal_recognition
														
 
															     use_layout_detection: False
														
--- a/paddlex/inference/common/result/mixin.py
+++ b/paddlex/inference/common/result/mixin.py
@@ -12,9 +12,10 @@
 
															 # See the License for the specific language governing permissions and
														
 
															 # limitations under the License.
														
 
															-from typing import Union, Tuple, List, Dict, Any, Iterator
														
 
															+from typing import Union, Tuple, List, Dict, Any, Iterator, Callable, Optional
														
 
															 from abc import abstractmethod
														
 
															 from pathlib import Path
														
 
															+import os
														
 
															 import mimetypes
														
 
															 import json
														
 
															 import copy
														
@@ -379,7 +380,6 @@ class CSVMixin:
 
															         if not _is_csv_file(save_path):
														
 
															             fn = Path(self._get_input_fn())
														
 
															-            fn = Path(self._get_input_fn())
														
 
															             stem = fn.stem
														
 
															             base_save_path = Path(save_path)
														
 
															             for key in self.csv:
														
@@ -597,53 +597,108 @@ class VideoMixin:
 
															 class MarkdownMixin:
														
 
															+    """Mixin class for adding Markdown handling capabilities."""
														
 
															     def __init__(self, *args: list, **kwargs: dict):
														
 
															+        """Initializes the Markdown writer and appends the save_to_markdown method to the save functions.
														
 
															+
														
 
															+        Args:
														
 
															+            *args: Positional arguments to be passed to the MarkdownWriter constructor.
														
 
															+            **kwargs: Keyword arguments to be passed to the MarkdownWriter constructor.
														
 
															+        """
														
 
															         self._markdown_writer = MarkdownWriter(*args, **kwargs)
														
 
															+        self._img_writer = ImageWriter(*args, **kwargs)
														
 
															         self._save_funcs.append(self.save_to_markdown)
														
 
															-        self.save_path = None
														
 
															     @abstractmethod
														
 
															-    def _to_markdown(self):
														
 
															+    def _to_markdown(self) -> Dict[str, Union[str, Dict[str, Any]]]:
														
 
															         """
														
 
															         Convert the result to markdown format.
														
 
															+
														
 
															         Returns:
														
 
															-            Dict
														
 
															+            Dict[str, Union[str, Dict[str, Any]]]: A dictionary containing markdown text and image data.
														
 
															         """
														
 
															         raise NotImplementedError
														
 
															     @property
														
 
															-    def markdown(self):
														
 
															+    def markdown(self) -> Dict[str, Union[str, Dict[str, Any]]]:
														
 
															+        """Property to access the markdown data.
														
 
															+
														
 
															+        Returns:
														
 
															+            Dict[str, Union[str, Dict[str, Any]]]: A dictionary containing markdown text and image data.
														
 
															+        """
														
 
															         return self._to_markdown()
														
 
															-    def save_to_markdown(self, save_path, *args, **kwargs):
														
 
															-        save_path = Path(save_path)
														
 
															-        if not save_path.suffix.lower() == ".md":
														
 
															-            save_path = save_path / f"layout_parsing_result.md"
														
 
															+    def save_to_markdown(self, save_path, *args, **kwargs) -> None:
														
 
															+        """Save the markdown data to a file.
														
 
															+
														
 
															+        Args:
														
 
															+            save_path (Union[str, Path]): The path where the markdown file will be saved.
														
 
															+            *args: Additional positional arguments for saving.
														
 
															+            **kwargs: Additional keyword arguments for saving.
														
 
															+        """
														
 
															-        self.save_path = save_path
														
 
															+        def _is_markdown_file(file_path) -> bool:
														
 
															+            """Check if a file is a markdown file based on its extension or MIME type.
														
 
															-        self._save_list_data(
														
 
															+            Args:
														
 
															+                file_path (Union[str, Path]): The path to the file.
														
 
															+
														
 
															+            Returns:
														
 
															+                bool: True if the file is a markdown file, False otherwise.
														
 
															+            """
														
 
															+            markdown_extensions = {".md", ".markdown", ".mdown", ".mkd"}
														
 
															+            _, ext = os.path.splitext(str(file_path))
														
 
															+            if ext.lower() in markdown_extensions:
														
 
															+                return True
														
 
															+            mime_type, _ = mimetypes.guess_type(str(file_path))
														
 
															+            return mime_type == "text/markdown"
														
 
															+
														
 
															+        if not _is_markdown_file(save_path):
														
 
															+            fn = Path(self._get_input_fn())
														
 
															+            suffix = fn.suffix if _is_markdown_file(fn) else ".md"
														
 
															+            stem = fn.stem
														
 
															+            base_save_path = Path(save_path)
														
 
															+            save_path = base_save_path / f"{stem}{suffix}"
														
 
															+            self.save_path = save_path
														
 
															+        else:
														
 
															+            self.save_path = save_path
														
 
															+        self._save_data(
														
 
															             self._markdown_writer.write,
														
 
															-            save_path,
														
 
															+            self._img_writer.write,
														
 
															+            self.save_path,
														
 
															             self.markdown,
														
 
															             *args,
														
 
															             **kwargs,
														
 
															         )
														
 
															-    def _save_list_data(self, save_func, save_path, data, *args, **kwargs):
														
 
															+    def _save_data(
														
 
															+        self,
														
 
															+        save_mkd_func: Callable,
														
 
															+        save_img_func: Callable,
														
 
															+        save_path: Union[str, Path],
														
 
															+        data: Optional[Dict[str, Union[str, Dict[str, Any]]]],
														
 
															+        *args,
														
 
															+        **kwargs,
														
 
															+    ) -> None:
														
 
															+        """Internal method to save markdown and image data.
														
 
															+
														
 
															+        Args:
														
 
															+            save_mkd_func (Callable): Function to save markdown text.
														
 
															+            save_img_func (Callable): Function to save image data.
														
 
															+            save_path (Union[str, Path]): The base path where the data will be saved.
														
 
															+            data (Optional[Dict[str, Union[str, Dict[str, Any]]]]): The markdown data to save.
														
 
															+            *args: Additional positional arguments for saving.
														
 
															+            **kwargs: Additional keyword arguments for saving.
														
 
															+        """
														
 
															         save_path = Path(save_path)
														
 
															         if data is None:
														
 
															             return
														
 
															-        if isinstance(data, list):
														
 
															-            for idx, single in enumerate(data):
														
 
															-                save_func(
														
 
															-                    (
														
 
															-                        save_path.parent / f"{save_path.stem}_{idx}{save_path.suffix}"
														
 
															-                    ).as_posix(),
														
 
															-                    single,
														
 
															-                    *args,
														
 
															-                    **kwargs,
														
 
															-                )
														
 
															-        save_func(save_path.as_posix(), data, *args, **kwargs)
														
 
															-        logging.info(f"The result has been saved in {save_path}.")
														
 
															+        for key, value in data.items():
														
 
															+            if isinstance(value, str):
														
 
															+                save_mkd_func(save_path.as_posix(), value, *args, **kwargs)
														
 
															+            if isinstance(value, dict):
														
 
															+                base_save_path = save_path.parent
														
 
															+                for img_path, img_data in value.items():
														
 
															+                    save_path = base_save_path / img_path
														
 
															+                    save_img_func(save_path.as_posix(), img_data, *args, **kwargs)
														
--- a/paddlex/inference/pipelines/layout_parsing/result_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/result_v2.py
@@ -48,11 +48,22 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															         JsonMixin.__init__(self)
														
 
															         self.already_sorted = False
														
 
															+    def _get_input_fn(self):
														
 
															+        fn = super()._get_input_fn()
														
 
															+        if (page_idx := self["page_index"]) is not None:
														
 
															+            fp = Path(fn)
														
 
															+            stem, suffix = fp.stem, fp.suffix
														
 
															+            return f"{stem}_{page_idx}{suffix}"
														
 
															+        else:
														
 
															+            return fn
														
 
															+
														
 
															     def _to_img(self) -> dict[str, np.ndarray]:
														
 
															         res_img_dict = {}
														
 
															         model_settings = self["model_settings"]
														
 
															+        page_index = self["page_index"]
														
 
															         if model_settings["use_doc_preprocessor"]:
														
 
															-            res_img_dict.update(**self["doc_preprocessor_res"].img)
														
 
															+            for key, value in self["doc_preprocessor_res"].img.items():
														
 
															+                res_img_dict[key] = value
														
 
															         res_img_dict["layout_det_res"] = self["layout_det_res"].img["res"]
														
 
															         if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
														
@@ -92,16 +103,39 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															                 key = f"seal_res_region{seal_region_id}"
														
 
															                 res_img_dict[key] = sub_seal_res_dict["ocr_res_img"]
														
 
															-        # if (
														
 
															-        #     model_settings["use_formula_recognition"]
														
 
															-        #     and len(self["formula_res_list"]) > 0
														
 
															-        # ):
														
 
															-        #     for sno in range(len(self["formula_res_list"])):
														
 
															-        #         formula_res = self["formula_res_list"][sno]
														
 
															-        #         formula_region_id = formula_res["formula_region_id"]
														
 
															-        #         sub_formula_res_dict = formula_res.img
														
 
															-        #         key = f"formula_res_region{formula_region_id}"
														
 
															-        #         res_img_dict[key] = sub_formula_res_dict["res"]
														
 
															+        # for layout ordering image
														
 
															+        image = Image.fromarray(self["doc_preprocessor_res"]["output_img"])
														
 
															+        draw = ImageDraw.Draw(image, "RGBA")
														
 
															+        parsing_result = self["parsing_res_list"]
														
 
															+
														
 
															+        for block in parsing_result:
														
 
															+            if self.already_sorted == False:
														
 
															+                block = get_layout_ordering(
														
 
															+                    block,
														
 
															+                    no_mask_labels=[
														
 
															+                        "text",
														
 
															+                        "formula",
														
 
															+                        "algorithm",
														
 
															+                        "reference",
														
 
															+                        "content",
														
 
															+                        "abstract",
														
 
															+                    ],
														
 
															+                    already_sorted=self.already_sorted,
														
 
															+                )
														
 
															+
														
 
															+            sub_blocks = block["sub_blocks"]
														
 
															+            for sub_block in sub_blocks:
														
 
															+                bbox = sub_block["layout_bbox"]
														
 
															+                index = sub_block.get("index", None)
														
 
															+                label = sub_block["sub_label"]
														
 
															+                fill_color = get_show_color(label)
														
 
															+                draw.rectangle(bbox, fill=fill_color)
														
 
															+                if index is not None:
														
 
															+                    text_position = (bbox[2] + 2, bbox[1] - 10)
														
 
															+                    draw.text(text_position, str(index), fill="red")
														
 
															+
														
 
															+        self.already_sorted = True
														
 
															+        res_img_dict["layout_order_res"] = image
														
 
															         return res_img_dict
														
@@ -117,6 +151,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															         """
														
 
															         data = {}
														
 
															         data["input_path"] = self["input_path"]
														
 
															+        data["page_index"] = self["page_index"]
														
 
															         model_settings = self["model_settings"]
														
 
															         data["model_settings"] = model_settings
														
 
															         if self["model_settings"]["use_doc_preprocessor"]:
														
@@ -167,6 +202,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															         """
														
 
															         data = {}
														
 
															         data["input_path"] = self["input_path"]
														
 
															+        data["page_index"] = self["page_index"]
														
 
															         model_settings = self["model_settings"]
														
 
															         data["model_settings"] = model_settings
														
 
															         if self["model_settings"]["use_doc_preprocessor"]:
														
@@ -235,73 +271,6 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															                 res_xlsx_dict[key] = table_res.xlsx["pred"]
														
 
															         return res_xlsx_dict
														
 
															-    def save_to_pdf_order(self, save_path: str) -> None:
														
 
															-        """
														
 
															-        Save the layout ordering to an image file.
														
 
															-
														
 
															-        Args:
														
 
															-            save_path (str): The path where the image should be saved.
														
 
															-
														
 
															-        Returns:
														
 
															-            None
														
 
															-        """
														
 
															-        input_path = Path(self["input_path"])
														
 
															-        page_index = self["page_index"]
														
 
															-        save_path = Path(save_path)
														
 
															-        if save_path.suffix.lower() not in (".jpg", ".png"):
														
 
															-            if input_path.suffix.lower() == ".pdf":
														
 
															-                save_path = save_path / f"page_{page_index}.jpg"
														
 
															-            else:
														
 
															-                save_path = save_path / f"{input_path.stem}.jpg"
														
 
															-        else:
														
 
															-            save_path = save_path.with_suffix("")
														
 
															-
														
 
															-        ordering_image_path = (
														
 
															-            save_path.parent / f"{save_path.stem}_layout_order_res.jpg"
														
 
															-        )
														
 
															-
														
 
															-        try:
														
 
															-            image = Image.fromarray(self["doc_preprocessor_res"]["output_img"])
														
 
															-        except OSError as e:
														
 
															-            print(f"Error opening image: {e}")
														
 
															-            return
														
 
															-
														
 
															-        draw = ImageDraw.Draw(image, "RGBA")
														
 
															-        parsing_result = self["parsing_res_list"]
														
 
															-
														
 
															-        for block in parsing_result:
														
 
															-            if self.already_sorted == False:
														
 
															-                block = get_layout_ordering(
														
 
															-                    block,
														
 
															-                    no_mask_labels=[
														
 
															-                        "text",
														
 
															-                        "formula",
														
 
															-                        "algorithm",
														
 
															-                        "reference",
														
 
															-                        "content",
														
 
															-                        "abstract",
														
 
															-                    ],
														
 
															-                    already_sorted=self.already_sorted,
														
 
															-                )
														
 
															-
														
 
															-            sub_blocks = block["sub_blocks"]
														
 
															-            for sub_block in sub_blocks:
														
 
															-                bbox = sub_block["layout_bbox"]
														
 
															-                index = sub_block.get("index", None)
														
 
															-                label = sub_block["sub_label"]
														
 
															-                fill_color = get_show_color(label)
														
 
															-                draw.rectangle(bbox, fill=fill_color)
														
 
															-                if index is not None:
														
 
															-                    text_position = (bbox[2] + 2, bbox[1] - 10)
														
 
															-                    draw.text(text_position, str(index), fill="red")
														
 
															-
														
 
															-        self.already_sorted = True
														
 
															-
														
 
															-        # Ensure the directory exists and save the image
														
 
															-        ordering_image_path.parent.mkdir(parents=True, exist_ok=True)
														
 
															-        print(f"Saving ordering image to {ordering_image_path}")
														
 
															-        image.save(str(ordering_image_path))
														
 
															-
														
 
															     def _to_markdown(self) -> dict:
														
 
															         """
														
 
															         Convert the parsing result into Markdown text plus the images it references (nothing is written to disk here).
														
@@ -309,14 +278,8 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															         Returns:
														
 
															             Dict
														
 
															         """
														
 
															-        if self.save_path == None:
														
 
															-            is_save_mk_img = False
														
 
															-        else:
														
 
															-            is_save_mk_img = True
														
 
															-            save_path = Path(self.save_path)
														
 
															         parsing_result = self["parsing_res_list"]
														
 
															-
														
 
															         for block in parsing_result:
														
 
															             if self.already_sorted == False:
														
 
															                 block = get_layout_ordering(
														
@@ -333,12 +296,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															                 )
														
 
															         self.already_sorted == True
														
 
															-        if is_save_mk_img:
														
 
															-            recursive_img_array2path(
														
 
															-                self["parsing_res_list"],
														
 
															-                save_path.parent,
														
 
															-                labels=["img"],
														
 
															-            )
														
 
															+        recursive_img_array2path(self["parsing_res_list"], labels=["img"])
														
 
															         def _format_data(obj):
														
@@ -367,16 +325,12 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															                 )
														
 
															             def format_image(label):
														
 
															-                if is_save_mk_img is False:
														
 
															-                    return ""
														
 
															-
														
 
															                 img_tags = []
														
 
															                 if "img" in sub_block[label]:
														
 
															+                    image_path = "".join(sub_block[label]["img"].keys())
														
 
															                     img_tags.append(
														
 
															                         '<div style="text-align: center;"><img src="{}" alt="Image" /></div>'.format(
														
 
															-                            sub_block[label]["img"]
														
 
															-                            .replace("-\n", "")
														
 
															-                            .replace("\n", " "),
														
 
															+                            image_path.replace("-\n", "").replace("\n", " "),
														
 
															                         ),
														
 
															                     )
														
 
															                 if "image_text" in sub_block[label]:
														
@@ -456,4 +410,16 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															             return markdown_content
														
 
															-        return _format_data(self)
														
 
															+        markdown_info = dict()
														
 
															+        markdown_info["markdown_texts"] = _format_data(self)
														
 
															+        markdown_info["markdown_images"] = dict()
														
 
															+        for block in self["parsing_res_list"]:
														
 
															+            sub_blocks = block["sub_blocks"]
														
 
															+            for sub_block in sub_blocks:
														
 
															+                if sub_block["label"] == "image":
														
 
															+                    image_path, image_value = next(
														
 
															+                        iter(sub_block["image"]["img"].items())
														
 
															+                    )
														
 
															+                    markdown_info["markdown_images"][image_path] = image_value
														
 
															+
														
 
															+        return markdown_info
														
--- a/paddlex/inference/pipelines/layout_parsing/utils.py
+++ b/paddlex/inference/pipelines/layout_parsing/utils.py
@@ -24,6 +24,7 @@ __all__ = [
 
															 import numpy as np
														
 
															 import copy
														
 
															 import cv2
														
 
															+from PIL import Image
														
 
															 import uuid
														
 
															 from pathlib import Path
														
 
															 from typing import Optional, Union, List, Tuple, Dict, Any
														
@@ -724,16 +725,16 @@ def sort_by_xycut(
 
															     return res
														
 
															-def _img_array2path(data: np.ndarray, save_path: Union[str, Path]) -> str:
														
 
															+def _img_array2path(data: np.ndarray) -> str:
														
 
															     """
														
 
															     Wrap an image array as a PIL image keyed by a generated relative file path (nothing is written to disk).
														
 
															     Args:
														
 
															         data (np.ndarray): An image represented as a numpy array with 3 dimensions (H, W, C).
														
 
															-        save_path (Union[str, Path]): The base path where images should be saved.
														
 
															     Returns:
														
 
															-        str: The relative path of the saved image file.
														
 
															+        dict: A dictionary with a single key-value pair formatted as:
														
 
															+              {"imgs/image_{uuid4_hex}.png": PIL.Image.Image}
														
 
															     Raises:
														
 
															         ValueError: If the input data is not a valid image array.
														
@@ -741,17 +742,8 @@ def _img_array2path(data: np.ndarray, save_path: Union[str, Path]) -> str:
 
															     if isinstance(data, np.ndarray) and data.ndim == 3:
														
 
															         # Generate a unique filename using UUID
														
 
															         img_name = f"image_{uuid.uuid4().hex}.png"
														
 
															-        img_path = Path(save_path) / "imgs" / img_name
														
 
															-        img_path.parent.mkdir(
														
 
															-            parents=True, exist_ok=True
														
 
															-        )  # Ensure the directory exists
														
 
															-
														
 
															-        # Save the image using OpenCV
														
 
															-        success = cv2.imwrite(str(img_path), data)
														
 
															-        if not success:
														
 
															-            raise IOError(f"Failed to save image to {img_path}")
														
 
															-        return f"imgs/{img_name}"
														
 
															+        return {f"imgs/{img_name}": Image.fromarray(data[:, :, ::-1])}
														
 
															     else:
														
 
															         raise ValueError(
														
 
															             "Input data must be a 3-dimensional numpy array representing an image."
														
@@ -760,7 +752,6 @@ def _img_array2path(data: np.ndarray, save_path: Union[str, Path]) -> str:
 
															 def recursive_img_array2path(
														
 
															     data: Union[Dict[str, Any], List[Any]],
														
 
															-    save_path: Union[str, Path],
														
 
															     labels: List[str] = [],
														
 
															 ) -> None:
														
 
															     """
														
@@ -778,12 +769,12 @@ def recursive_img_array2path(
 
															     if isinstance(data, dict):
														
 
															         for k, v in data.items():
														
 
															             if k in labels and isinstance(v, np.ndarray) and v.ndim == 3:
														
 
															-                data[k] = _img_array2path(v, save_path)
														
 
															+                data[k] = _img_array2path(v)
														
 
															             else:
														
 
															-                recursive_img_array2path(v, save_path, labels)
														
 
															+                recursive_img_array2path(v, labels)
														
 
															     elif isinstance(data, list):
														
 
															         for item in data:
														
 
															-            recursive_img_array2path(item, save_path, labels)
														
 
															+            recursive_img_array2path(item, labels)
														
 
															 def _get_minbox_if_overlap_by_ratio(
														
--- a/paddlex/inference/utils/io/writers.py
+++ b/paddlex/inference/utils/io/writers.py
@@ -454,5 +454,5 @@ class MarkdownWriterBackend(_BaseWriterBackend):
 
															     def _write_obj(self, out_path, obj):
														
 
															         """write markdown obj"""
														
 
															-        with open(out_path, mode="a", encoding="utf-8", errors="replace") as f:
														
 
															+        with open(out_path, mode="w", encoding="utf-8", errors="replace") as f:
														
 
															             f.write(obj)