| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476 |
- # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from typing import Union, Tuple, List, Dict, Any, Iterator
- from abc import abstractmethod
- from pathlib import Path
- import mimetypes
- import json
- import copy
- import numpy as np
- from PIL import Image
- import pandas as pd
- from ....utils import logging
- from ...utils.io import (
- JsonWriter,
- ImageReader,
- ImageWriter,
- CSVWriter,
- HtmlWriter,
- XlsxWriter,
- TextWriter,
- VideoWriter,
- )
- def _save_list_data(save_func, save_path, data, *args, **kwargs):
- """
- Save list type data to the specified path.
- If data type is a list, iterate through it and save each element using save_func with a modified filename (appending an index and the original file extension).
- Args:
- save_func (Callable): The function to be used for saving data.
- save_path (Union[str, Path]): The path to save the data.
- data (Union[None, list, Any]): The data to be saved. If None, the function will return immediately.
- *args: Additional positional arguments to be passed to save_func.
- **kwargs: Additional keyword arguments to be passed to save_func.
- Returns:
- None
- """
- save_path = Path(save_path)
- if data is None:
- return
- if isinstance(data, list):
- for idx, single in enumerate(data):
- save_func(
- (
- save_path.parent / f"{save_path.stem}_{idx}{save_path.suffix}"
- ).as_posix(),
- single,
- *args,
- **kwargs,
- )
- save_func(save_path.as_posix(), data, *args, **kwargs)
- logging.info(f"The result has been saved in {save_path}.")
class StrMixin:
    """Mixin that adds string rendering and logging helpers to a result."""

    @property
    def str(self) -> str:
        """Plain string form of the result.

        Returns:
            str: The output of ``_to_str`` called on this object with its defaults.
        """
        return self._to_str(self)

    def _to_str(
        self,
        data: dict,
        json_format: bool = False,
        indent: int = 4,
        ensure_ascii: bool = False,
    ) -> str:
        """Render ``data`` either with ``str()`` or as a JSON document.

        Args:
            data (dict): The object to render. Its ``json`` attribute is read
                when ``json_format`` is True.
            json_format (bool): If True, return a JSON formatted string. Default is False.
            indent (int): Indentation passed to ``json.dumps``. Default is 4.
            ensure_ascii (bool): Passed to ``json.dumps``; if True, non-ASCII
                characters are escaped. Default is False.

        Returns:
            str: The string representation of the data.
        """
        if not json_format:
            return str(data)
        return json.dumps(data.json, indent=indent, ensure_ascii=ensure_ascii)

    def print(
        self, json_format: bool = False, indent: int = 4, ensure_ascii: bool = False
    ) -> None:
        """Emit the string representation of the result through ``logging.info``.

        Args:
            json_format (bool): If True, log a JSON formatted string. Default is False.
            indent (int): Indentation used for JSON formatting. Default is 4.
            ensure_ascii (bool): If True, escape non-ASCII characters. Default is False.
        """
        rendered = self._to_str(
            self,
            json_format=json_format,
            indent=indent,
            ensure_ascii=ensure_ascii,
        )
        logging.info(rendered)
class JsonMixin:
    """Mixin class for adding JSON serialization capabilities."""

    def __init__(self) -> None:
        """Create the JSON writer and register ``save_to_json`` in ``self._save_funcs``."""
        self._json_writer = JsonWriter()
        self._save_funcs.append(self.save_to_json)

    def _to_json(self) -> Dict[str, Any]:
        """Convert the object to a JSON-serializable format.

        The conversion runs on a deep copy of the object.

        Returns:
            Dict[str, Any]: A dictionary representation of the object that is JSON-serializable.
        """

        def _format_data(obj):
            """Recursively replace values ``json`` cannot encode.

            Args:
                obj: The object to be formatted.

            Returns:
                Any: The formatted object.
            """
            # Any NumPy scalar (not only float32) becomes the matching Python
            # scalar; np.int64 / np.float16 / np.bool_ are rejected by json.dumps.
            if isinstance(obj, np.bool_):
                return bool(obj)
            if isinstance(obj, np.integer):
                return int(obj)
            if isinstance(obj, np.floating):
                return float(obj)
            if isinstance(obj, np.ndarray):
                # tolist() yields a scalar for 0-d arrays and nested lists
                # otherwise; recursing handles both shapes.
                return _format_data(obj.tolist())
            if isinstance(obj, pd.DataFrame):
                # Embedded as a JSON string of row records.
                return obj.to_json(orient="records", force_ascii=False)
            if isinstance(obj, Path):
                return obj.as_posix()
            if isinstance(obj, dict):
                return {k: _format_data(v) for k, v in obj.items()}
            if isinstance(obj, (list, tuple)):
                return [_format_data(i) for i in obj]
            return obj

        return _format_data(copy.deepcopy(self))

    @property
    def json(self) -> Dict[str, Any]:
        """Property to get the JSON representation of the result.

        Returns:
            Dict[str, Any]: The dict type JSON representation of the result.
        """
        return self._to_json()

    def save_to_json(
        self,
        save_path: str,
        indent: int = 4,
        ensure_ascii: bool = False,
        *args: List,
        **kwargs: Dict,
    ) -> None:
        """Save the JSON representation of the object to a file.

        Args:
            save_path (str): The path to save the JSON file. If ``mimetypes`` does not
                recognise it as a JSON file, it is treated as a directory and
                ``<stem of self['input_path']>.json`` is appended.
            indent (int): The number of spaces to indent for pretty printing. Default is 4.
            ensure_ascii (bool): If False, non-ASCII characters will be included in the output. Default is False.
            *args: Additional positional arguments to pass to the underlying writer.
            **kwargs: Additional keyword arguments to pass to the underlying writer.
        """
        mime_type, _ = mimetypes.guess_type(str(save_path))
        target = Path(save_path)
        if mime_type != "application/json":
            target = target / f"{Path(self['input_path']).stem}.json"
        # Always hand the writer a POSIX-style string, whether or not the
        # caller already supplied a file name (str or Path).
        self._json_writer.write(
            target.as_posix(),
            self.json,
            indent=indent,
            ensure_ascii=ensure_ascii,
            *args,
            **kwargs,
        )
class Base64Mixin:
    """Mixin that lets a result expose and persist a Base64 string."""

    def __init__(self, *args: List, **kwargs: Dict) -> None:
        """Create the text writer and register ``save_to_base64`` in ``self._save_funcs``.

        Args:
            *args: Positional arguments forwarded to ``TextWriter``.
            **kwargs: Keyword arguments forwarded to ``TextWriter``.
        """
        self._base64_writer = TextWriter(*args, **kwargs)
        self._save_funcs.append(self.save_to_base64)

    @abstractmethod
    def _to_base64(self) -> str:
        """Produce the Base64 string; concrete results must override this.

        Returns:
            str: The str type Base64 representation result.
        """
        raise NotImplementedError

    @property
    def base64(self) -> str:
        """Base64 form of the result, as returned by ``_to_base64``.

        Returns:
            str: The base64 representation of the result.
        """
        return self._to_base64()

    def save_to_base64(self, save_path: str, *args: List, **kwargs: Dict) -> None:
        """Write the Base64 string through the text writer.

        Args:
            save_path (str): Destination. A path ending in ``.b64`` (case-insensitive)
                is used as the file itself; any other path is treated as a directory
                and the file name of ``self['input_path']`` (stem plus suffix) is appended.
            *args: Additional positional arguments passed to the writer's ``write``.
            **kwargs: Additional keyword arguments passed to the writer's ``write``.
        """
        if str(save_path).lower().endswith(".b64"):
            destination = Path(save_path)
        else:
            source = Path(self["input_path"])
            destination = Path(save_path) / (source.stem + source.suffix)
        self._base64_writer.write(destination.as_posix(), self.base64, *args, **kwargs)
class ImgMixin:
    """Mixin that lets a result expose and persist an image rendering."""

    def __init__(self, backend: str = "pillow", *args: List, **kwargs: Dict) -> None:
        """Create the image writer and register ``save_to_img`` in ``self._save_funcs``.

        Args:
            backend (str): Backend name given to ``ImageWriter``. Defaults to "pillow".
            *args: Additional positional arguments forwarded to ``ImageWriter``.
            **kwargs: Additional keyword arguments forwarded to ``ImageWriter``.
        """
        self._img_writer = ImageWriter(backend=backend, *args, **kwargs)
        self._save_funcs.append(self.save_to_img)

    @abstractmethod
    def _to_img(self) -> Union[np.ndarray, Image.Image]:
        """Produce the image; concrete results must override this.

        Returns:
            Union[np.ndarray, Image.Image]: The image representation result.
        """
        raise NotImplementedError

    @property
    def img(self) -> Image.Image:
        """Image form of the result.

        Returns:
            Image.Image: The value of ``_to_img``; a NumPy array is converted
                with ``Image.fromarray``, anything else is returned unchanged.
        """
        rendered = self._to_img()
        # Callers expect a PIL image, so arrays are wrapped here.
        return Image.fromarray(rendered) if isinstance(rendered, np.ndarray) else rendered

    def save_to_img(self, save_path: str, *args: List, **kwargs: Dict) -> None:
        """Write the image through the image writer.

        Args:
            save_path (str): Destination. If ``mimetypes`` guesses an ``image/*``
                type for it, it is passed to the writer unchanged; otherwise it is
                treated as a directory and the file name of ``self['input_path']``
                (stem plus suffix) is appended.
            *args: Additional positional arguments passed to the writer's ``write``.
            **kwargs: Additional keyword arguments passed to the writer's ``write``.
        """
        guessed_type, _ = mimetypes.guess_type(save_path)
        names_an_image = guessed_type is not None and guessed_type.startswith("image/")
        if not names_an_image:
            source = Path(self["input_path"])
            save_path = (Path(save_path) / f"{source.stem}{source.suffix}").as_posix()
        self._img_writer.write(save_path, self.img, *args, **kwargs)
class CSVMixin:
    """Mixin that lets a result expose and persist a tabular (CSV) form."""

    def __init__(self, backend: str = "pandas", *args: List, **kwargs: Dict) -> None:
        """Create the CSV writer and register ``save_to_csv``.

        Args:
            backend (str): Backend name given to ``CSVWriter`` (default is "pandas").
            *args: Optional positional arguments forwarded to ``CSVWriter``.
            **kwargs: Optional keyword arguments forwarded to ``CSVWriter``.
        """
        self._csv_writer = CSVWriter(backend=backend, *args, **kwargs)
        # The registry is created here when nothing has set it up yet.
        if not hasattr(self, "_save_funcs"):
            self._save_funcs = []
        self._save_funcs.append(self.save_to_csv)

    @property
    def csv(self) -> pd.DataFrame:
        """Tabular form of the result, as returned by ``_to_csv``.

        Returns:
            pandas.DataFrame: The pandas.DataFrame representation of the result.
        """
        return self._to_csv()

    @abstractmethod
    def _to_csv(self) -> pd.DataFrame:
        """Produce the table; concrete results must override this.

        Returns:
            pandas.DataFrame: The pandas.DataFrame representation result.
        """
        raise NotImplementedError

    def save_to_csv(self, save_path: str, *args: List, **kwargs: Dict) -> None:
        """Write the table through the CSV writer.

        Args:
            save_path (str): Destination. A path ending in ".csv" (case-sensitive)
                is used as the file itself; any other path is treated as a directory
                and ``<stem of self['input_path']>.csv`` is appended.
            *args: Optional positional arguments passed to the writer's ``write``.
            **kwargs: Optional keyword arguments passed to the writer's ``write``.
        """
        destination = Path(save_path)
        if not str(save_path).endswith(".csv"):
            destination = destination / f"{Path(self['input_path']).stem}.csv"
        self._csv_writer.write(destination.as_posix(), self.csv, *args, **kwargs)
class HtmlMixin:
    """Mixin that lets a result expose and persist an HTML rendering."""

    def __init__(self, *args: List, **kwargs: Dict) -> None:
        """Create the HTML writer and register ``save_to_html`` in ``self._save_funcs``.

        Args:
            *args: Positional arguments forwarded to ``HtmlWriter``.
            **kwargs: Keyword arguments forwarded to ``HtmlWriter``.
        """
        self._html_writer = HtmlWriter(*args, **kwargs)
        self._save_funcs.append(self.save_to_html)

    @property
    def html(self) -> str:
        """HTML form of the result, as returned by ``_to_html``.

        Returns:
            str: The str type HTML representation of the result.
        """
        return self._to_html()

    @abstractmethod
    def _to_html(self) -> str:
        """Produce the HTML string; concrete results must override this.

        Returns:
            str: The str type HTML representation result.
        """
        raise NotImplementedError

    def save_to_html(self, save_path: str, *args: List, **kwargs: Dict) -> None:
        """Write the HTML string through the HTML writer.

        Args:
            save_path (str): Destination. A path ending in ".html" (case-sensitive)
                is used as the file itself; any other path is treated as a directory
                and ``<stem of self['input_path']>.html`` is appended.
            *args: Additional positional arguments passed to the writer's ``write``.
            **kwargs: Additional keyword arguments passed to the writer's ``write``.
        """
        destination = Path(save_path)
        if not str(save_path).endswith(".html"):
            destination = destination / f"{Path(self['input_path']).stem}.html"
        self._html_writer.write(destination.as_posix(), self.html, *args, **kwargs)
class XlsxMixin:
    """Mixin that lets a result expose and persist an XLSX form."""

    def __init__(self, *args: List, **kwargs: Dict) -> None:
        """Create the XLSX writer and register ``save_to_xlsx`` in ``self._save_funcs``.

        Args:
            *args: Positional arguments forwarded to the ``XlsxWriter`` constructor.
            **kwargs: Keyword arguments forwarded to the ``XlsxWriter`` constructor.
        """
        self._xlsx_writer = XlsxWriter(*args, **kwargs)
        self._save_funcs.append(self.save_to_xlsx)

    @property
    def xlsx(self) -> str:
        """XLSX form of the result, as returned by ``_to_xlsx``.

        Returns:
            str: The str type XLSX representation of the result.
        """
        return self._to_xlsx()

    @abstractmethod
    def _to_xlsx(self) -> str:
        """Produce the XLSX representation; concrete results must override this.

        Returns:
            str: The str type XLSX representation result.
        """
        raise NotImplementedError

    def save_to_xlsx(self, save_path: str, *args: List, **kwargs: Dict) -> None:
        """Write the value of ``self.xlsx`` through the XLSX writer.

        Args:
            save_path (str): Destination. A path ending in ".xlsx" (case-sensitive)
                is used as the file itself; any other path is treated as a directory
                and ``<stem of self['input_path']>.xlsx`` is appended.
            *args: Additional positional arguments passed to the writer's ``write``.
            **kwargs: Additional keyword arguments passed to the writer's ``write``.
        """
        destination = Path(save_path)
        if not str(save_path).endswith(".xlsx"):
            destination = destination / f"{Path(self['input_path']).stem}.xlsx"
        self._xlsx_writer.write(destination.as_posix(), self.xlsx, *args, **kwargs)
class VideoMixin:
    """Mixin that lets a result expose and persist a video rendering."""

    def __init__(self, backend="opencv", *args, **kwargs):
        """Remember the writer backend and register ``save_to_video`` in ``self._save_funcs``.

        Args:
            backend (str): Backend name later given to ``VideoWriter``. Defaults to "opencv".
            *args: Accepted but not used here.
            **kwargs: Accepted but not used here.
        """
        self._backend = backend
        self._save_funcs.append(self.save_to_video)

    @abstractmethod
    def _to_video(self):
        """Produce the video data; concrete results must override this."""
        raise NotImplementedError

    @property
    def video(self):
        """Video form of the result, as returned by ``_to_video``."""
        return self._to_video()

    def save_to_video(self, save_path, *args, **kwargs):
        """Write the video data with a freshly created ``VideoWriter``.

        Args:
            save_path (Union[str, Path]): Destination. A path ending in ".mp4",
                ".avi", ".mkv" or ".webm" (case-insensitive) is used as the file
                itself; any other path is treated as a directory and the file name
                of ``self['input_path']`` (stem plus suffix) is appended.
            *args: Forwarded both to the ``VideoWriter`` constructor and to each
                write call.
            **kwargs: Forwarded both to the ``VideoWriter`` constructor and to each
                write call.
        """
        # NOTE(review): the same *args/**kwargs reach the constructor and write();
        # confirm both accept them.
        writer = VideoWriter(backend=self._backend, *args, **kwargs)
        video_suffixes = (".mp4", ".avi", ".mkv", ".webm")
        if not str(save_path).lower().endswith(video_suffixes):
            source = Path(self["input_path"])
            save_path = Path(save_path) / f"{source.stem}{source.suffix}"
        _save_list_data(writer.write, save_path, self.video, *args, **kwargs)
|