mixin.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Union, Tuple, List, Dict, Any, Iterator
  15. from abc import abstractmethod
  16. from pathlib import Path
  17. import mimetypes
  18. import json
  19. import copy
  20. import numpy as np
  21. from PIL import Image
  22. import pandas as pd
  23. from ....utils import logging
  24. from ...utils.io import (
  25. JsonWriter,
  26. ImageReader,
  27. ImageWriter,
  28. CSVWriter,
  29. HtmlWriter,
  30. XlsxWriter,
  31. TextWriter,
  32. VideoWriter,
  33. )
  34. class StrMixin:
  35. """Mixin class for adding string conversion capabilities."""
  36. @property
  37. def str(self) -> Dict[str, str]:
  38. """Property to get the string representation of the result.
  39. Returns:
  40. Dict[str, str]: The string representation of the result.
  41. """
  42. return self._to_str()
  43. def _to_str(
  44. self,
  45. ):
  46. """Convert the given result data to a string representation.
  47. Args:
  48. json_format (bool): If True, return a JSON formatted string. Default is False.
  49. indent (int): Number of spaces to indent for JSON formatting. Default is 4.
  50. ensure_ascii (bool): If True, ensure all characters are ASCII. Default is False.
  51. Returns:
  52. Dict[str, str]: The string representation of the result.
  53. """
  54. return {"res": str(self)}
  55. def print(self) -> None:
  56. """Print the string representation of the result."""
  57. logging.info(self.str)
  58. class JsonMixin:
  59. """Mixin class for adding JSON serialization capabilities."""
  60. def __init__(self) -> None:
  61. self._json_writer = JsonWriter()
  62. self._save_funcs.append(self.save_to_json)
  63. def _to_json(self) -> Dict[str, Dict[str, Any]]:
  64. """Convert the object to a JSON-serializable format.
  65. Returns:
  66. Dict[str, Dict[str, Any]]: A dictionary representation of the object that is JSON-serializable.
  67. """
  68. def _format_data(obj):
  69. """Helper function to format data into a JSON-serializable format.
  70. Args:
  71. obj: The object to be formatted.
  72. Returns:
  73. Any: The formatted object.
  74. """
  75. if isinstance(obj, np.float32):
  76. return float(obj)
  77. elif isinstance(obj, np.ndarray):
  78. return [_format_data(item) for item in obj.tolist()]
  79. elif isinstance(obj, pd.DataFrame):
  80. return obj.to_json(orient="records", force_ascii=False)
  81. elif isinstance(obj, Path):
  82. return obj.as_posix()
  83. elif isinstance(obj, dict):
  84. return dict({k: _format_data(v) for k, v in obj.items()})
  85. elif isinstance(obj, (list, tuple)):
  86. return [_format_data(i) for i in obj]
  87. else:
  88. return obj
  89. return {"res": _format_data(copy.deepcopy(self))}
  90. @property
  91. def json(self) -> Dict[str, Dict[str, Any]]:
  92. """Property to get the JSON representation of the result.
  93. Returns:
  94. Dict[str, Dict[str, Any]]: The dict type JSON representation of the result.
  95. """
  96. return self._to_json()
  97. def save_to_json(
  98. self,
  99. save_path: str,
  100. indent: int = 4,
  101. ensure_ascii: bool = False,
  102. *args: List,
  103. **kwargs: Dict,
  104. ) -> None:
  105. """Save the JSON representation of the object to a file.
  106. Args:
  107. save_path (str): The path to save the JSON file. If the save path does not end with '.json', it appends the base name and suffix of the input path.
  108. indent (int): The number of spaces to indent for pretty printing. Default is 4.
  109. ensure_ascii (bool): If False, non-ASCII characters will be included in the output. Default is False.
  110. *args: Additional positional arguments to pass to the underlying writer.
  111. **kwargs: Additional keyword arguments to pass to the underlying writer.
  112. """
  113. def _is_json_file(file_path):
  114. mime_type, _ = mimetypes.guess_type(file_path)
  115. return mime_type is not None and mime_type == "application/json"
  116. if not _is_json_file(save_path):
  117. fp = Path(self["input_path"])
  118. stem = fp.stem
  119. suffix = fp.suffix
  120. base_save_path = Path(save_path)
  121. for key in self.json:
  122. save_path = base_save_path / f"{stem}_{key}.json"
  123. self._json_writer.write(
  124. save_path.as_posix(), self.json[key], *args, **kwargs
  125. )
  126. else:
  127. if len(self.json) > 1:
  128. logging.warning(
  129. f"The result has multiple json files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
  130. )
  131. self._json_writer.write(
  132. save_path,
  133. self.json[list(self.json.keys())[0]],
  134. indent=indent,
  135. ensure_ascii=ensure_ascii,
  136. *args,
  137. **kwargs,
  138. )
  139. class Base64Mixin:
  140. """Mixin class for adding Base64 encoding capabilities."""
  141. def __init__(self, *args: List, **kwargs: Dict) -> None:
  142. """Initializes the Base64Mixin.
  143. Args:
  144. *args: Positional arguments to pass to the TextWriter.
  145. **kwargs: Keyword arguments to pass to the TextWriter.
  146. """
  147. self._base64_writer = TextWriter(*args, **kwargs)
  148. self._save_funcs.append(self.save_to_base64)
  149. @abstractmethod
  150. def _to_base64(self) -> Dict[str, str]:
  151. """Abstract method to convert the result to Base64.
  152. Returns:
  153. Dict[str, str]: The str type Base64 representation result.
  154. """
  155. raise NotImplementedError
  156. @property
  157. def base64(self) -> Dict[str, str]:
  158. """
  159. Property that returns the Base64 encoded content.
  160. Returns:
  161. Dict[str, str]: The base64 representation of the result.
  162. """
  163. return self._to_base64()
  164. def save_to_base64(self, save_path: str, *args: List, **kwargs: Dict) -> None:
  165. """Saves the Base64 encoded content to the specified path.
  166. Args:
  167. save_path (str): The path to save the base64 representation result. If the save path does not end with '.b64', it appends the base name and suffix of the input path.
  168. *args: Additional positional arguments that will be passed to the base64 writer.
  169. **kwargs: Additional keyword arguments that will be passed to the base64 writer.
  170. """
  171. if not str(save_path).lower().endswith((".b64")):
  172. fp = Path(self["input_path"])
  173. stem = fp.stem
  174. suffix = fp.suffix
  175. base_save_path = Path(save_path)
  176. for key in self.base64:
  177. save_path = base_save_path / f"{stem}_{key}.b64"
  178. self._base64_writer.write(
  179. save_path.as_posix(), self.base64[key], *args, **kwargs
  180. )
  181. else:
  182. if len(self.base64) > 1:
  183. logging.warning(
  184. f"The result has multiple base64 files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
  185. )
  186. self._base64_writer.write(
  187. save_path, self.base64[list(self.base64.keys())[0]], *args, **kwargs
  188. )
  189. class ImgMixin:
  190. """Mixin class for adding image handling capabilities."""
  191. def __init__(self, backend: str = "pillow", *args: List, **kwargs: Dict) -> None:
  192. """Initializes ImgMixin.
  193. Args:
  194. backend (str): The backend to use for image processing. Defaults to "pillow".
  195. *args: Additional positional arguments to pass to the ImageWriter.
  196. **kwargs: Additional keyword arguments to pass to the ImageWriter.
  197. """
  198. self._img_writer = ImageWriter(backend=backend, *args, **kwargs)
  199. self._save_funcs.append(self.save_to_img)
  200. @abstractmethod
  201. def _to_img(self) -> Dict[str, Image.Image]:
  202. """Abstract method to convert the result to an image.
  203. Returns:
  204. Dict[str, Image.Image]: The image representation result.
  205. """
  206. raise NotImplementedError
  207. @property
  208. def img(self) -> Dict[str, Image.Image]:
  209. """Property to get the image representation of the result.
  210. Returns:
  211. Dict[str, Image.Image]: The image representation of the result.
  212. """
  213. return self._to_img()
  214. def save_to_img(self, save_path: str, *args: List, **kwargs: Dict) -> None:
  215. """Saves the image representation of the result to the specified path.
  216. Args:
  217. save_path (str): The path to save the image. If the save path does not end with .jpg or .png, it appends the input path's stem and suffix to the save path.
  218. *args: Additional positional arguments that will be passed to the image writer.
  219. **kwargs: Additional keyword arguments that will be passed to the image writer.
  220. """
  221. def _is_image_file(file_path):
  222. mime_type, _ = mimetypes.guess_type(file_path)
  223. return mime_type is not None and mime_type.startswith("image/")
  224. if not _is_image_file(save_path):
  225. fp = Path(self["input_path"])
  226. stem = fp.stem
  227. suffix = fp.suffix
  228. base_save_path = Path(save_path)
  229. for key in self.img:
  230. save_path = base_save_path / f"{stem}_{key}{suffix}"
  231. self._img_writer.write(
  232. save_path.as_posix(), self.img[key], *args, **kwargs
  233. )
  234. else:
  235. if len(self.img) > 1:
  236. logging.warning(
  237. f"The result has multiple img files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
  238. )
  239. self._img_writer.write(
  240. save_path, self.img[list(self.img.keys())[0]], *args, **kwargs
  241. )
  242. class CSVMixin:
  243. """Mixin class for adding CSV handling capabilities."""
  244. def __init__(self, backend: str = "pandas", *args: List, **kwargs: Dict) -> None:
  245. """Initializes the CSVMixin.
  246. Args:
  247. backend (str): The backend to use for CSV operations (default is "pandas").
  248. *args: Optional positional arguments to pass to the CSVWriter.
  249. **kwargs: Optional keyword arguments to pass to the CSVWriter.
  250. """
  251. self._csv_writer = CSVWriter(backend=backend, *args, **kwargs)
  252. if not hasattr(self, "_save_funcs"):
  253. self._save_funcs = []
  254. self._save_funcs.append(self.save_to_csv)
  255. @property
  256. def csv(self) -> Dict[str, pd.DataFrame]:
  257. """Property to get the pandas Dataframe representation of the result.
  258. Returns:
  259. Dict[str, pd.DataFrame]: The pandas.DataFrame representation of the result.
  260. """
  261. return self._to_csv()
  262. @abstractmethod
  263. def _to_csv(self) -> Dict[str, pd.DataFrame]:
  264. """Abstract method to convert the result to pandas.DataFrame.
  265. Returns:
  266. Dict[str, pd.DataFrame]: The pandas.DataFrame representation result.
  267. """
  268. raise NotImplementedError
  269. def save_to_csv(self, save_path: str, *args: List, **kwargs: Dict) -> None:
  270. """Saves the result to a CSV file.
  271. Args:
  272. save_path (str): The path to save the CSV file. If the path does not end with ".csv",
  273. the stem of the input path attribute (self['input_path']) will be used as the filename.
  274. *args: Optional positional arguments to pass to the CSV writer's write method.
  275. **kwargs: Optional keyword arguments to pass to the CSV writer's write method.
  276. """
  277. def _is_csv_file(file_path):
  278. mime_type, _ = mimetypes.guess_type(file_path)
  279. return mime_type is not None and mime_type == "text/csv"
  280. if not _is_csv_file(save_path):
  281. fp = Path(self["input_path"])
  282. stem = fp.stem
  283. base_save_path = Path(save_path)
  284. for key in self.csv:
  285. save_path = base_save_path / f"{stem}_{key}.csv"
  286. self._csv_writer.write(
  287. save_path.as_posix(), self.csv[key], *args, **kwargs
  288. )
  289. else:
  290. if len(self.csv) > 1:
  291. logging.warning(
  292. f"The result has multiple csv files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
  293. )
  294. self._csv_writer.write(
  295. save_path, self.csv[list(self.csv.keys())[0]], *args, **kwargs
  296. )
  297. class HtmlMixin:
  298. """Mixin class for adding HTML handling capabilities."""
  299. def __init__(self, *args: List, **kwargs: Dict) -> None:
  300. """
  301. Initializes the HTML writer and appends the save_to_html method to the save functions list.
  302. Args:
  303. *args: Positional arguments passed to the HtmlWriter.
  304. **kwargs: Keyword arguments passed to the HtmlWriter.
  305. """
  306. self._html_writer = HtmlWriter(*args, **kwargs)
  307. self._save_funcs.append(self.save_to_html)
  308. @property
  309. def html(self) -> Dict[str, str]:
  310. """Property to get the HTML representation of the result.
  311. Returns:
  312. str: The str type HTML representation of the result.
  313. """
  314. return self._to_html()
  315. @abstractmethod
  316. def _to_html(self) -> Dict[str, str]:
  317. """Abstract method to convert the result to str type HTML representation.
  318. Returns:
  319. Dict[str, str]: The str type HTML representation result.
  320. """
  321. raise NotImplementedError
  322. def save_to_html(self, save_path: str, *args: List, **kwargs: Dict) -> None:
  323. """Saves the HTML representation of the object to the specified path.
  324. Args:
  325. save_path (str): The path to save the HTML file.
  326. *args: Additional positional arguments.
  327. **kwargs: Additional keyword arguments.
  328. """
  329. if not str(save_path).endswith(".html"):
  330. save_path = Path(save_path) / f"{Path(self['input_path']).stem}.html"
  331. else:
  332. save_path = Path(save_path)
  333. self._html_writer.write(save_path.as_posix(), self.html["res"], *args, **kwargs)
  334. class XlsxMixin:
  335. """Mixin class for adding XLSX handling capabilities."""
  336. def __init__(self, *args: List, **kwargs: Dict) -> None:
  337. """Initializes the XLSX writer and appends the save_to_xlsx method to the save functions.
  338. Args:
  339. *args: Positional arguments to be passed to the XlsxWriter constructor.
  340. **kwargs: Keyword arguments to be passed to the XlsxWriter constructor.
  341. """
  342. self._xlsx_writer = XlsxWriter(*args, **kwargs)
  343. self._save_funcs.append(self.save_to_xlsx)
  344. @property
  345. def xlsx(self) -> Dict[str, str]:
  346. """Property to get the XLSX representation of the result.
  347. Returns:
  348. Dict[str, str]: The str type XLSX representation of the result.
  349. """
  350. return self._to_xlsx()
  351. @abstractmethod
  352. def _to_xlsx(self) -> Dict[str, str]:
  353. """Abstract method to convert the result to str type XLSX representation.
  354. Returns:
  355. Dict[str, str]: The str type HTML representation result.
  356. """
  357. raise NotImplementedError
  358. def save_to_xlsx(self, save_path: str, *args: List, **kwargs: Dict) -> None:
  359. """Saves the HTML representation to an XLSX file.
  360. Args:
  361. save_path (str): The path to save the XLSX file. If the path does not end with ".xlsx",
  362. the filename will be set to the stem of the input path with ".xlsx" extension.
  363. *args: Additional positional arguments to pass to the XLSX writer.
  364. **kwargs: Additional keyword arguments to pass to the XLSX writer.
  365. """
  366. if not str(save_path).endswith(".xlsx"):
  367. save_path = Path(save_path) / f"{Path(self['input_path']).stem}.xlsx"
  368. else:
  369. save_path = Path(save_path)
  370. self._xlsx_writer.write(save_path.as_posix(), self.xlsx, *args, **kwargs)
  371. class VideoMixin:
  372. def __init__(self, backend="opencv", *args, **kwargs):
  373. self._backend = backend
  374. self._save_funcs.append(self.save_to_video)
  375. @abstractmethod
  376. def _to_video(self):
  377. raise NotImplementedError
  378. @property
  379. def video(self):
  380. return self._to_video()
  381. def save_to_video(self, save_path, *args, **kwargs):
  382. video_writer = VideoWriter(backend=self._backend, *args, **kwargs)
  383. if not str(save_path).lower().endswith((".mp4", ".avi", ".mkv", ".webm")):
  384. fp = Path(self["input_path"])
  385. save_path = Path(save_path) / f"{fp.stem}{fp.suffix}"
  386. video_writer.write(save_path.as_posix(), self.video["video"], *args, **kwargs)