writers.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import enum
  15. import json
  16. from pathlib import Path
  17. import numpy as np
  18. import pandas as pd
  19. import yaml
  20. from PIL import Image
  21. from ....utils.deps import class_requires_deps, is_dep_available
  22. from .tablepyxl import document_to_xl
  23. if is_dep_available("opencv-contrib-python"):
  24. import cv2
# Names exported by ``from <module> import *``: the writer type enum and the
# writer front-end classes. Backend classes are intentionally not listed.
__all__ = [
    "WriterType",
    "ImageWriter",
    "TextWriter",
    "JsonWriter",
    "CSVWriter",
    "HtmlWriter",
    "XlsxWriter",
    "YAMLWriter",
    "VideoWriter",
    "MarkdownWriter",
]
  37. class WriterType(enum.Enum):
  38. """WriterType"""
  39. IMAGE = 1
  40. VIDEO = 2
  41. TEXT = 3
  42. JSON = 4
  43. HTML = 5
  44. XLSX = 6
  45. CSV = 7
  46. YAML = 8
  47. MARKDOWN = 9
  48. TXT = 10
  49. class _BaseWriter(object):
  50. """_BaseWriter"""
  51. def __init__(self, backend, **bk_args):
  52. super().__init__()
  53. if len(bk_args) == 0:
  54. bk_args = self.get_default_backend_args()
  55. self.bk_type = backend
  56. self.bk_args = bk_args
  57. self._backend = self.get_backend()
  58. def write(self, out_path, obj):
  59. """write"""
  60. raise NotImplementedError
  61. def get_backend(self, bk_args=None):
  62. """get backend"""
  63. if bk_args is None:
  64. bk_args = self.bk_args
  65. return self._init_backend(self.bk_type, bk_args)
  66. def set_backend(self, backend, **bk_args):
  67. self.bk_type = backend
  68. self.bk_args = bk_args
  69. self._backend = self.get_backend()
  70. def _init_backend(self, bk_type, bk_args):
  71. """init backend"""
  72. raise NotImplementedError
  73. def get_type(self):
  74. """get type"""
  75. raise NotImplementedError
  76. def get_default_backend_args(self):
  77. """get default backend arguments"""
  78. return {}
  79. class ImageWriter(_BaseWriter):
  80. """ImageWriter"""
  81. def __init__(self, backend="opencv", **bk_args):
  82. super().__init__(backend=backend, **bk_args)
  83. def write(self, out_path, obj):
  84. """write"""
  85. return self._backend.write_obj(str(out_path), obj)
  86. def _init_backend(self, bk_type, bk_args):
  87. """init backend"""
  88. if bk_type == "opencv":
  89. return OpenCVImageWriterBackend(**bk_args)
  90. elif bk_type == "pil" or bk_type == "pillow":
  91. return PILImageWriterBackend(**bk_args)
  92. else:
  93. raise ValueError("Unsupported backend type")
  94. def get_type(self):
  95. """get type"""
  96. return WriterType.IMAGE
  97. class VideoWriter(_BaseWriter):
  98. """VideoWriter"""
  99. def __init__(self, backend="opencv", **bk_args):
  100. super().__init__(backend=backend, **bk_args)
  101. def write(self, out_path, obj):
  102. """write"""
  103. return self._backend.write_obj(str(out_path), obj)
  104. def _init_backend(self, bk_type, bk_args):
  105. """init backend"""
  106. if bk_type == "opencv":
  107. return OpenCVVideoWriterBackend(**bk_args)
  108. else:
  109. raise ValueError("Unsupported backend type")
  110. def get_type(self):
  111. """get type"""
  112. return WriterType.VIDEO
  113. class TextWriter(_BaseWriter):
  114. """TextWriter"""
  115. def __init__(self, backend="python", **bk_args):
  116. super().__init__(backend=backend, **bk_args)
  117. def write(self, out_path, obj):
  118. """write"""
  119. return self._backend.write_obj(str(out_path), obj)
  120. def _init_backend(self, bk_type, bk_args):
  121. """init backend"""
  122. if bk_type == "python":
  123. return TextWriterBackend(**bk_args)
  124. else:
  125. raise ValueError("Unsupported backend type")
  126. def get_type(self):
  127. """get type"""
  128. return WriterType.TEXT
  129. class JsonWriter(_BaseWriter):
  130. def __init__(self, backend="json", **bk_args):
  131. super().__init__(backend=backend, **bk_args)
  132. def write(self, out_path, obj, **bk_args):
  133. return self._backend.write_obj(str(out_path), obj, **bk_args)
  134. def _init_backend(self, bk_type, bk_args):
  135. if bk_type == "json":
  136. return JsonWriterBackend(**bk_args)
  137. elif bk_type == "ujson":
  138. return UJsonWriterBackend(**bk_args)
  139. else:
  140. raise ValueError("Unsupported backend type")
  141. def get_type(self):
  142. """get type"""
  143. return WriterType.JSON
  144. class HtmlWriter(_BaseWriter):
  145. def __init__(self, backend="html", **bk_args):
  146. super().__init__(backend=backend, **bk_args)
  147. def write(self, out_path, obj, **bk_args):
  148. return self._backend.write_obj(str(out_path), obj, **bk_args)
  149. def _init_backend(self, bk_type, bk_args):
  150. if bk_type == "html":
  151. return HtmlWriterBackend(**bk_args)
  152. else:
  153. raise ValueError("Unsupported backend type")
  154. def get_type(self):
  155. """get type"""
  156. return WriterType.HTML
  157. class XlsxWriter(_BaseWriter):
  158. def __init__(self, backend="xlsx", **bk_args):
  159. super().__init__(backend=backend, **bk_args)
  160. def write(self, out_path, obj, **bk_args):
  161. return self._backend.write_obj(str(out_path), obj, **bk_args)
  162. def _init_backend(self, bk_type, bk_args):
  163. if bk_type == "xlsx":
  164. return XlsxWriterBackend(**bk_args)
  165. else:
  166. raise ValueError("Unsupported backend type")
  167. def get_type(self):
  168. """get type"""
  169. return WriterType.XLSX
  170. class YAMLWriter(_BaseWriter):
  171. def __init__(self, backend="PyYAML", **bk_args):
  172. super().__init__(backend=backend, **bk_args)
  173. def write(self, out_path, obj, **bk_args):
  174. return self._backend.write_obj(str(out_path), obj, **bk_args)
  175. def _init_backend(self, bk_type, bk_args):
  176. if bk_type == "PyYAML":
  177. return YAMLWriterBackend(**bk_args)
  178. else:
  179. raise ValueError("Unsupported backend type")
  180. def get_type(self):
  181. """get type"""
  182. return WriterType.YAML
  183. class MarkdownWriter(_BaseWriter):
  184. """MarkdownWriter"""
  185. def __init__(self, backend="markdown", **bk_args):
  186. super().__init__(backend=backend, **bk_args)
  187. def write(self, out_path, obj):
  188. """write"""
  189. return self._backend.write_obj(str(out_path), obj)
  190. def _init_backend(self, bk_type, bk_args):
  191. """init backend"""
  192. if bk_type == "markdown":
  193. return MarkdownWriterBackend(**bk_args)
  194. else:
  195. raise ValueError("Unsupported backend type")
  196. def get_type(self):
  197. """get type"""
  198. return WriterType.MARKDOWN
  199. class _BaseWriterBackend(object):
  200. """_BaseWriterBackend"""
  201. def write_obj(self, out_path, obj, **bk_args):
  202. """write object"""
  203. Path(out_path).parent.mkdir(parents=True, exist_ok=True)
  204. return self._write_obj(out_path, obj, **bk_args)
  205. def _write_obj(self, out_path, obj, **bk_args):
  206. """write object"""
  207. raise NotImplementedError
  208. class TextWriterBackend(_BaseWriterBackend):
  209. """TextWriterBackend"""
  210. def __init__(self, mode="w", encoding="utf-8"):
  211. super().__init__()
  212. self.mode = mode
  213. self.encoding = encoding
  214. def _write_obj(self, out_path, obj):
  215. """write text object"""
  216. with open(out_path, mode=self.mode, encoding=self.encoding) as f:
  217. f.write(obj)
  218. class HtmlWriterBackend(_BaseWriterBackend):
  219. def __init__(self, mode="w", encoding="utf-8"):
  220. super().__init__()
  221. self.mode = mode
  222. self.encoding = encoding
  223. def _write_obj(self, out_path, obj, **bk_args):
  224. with open(out_path, mode=self.mode, encoding=self.encoding) as f:
  225. f.write(obj)
  226. class XlsxWriterBackend(_BaseWriterBackend):
  227. def _write_obj(self, out_path, obj, **bk_args):
  228. document_to_xl(obj, out_path)
class _ImageWriterBackend(_BaseWriterBackend):
    """Common base class for the image writer backends; adds no behavior."""
  231. @class_requires_deps("opencv-contrib-python")
  232. class OpenCVImageWriterBackend(_ImageWriterBackend):
  233. """OpenCVImageWriterBackend"""
  234. def _write_obj(self, out_path, obj):
  235. """write image object by OpenCV"""
  236. if isinstance(obj, Image.Image):
  237. # Assuming the channel order is RGB.
  238. arr = np.asarray(obj)[:, :, ::-1]
  239. elif isinstance(obj, np.ndarray):
  240. arr = obj
  241. else:
  242. raise TypeError("Unsupported object type")
  243. return cv2.imwrite(out_path, arr)
  244. class PILImageWriterBackend(_ImageWriterBackend):
  245. """PILImageWriterBackend"""
  246. def __init__(self, format_=None):
  247. super().__init__()
  248. self.format = format_
  249. def _write_obj(self, out_path, obj):
  250. """write image object by PIL"""
  251. if isinstance(obj, Image.Image):
  252. img = obj
  253. elif isinstance(obj, np.ndarray):
  254. img = Image.fromarray(obj)
  255. else:
  256. raise TypeError("Unsupported object type")
  257. if len(img.getbands()) == 4:
  258. self.format = "PNG"
  259. return img.save(out_path, format=self.format)
class _VideoWriterBackend(_BaseWriterBackend):
    """Common base class for the video writer backends; adds no behavior."""
  262. @class_requires_deps("opencv-contrib-python")
  263. class OpenCVVideoWriterBackend(_VideoWriterBackend):
  264. """OpenCVImageWriterBackend"""
  265. def _write_obj(self, out_path, obj):
  266. """write video object by OpenCV"""
  267. obj, fps = obj
  268. if isinstance(obj, np.ndarray):
  269. vr = obj
  270. width, height = vr[0].shape[1], vr[0].shape[0]
  271. fourcc = cv2.VideoWriter_fourcc(*"mp4v") # Alternatively, use 'XVID'
  272. out = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
  273. for frame in vr:
  274. out.write(frame)
  275. out.release()
  276. else:
  277. raise TypeError("Unsupported object type")
  278. class _BaseJsonWriterBackend(object):
  279. def __init__(self, indent=4, ensure_ascii=False):
  280. super().__init__()
  281. self.indent = indent
  282. self.ensure_ascii = ensure_ascii
  283. def write_obj(self, out_path, obj, **bk_args):
  284. Path(out_path).parent.mkdir(parents=True, exist_ok=True)
  285. return self._write_obj(out_path, obj, **bk_args)
  286. def _write_obj(self, out_path, obj):
  287. raise NotImplementedError
  288. class JsonWriterBackend(_BaseJsonWriterBackend):
  289. def _write_obj(self, out_path, obj, **bk_args):
  290. with open(out_path, "w", encoding="utf-8") as f:
  291. json.dump(obj, f, **bk_args)
class UJsonWriterBackend(_BaseJsonWriterBackend):
    """Placeholder for a ujson-based backend; writing is not implemented."""

    # TODO
    def _write_obj(self, out_path, obj, **bk_args):
        raise NotImplementedError
  296. class YAMLWriterBackend(_BaseWriterBackend):
  297. def __init__(self, mode="w", encoding="utf-8"):
  298. super().__init__()
  299. self.mode = mode
  300. self.encoding = encoding
  301. def _write_obj(self, out_path, obj, **bk_args):
  302. """write text object"""
  303. with open(out_path, mode=self.mode, encoding=self.encoding) as f:
  304. yaml.dump(obj, f, **bk_args)
  305. class CSVWriter(_BaseWriter):
  306. """CSVWriter"""
  307. def __init__(self, backend="pandas", **bk_args):
  308. super().__init__(backend=backend, **bk_args)
  309. def write(self, out_path, obj):
  310. """write"""
  311. return self._backend.write_obj(str(out_path), obj)
  312. def _init_backend(self, bk_type, bk_args):
  313. """init backend"""
  314. if bk_type == "pandas":
  315. return PandasCSVWriterBackend(**bk_args)
  316. else:
  317. raise ValueError("Unsupported backend type")
  318. def get_type(self):
  319. """get type"""
  320. return WriterType.CSV
class _CSVWriterBackend(_BaseWriterBackend):
    """Common base class for the CSV writer backends; adds no behavior."""
  323. class PandasCSVWriterBackend(_CSVWriterBackend):
  324. """PILImageWriterBackend"""
  325. def __init__(self):
  326. super().__init__()
  327. def _write_obj(self, out_path, obj):
  328. """write image object by PIL"""
  329. if isinstance(obj, pd.DataFrame):
  330. ts = obj
  331. else:
  332. raise TypeError("Unsupported object type")
  333. return ts.to_csv(out_path)
  334. class MarkdownWriterBackend(_BaseWriterBackend):
  335. """MarkdownWriterBackend"""
  336. def __init__(self):
  337. super().__init__()
  338. def _write_obj(self, out_path, obj):
  339. """write markdown obj"""
  340. with open(out_path, mode="w", encoding="utf-8", errors="replace") as f:
  341. f.write(obj)