ts_common.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from pathlib import Path
  15. import joblib
  16. import numpy as np
  17. import pandas as pd
  18. from .....utils.download import download
  19. from .....utils.cache import CACHE_DIR
  20. from ..transform import BaseTransform
  21. from ..io.readers import TSReader
  22. from ..io.writers import TSWriter
  23. from .ts_functions import load_from_dataframe, time_feature
  24. __all__ = [
  25. "ReadTS",
  26. "BuildTSDataset",
  27. "TSCutOff",
  28. "TSNormalize",
  29. "TimeFeature",
  30. "TStoArray",
  31. "BuildPadMask",
  32. ]
  33. class ReadTS(BaseTransform):
  34. """Load image from the file."""
  35. def __init__(self):
  36. """
  37. Initialize the instance.
  38. Args:
  39. format (str, optional): Target color format to convert the image to.
  40. Choices are 'BGR', 'RGB', and 'GRAY'. Default: 'BGR'.
  41. """
  42. super().__init__()
  43. self._reader = TSReader(backend="pandas")
  44. self._writer = TSWriter(backend="pandas")
  45. def apply(self, data):
  46. """apply"""
  47. if "ts" in data:
  48. ts = data["ts"]
  49. ts_path = (Path(CACHE_DIR) / "predict_input" / "tmp_ts.csv").as_posix()
  50. self._writer.write(ts_path, ts)
  51. data["input_path"] = ts_path
  52. data["original_ts"] = ts
  53. return data
  54. elif "input_path" not in data:
  55. raise KeyError(f"Key {repr('input_path')} is required, but not found.")
  56. ts_path = data["input_path"]
  57. # XXX: auto download for url
  58. ts_path = self._download_from_url(ts_path)
  59. blob = self._reader.read(ts_path)
  60. data["input_path"] = ts_path
  61. data["ts"] = blob
  62. data["original_ts"] = blob
  63. return data
  64. def _download_from_url(self, in_path):
  65. if in_path.startswith("http"):
  66. file_name = Path(in_path).name
  67. save_path = Path(CACHE_DIR) / "predict_input" / file_name
  68. download(in_path, save_path, overwrite=True)
  69. return save_path.as_posix()
  70. return in_path
  71. @classmethod
  72. def get_input_keys(cls):
  73. """get input keys"""
  74. # input_path: Path of the image.
  75. return [["input_path"], ["ts"]]
  76. @classmethod
  77. def get_output_keys(cls):
  78. """get output keys"""
  79. # image: Image in hw or hwc format.
  80. # original_image: Original image in hw or hwc format.
  81. # original_image_size: Width and height of the original image.
  82. return ["ts", "original_ts"]
  83. class TSCutOff(BaseTransform):
  84. """Reorder the dimensions of the image from HWC to CHW."""
  85. def __init__(self, size):
  86. super().__init__()
  87. self.size = size
  88. def apply(self, data):
  89. df = data["ts"].copy()
  90. skip_len = self.size.get("skip_chunk_len", 0)
  91. if len(df) < self.size["in_chunk_len"] + skip_len:
  92. raise ValueError(
  93. f"The length of the input data is {len(df)}, but it should be at least {self.size['in_chunk_len'] + self.size['skip_chunk_len']} for training."
  94. )
  95. df = df[-(self.size["in_chunk_len"] + skip_len) :]
  96. data["ts"] = df
  97. data["original_ts"] = df
  98. return data
  99. @classmethod
  100. def get_input_keys(cls):
  101. """get input keys"""
  102. # image: Image in hwc format.
  103. return ["ts"]
  104. @classmethod
  105. def get_output_keys(cls):
  106. """get output keys"""
  107. # image: Image in chw format.
  108. return ["ts"]
  109. class TSNormalize(BaseTransform):
  110. """Flip the image vertically or horizontally."""
  111. def __init__(self, scale_path, params_info):
  112. """
  113. Initialize the instance.
  114. Args:
  115. mode (str, optional): 'H' for horizontal flipping and 'V' for vertical
  116. flipping. Default: 'H'.
  117. """
  118. super().__init__()
  119. self.scaler = joblib.load(scale_path)
  120. self.params_info = params_info
  121. def apply(self, data):
  122. """apply"""
  123. df = data["ts"].copy()
  124. if self.params_info.get("target_cols", None) is not None:
  125. df[self.params_info["target_cols"]] = self.scaler.transform(
  126. df[self.params_info["target_cols"]]
  127. )
  128. if self.params_info.get("feature_cols", None) is not None:
  129. df[self.params_info["feature_cols"]] = self.scaler.transform(
  130. df[self.params_info["feature_cols"]]
  131. )
  132. data["ts"] = df
  133. return data
  134. @classmethod
  135. def get_input_keys(cls):
  136. """get input keys"""
  137. # image: Image in hw or hwc format.
  138. return ["ts"]
  139. @classmethod
  140. def get_output_keys(cls):
  141. """get output keys"""
  142. # image: Image in hw or hwc format.
  143. return ["ts"]
  144. class TSDeNormalize(BaseTransform):
  145. """Flip the image vertically or horizontally."""
  146. def __init__(self, scale_path, params_info):
  147. """
  148. Initialize the instance.
  149. Args:
  150. mode (str, optional): 'H' for horizontal flipping and 'V' for vertical
  151. flipping. Default: 'H'.
  152. """
  153. super().__init__()
  154. self.scaler = joblib.load(scale_path)
  155. self.params_info = params_info
  156. def apply(self, data):
  157. """apply"""
  158. future_target = data["pred_ts"].copy()
  159. scale_cols = future_target.columns.values.tolist()
  160. future_target[scale_cols] = self.scaler.inverse_transform(
  161. future_target[scale_cols]
  162. )
  163. data["pred_ts"] = future_target
  164. return data
  165. @classmethod
  166. def get_input_keys(cls):
  167. """get input keys"""
  168. # image: Image in hw or hwc format.
  169. return ["pred_ts"]
  170. @classmethod
  171. def get_output_keys(cls):
  172. """get output keys"""
  173. # image: Image in hw or hwc format.
  174. return ["pred_ts"]
  175. class BuildTSDataset(BaseTransform):
  176. """bulid the ts."""
  177. def __init__(self, params_info):
  178. """
  179. Initialize the instance.
  180. Args:
  181. mode (str, optional): 'H' for horizontal flipping and 'V' for vertical
  182. flipping. Default: 'H'.
  183. """
  184. super().__init__()
  185. self.params_info = params_info
  186. def apply(self, data):
  187. """apply"""
  188. df = data["ts"].copy()
  189. tsdata = load_from_dataframe(df, **self.params_info)
  190. data["ts"] = tsdata
  191. data["original_ts"] = tsdata
  192. return data
  193. @classmethod
  194. def get_input_keys(cls):
  195. """get input keys"""
  196. # image: Image in hw or hwc format.
  197. return ["ts"]
  198. @classmethod
  199. def get_output_keys(cls):
  200. """get output keys"""
  201. # image: Image in hw or hwc format.
  202. return ["ts"]
  203. class TimeFeature(BaseTransform):
  204. """Normalize the image."""
  205. def __init__(self, params_info, size, holiday=False):
  206. """
  207. Initialize the instance.
  208. """
  209. super().__init__()
  210. self.freq = params_info["freq"]
  211. self.size = size
  212. self.holiday = holiday
  213. def apply(self, data):
  214. """apply"""
  215. ts = data["ts"].copy()
  216. if not self.holiday:
  217. ts = time_feature(
  218. ts,
  219. self.freq,
  220. ["hourofday", "dayofmonth", "dayofweek", "dayofyear"],
  221. self.size["out_chunk_len"],
  222. )
  223. else:
  224. ts = time_feature(
  225. ts,
  226. self.freq,
  227. [
  228. "minuteofhour",
  229. "hourofday",
  230. "dayofmonth",
  231. "dayofweek",
  232. "dayofyear",
  233. "monthofyear",
  234. "weekofyear",
  235. "holidays",
  236. ],
  237. self.size["out_chunk_len"],
  238. )
  239. data["ts"] = ts
  240. return data
  241. @classmethod
  242. def get_input_keys(cls):
  243. """get input keys"""
  244. # image: Image in hw or hwc format.
  245. return ["ts"]
  246. @classmethod
  247. def get_output_keys(cls):
  248. """get output keys"""
  249. # image: Image in hw or hwc format.
  250. return ["ts"]
  251. class BuildPadMask(BaseTransform):
  252. def __init__(self, input_data):
  253. super().__init__()
  254. self.input_data = input_data
  255. def apply(self, data):
  256. """apply"""
  257. df = data["ts"].copy()
  258. if "features" in self.input_data:
  259. df["features"] = df["past_target"]
  260. if "pad_mask" in self.input_data:
  261. target_dim = len(df["features"])
  262. max_length = self.input_data["pad_mask"][-1]
  263. if max_length > 0:
  264. ones = np.ones(max_length, dtype=np.int32)
  265. if max_length != target_dim:
  266. target_ndarray = np.array(df["features"]).astype(np.float32)
  267. target_ndarray_final = np.zeros(
  268. [max_length, target_dim], dtype=np.int32
  269. )
  270. end = min(target_dim, max_length)
  271. target_ndarray_final[:end, :] = target_ndarray
  272. df["features"] = target_ndarray_final
  273. ones[end:] = 0.0
  274. df["pad_mask"] = ones
  275. else:
  276. df["pad_mask"] = ones
  277. data["ts"] = df
  278. return data
  279. @classmethod
  280. def get_input_keys(cls):
  281. """get input keys"""
  282. # image: Image in hw or hwc format.
  283. return ["ts"]
  284. @classmethod
  285. def get_output_keys(cls):
  286. """get output keys"""
  287. # image: Image in hw or hwc format.
  288. return ["ts"]
  289. class TStoArray(BaseTransform):
  290. def __init__(self, input_data):
  291. super().__init__()
  292. self.input_data = input_data
  293. def apply(self, data):
  294. """apply"""
  295. df = data["ts"].copy()
  296. ts_list = []
  297. input_name = list(self.input_data.keys())
  298. input_name.sort()
  299. for key in input_name:
  300. ts_list.append(np.array(df[key]).astype("float32"))
  301. data["ts"] = ts_list
  302. return data
  303. @classmethod
  304. def get_input_keys(cls):
  305. """get input keys"""
  306. # image: Image in hw or hwc format.
  307. return ["ts"]
  308. @classmethod
  309. def get_output_keys(cls):
  310. """get output keys"""
  311. # image: Image in hw or hwc format.
  312. return ["ts"]
  313. class ArraytoTS(BaseTransform):
  314. def __init__(self, info_params):
  315. super().__init__()
  316. self.info_params = info_params
  317. def apply(self, data):
  318. """apply"""
  319. output_data = data["pred_ts"].copy()
  320. if data["original_ts"].get("past_target", None) is not None:
  321. ts = data["original_ts"]["past_target"]
  322. elif data["original_ts"].get("observed_cov_numeric", None) is not None:
  323. ts = data["original_ts"]["observed_cov_numeric"]
  324. elif data["original_ts"].get("known_cov_numeric", None) is not None:
  325. ts = data["original_ts"]["known_cov_numeric"]
  326. elif data["original_ts"].get("static_cov_numeric", None) is not None:
  327. ts = data["original_ts"]["static_cov_numeric"]
  328. else:
  329. raise ValueError("No value in original_ts")
  330. column_name = (
  331. self.info_params["target_cols"]
  332. if "target_cols" in self.info_params
  333. else self.info_params["feature_cols"]
  334. )
  335. if isinstance(self.info_params["freq"], str):
  336. past_target_index = ts.index
  337. if past_target_index.freq is None:
  338. past_target_index.freq = pd.infer_freq(ts.index)
  339. future_target_index = pd.date_range(
  340. past_target_index[-1] + past_target_index.freq,
  341. periods=output_data.shape[0],
  342. freq=self.info_params["freq"],
  343. name=self.info_params["time_col"],
  344. )
  345. elif isinstance(self.info_params["freq"], int):
  346. start_idx = max(ts.index) + 1
  347. stop_idx = start_idx + output_data.shape[0]
  348. future_target_index = pd.RangeIndex(
  349. start=start_idx,
  350. stop=stop_idx,
  351. step=self.info_params["freq"],
  352. name=self.info_params["time_col"],
  353. )
  354. future_target = pd.DataFrame(
  355. np.reshape(output_data, newshape=[output_data.shape[0], -1]),
  356. index=future_target_index,
  357. columns=column_name,
  358. )
  359. data["pred_ts"] = future_target
  360. return data
  361. @classmethod
  362. def get_input_keys(cls):
  363. """get input keys"""
  364. # image: Image in hw or hwc format.
  365. return ["pred_ts"]
  366. @classmethod
  367. def get_output_keys(cls):
  368. """get output keys"""
  369. # image: Image in hw or hwc format.
  370. return ["pred_ts"]
  371. class GetAnomaly(BaseTransform):
  372. def __init__(self, model_threshold, info_params):
  373. super().__init__()
  374. self.model_threshold = model_threshold
  375. self.info_params = info_params
  376. def apply(self, data):
  377. """apply"""
  378. output_data = data["pred_ts"].copy()
  379. if data["original_ts"].get("past_target", None) is not None:
  380. ts = data["original_ts"]["past_target"]
  381. elif data["original_ts"].get("observed_cov_numeric", None) is not None:
  382. ts = data["original_ts"]["observed_cov_numeric"]
  383. elif data["original_ts"].get("known_cov_numeric", None) is not None:
  384. ts = data["original_ts"]["known_cov_numeric"]
  385. elif data["original_ts"].get("static_cov_numeric", None) is not None:
  386. ts = data["original_ts"]["static_cov_numeric"]
  387. else:
  388. raise ValueError("No value in original_ts")
  389. column_name = (
  390. self.info_params["target_cols"]
  391. if "target_cols" in self.info_params
  392. else self.info_params["feature_cols"]
  393. )
  394. anomaly_score = np.mean(np.square(output_data - np.array(ts)), axis=-1)
  395. anomaly_label = (anomaly_score >= self.model_threshold) + 0
  396. past_target_index = ts.index
  397. past_target_index.name = self.info_params["time_col"]
  398. anomaly_label = pd.DataFrame(
  399. np.reshape(anomaly_label, newshape=[output_data.shape[0], -1]),
  400. index=past_target_index,
  401. columns=["label"],
  402. )
  403. data["pred_ts"] = anomaly_label
  404. return data
  405. @classmethod
  406. def get_input_keys(cls):
  407. """get input keys"""
  408. # image: Image in hw or hwc format.
  409. return ["pred_ts"]
  410. @classmethod
  411. def get_output_keys(cls):
  412. """get output keys"""
  413. # image: Image in hw or hwc format.
  414. return ["pred_ts"]