processors.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Any, Dict, List
  15. import numpy as np
  16. import pandas as pd
  17. from ...utils.benchmark import benchmark
  18. @benchmark.timeit
  19. class GetCls:
  20. """A class to process prediction outputs and return class IDs and scores."""
  21. def __init__(self):
  22. """Initializes the GetCls instance."""
  23. super().__init__()
  24. def __call__(self, pred_list: List[Any]) -> List[pd.DataFrame]:
  25. """
  26. Processes a list of predictions and returns a list of DataFrames with class IDs and scores.
  27. Args:
  28. pred_list (List[Any]): A list of predictions, where each prediction is expected to be an iterable of arrays.
  29. Returns:
  30. List[pd.DataFrame]: A list of DataFrames, each containing the class ID and score for the corresponding prediction.
  31. """
  32. return [self.getcls(pred) for pred in pred_list]
  33. def getcls(self, pred: Any) -> pd.DataFrame:
  34. """
  35. Computes the class ID and score from a single prediction.
  36. Args:
  37. pred (Any): A prediction, expected to be an iterable where the first element is an array representing logits or probabilities.
  38. Returns:
  39. pd.DataFrame: A DataFrame containing the class ID and score for the prediction.
  40. """
  41. pred_ts = pred[0]
  42. pred_ts -= np.max(pred_ts, axis=-1, keepdims=True)
  43. pred_ts = np.exp(pred_ts) / np.sum(np.exp(pred_ts), axis=-1, keepdims=True)
  44. classid = np.argmax(pred_ts, axis=-1)
  45. pred_score = pred_ts[classid]
  46. result = pd.DataFrame.from_dict({"classid": [classid], "score": [pred_score]})
  47. result.index.name = "sample"
  48. return result
  49. @benchmark.timeit
  50. class BuildPadMask:
  51. """A class to build padding masks for time series data."""
  52. def __init__(self, input_data: Dict[str, Any]):
  53. """
  54. Initializes the BuildPadMask instance.
  55. Args:
  56. input_data (Dict[str, Any]): A dictionary containing configuration data, including 'features'
  57. and 'pad_mask' keys that influence how padding is applied.
  58. """
  59. super().__init__()
  60. self.input_data = input_data
  61. def __call__(self, ts_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  62. """
  63. Applies padding mask to a list of time series data.
  64. Args:
  65. ts_list (List[Dict[str, Any]]): A list of dictionaries, each representing a time series instance
  66. with keys like 'features' and 'past_target'.
  67. Returns:
  68. List[Dict[str, Any]]: A list of dictionaries with updated 'features' and 'pad_mask' keys.
  69. """
  70. return [self.padmask(ts) for ts in ts_list]
  71. def padmask(self, ts: Dict[str, Any]) -> Dict[str, Any]:
  72. """
  73. Builds a padding mask for a single time series instance.
  74. Args:
  75. ts (Dict[str, Any]): A dictionary representing a time series instance, expected to have keys
  76. like 'features' and 'past_target'.
  77. Returns:
  78. Dict[str, Any]: The input dictionary with potentially updated 'features' and 'pad_mask' keys.
  79. """
  80. if "features" in self.input_data:
  81. ts["features"] = ts["past_target"]
  82. if "pad_mask" in self.input_data:
  83. target_dim = len(ts["features"])
  84. max_length = self.input_data["pad_mask"][-1]
  85. if max_length > 0:
  86. ones = np.ones(max_length, dtype=np.int32)
  87. if max_length != target_dim:
  88. target_ndarray = np.array(ts["features"]).astype(np.float32)
  89. target_ndarray_final = np.zeros(
  90. [max_length, target_dim], dtype=np.int32
  91. )
  92. end = min(target_dim, max_length)
  93. target_ndarray_final[:end, :] = target_ndarray
  94. ts["features"] = target_ndarray_final
  95. ones[end:] = 0.0
  96. ts["pad_mask"] = ones
  97. else:
  98. ts["pad_mask"] = ones
  99. return ts