processors.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. import pandas as pd
  16. from typing import List, Any, Dict
  17. class GetCls:
  18. """A class to process prediction outputs and return class IDs and scores."""
  19. def __init__(self):
  20. """Initializes the GetCls instance."""
  21. super().__init__()
  22. def __call__(self, pred_list: List[Any]) -> List[pd.DataFrame]:
  23. """
  24. Processes a list of predictions and returns a list of DataFrames with class IDs and scores.
  25. Args:
  26. pred_list (List[Any]): A list of predictions, where each prediction is expected to be an iterable of arrays.
  27. Returns:
  28. List[pd.DataFrame]: A list of DataFrames, each containing the class ID and score for the corresponding prediction.
  29. """
  30. return [self.getcls(pred) for pred in pred_list]
  31. def getcls(self, pred: Any) -> pd.DataFrame:
  32. """
  33. Computes the class ID and score from a single prediction.
  34. Args:
  35. pred (Any): A prediction, expected to be an iterable where the first element is an array representing logits or probabilities.
  36. Returns:
  37. pd.DataFrame: A DataFrame containing the class ID and score for the prediction.
  38. """
  39. pred_ts = pred[0]
  40. pred_ts -= np.max(pred_ts, axis=-1, keepdims=True)
  41. pred_ts = np.exp(pred_ts) / np.sum(np.exp(pred_ts), axis=-1, keepdims=True)
  42. classid = np.argmax(pred_ts, axis=-1)
  43. pred_score = pred_ts[classid]
  44. result = pd.DataFrame.from_dict({"classid": [classid], "score": [pred_score]})
  45. result.index.name = "sample"
  46. return result
  47. class BuildPadMask:
  48. """A class to build padding masks for time series data."""
  49. def __init__(self, input_data: Dict[str, Any]):
  50. """
  51. Initializes the BuildPadMask instance.
  52. Args:
  53. input_data (Dict[str, Any]): A dictionary containing configuration data, including 'features'
  54. and 'pad_mask' keys that influence how padding is applied.
  55. """
  56. super().__init__()
  57. self.input_data = input_data
  58. def __call__(self, ts_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  59. """
  60. Applies padding mask to a list of time series data.
  61. Args:
  62. ts_list (List[Dict[str, Any]]): A list of dictionaries, each representing a time series instance
  63. with keys like 'features' and 'past_target'.
  64. Returns:
  65. List[Dict[str, Any]]: A list of dictionaries with updated 'features' and 'pad_mask' keys.
  66. """
  67. return [self.padmask(ts) for ts in ts_list]
  68. def padmask(self, ts: Dict[str, Any]) -> Dict[str, Any]:
  69. """
  70. Builds a padding mask for a single time series instance.
  71. Args:
  72. ts (Dict[str, Any]): A dictionary representing a time series instance, expected to have keys
  73. like 'features' and 'past_target'.
  74. Returns:
  75. Dict[str, Any]: The input dictionary with potentially updated 'features' and 'pad_mask' keys.
  76. """
  77. if "features" in self.input_data:
  78. ts["features"] = ts["past_target"]
  79. if "pad_mask" in self.input_data:
  80. target_dim = len(ts["features"])
  81. max_length = self.input_data["pad_mask"][-1]
  82. if max_length > 0:
  83. ones = np.ones(max_length, dtype=np.int32)
  84. if max_length != target_dim:
  85. target_ndarray = np.array(ts["features"]).astype(np.float32)
  86. target_ndarray_final = np.zeros(
  87. [max_length, target_dim], dtype=np.int32
  88. )
  89. end = min(target_dim, max_length)
  90. target_ndarray_final[:end, :] = target_ndarray
  91. ts["features"] = target_ndarray_final
  92. ones[end:] = 0.0
  93. ts["pad_mask"] = ones
  94. else:
  95. ts["pad_mask"] = ones
  96. return ts