audio_batch_sampler.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from pathlib import Path
  15. from ....utils import logging
  16. from ....utils.cache import CACHE_DIR
  17. from ....utils.download import download
  18. from .base_batch_sampler import BaseBatchSampler
  19. class AudioBatchSampler(BaseBatchSampler):
  20. def __init__(self):
  21. """Initializes the BaseBatchSampler.
  22. Args:
  23. batch_size (int, optional): The size of each batch. Only support 1.
  24. """
  25. super().__init__()
  26. self.batch_size = 1
  27. def _download_from_url(self, in_path):
  28. """Download a file from a URL to a cache directory.
  29. Args:
  30. in_path (str): URL of the file to be downloaded.
  31. Returns:
  32. str: Path to the downloaded file.
  33. """
  34. file_name = Path(in_path).name
  35. save_path = Path(CACHE_DIR) / "predict_input" / file_name
  36. download(in_path, save_path, overwrite=True)
  37. return save_path.as_posix()
  38. def sample(self, inputs):
  39. """Generate list of input file path.
  40. Args:
  41. inputs (str): file path.
  42. Yields:
  43. list: list of file path.
  44. """
  45. if isinstance(inputs, str):
  46. if inputs.startswith("http"):
  47. inputs = self._download_from_url(inputs)
  48. yield [inputs]
  49. elif isinstance(inputs, list):
  50. yield inputs
  51. else:
  52. raise TypeError(
  53. f"Not supported input data type! Only `str` are supported, but got: {type(inputs)}."
  54. )
  55. @BaseBatchSampler.batch_size.setter
  56. def batch_size(self, batch_size):
  57. """Sets the batch size.
  58. Args:
  59. batch_size (int): The batch size to set.
  60. Raises:
  61. Warning: If the batch size is not equal 1.
  62. """
  63. # only support batch size 1
  64. if batch_size != 1:
  65. logging.warning(
  66. f"audio batch sampler only support batch size 1, but got {batch_size}."
  67. )
  68. else:
  69. self._batch_size = batch_size