audio_batch_sampler.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import ast
  16. from pathlib import Path
  17. import numpy as np
  18. from ....utils import logging
  19. from ....utils.cache import CACHE_DIR
  20. from ....utils.download import download
  21. from .base_batch_sampler import BaseBatchSampler
  class AudioBatchSampler(BaseBatchSampler):
      """Batch sampler for audio inputs.

      Each call to ``sample`` yields at most one batch holding a single file
      path. The batch size is fixed at 1; attempts to set any other value are
      logged and ignored.
      """

      def __init__(self):
          """Initialize the AudioBatchSampler with a batch size of 1."""
          super().__init__()
          # Routed through the ``batch_size`` setter defined below.
          self.batch_size = 1

      def _download_from_url(self, in_path):
          """Download a file from a URL into the prediction-input cache.

          Args:
              in_path (str): URL of the file to be downloaded.

          Returns:
              str: POSIX-style path of the downloaded file, located at
                  ``CACHE_DIR/predict_input/<last path component of the URL>``.
          """
          file_name = Path(in_path).name
          save_path = Path(CACHE_DIR) / "predict_input" / file_name
          # ``overwrite=True`` is passed so a previously cached file of the
          # same name is not reused.
          download(in_path, save_path, overwrite=True)
          return save_path.as_posix()

      def sample(self, inputs):
          """Generate a single-element batch of input file paths.

          Args:
              inputs (str): Local file path, or an ``http://`` / ``https://``
                  URL that is downloaded first.

          Yields:
              list: A one-element list containing the (possibly downloaded)
                  file path. Nothing is yielded for unsupported input types;
                  a warning is logged instead.
          """
          if not isinstance(inputs, str):
              # Report the offending value itself (the original interpolated
              # the builtin ``input`` function by mistake).
              logging.warning(
                  f"Not supported input data type! Only `str` are supported, but got: {inputs}."
              )
              return

          # Match the URL schemes explicitly so that a local file whose name
          # merely starts with "http" is not mistaken for a URL.
          if inputs.startswith(("http://", "https://")):
              inputs = self._download_from_url(inputs)
          yield [inputs]

      @BaseBatchSampler.batch_size.setter
      def batch_size(self, batch_size):
          """Set the batch size; only the value 1 is accepted.

          Any other value leaves the current batch size unchanged and logs a
          warning (no exception is raised).

          Args:
              batch_size (int): The batch size to set.
          """
          # only support batch size 1
          if batch_size != 1:
              logging.warning(
                  f"audio batch sampler only support batch size 1, but got {batch_size}."
              )
          else:
              self._batch_size = batch_size