audio_reader.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. import lazy_paddle as paddle
  16. from ...utils.io import AudioReader
  17. class ReadAudio:
  18. """Load audio from the file."""
  19. def __init__(self):
  20. """
  21. Initialize the instance.
  22. """
  23. super().__init__()
  24. self._audio_reader = AudioReader(backend="wav")
  25. def read(self, input):
  26. if isinstance(input, str):
  27. audio, sample_rate = self._audio_reader.read(input)
  28. if sample_rate != 16000:
  29. raise ValueError(
  30. f"ReadAudio only supports 16k pcm or wav file.\n"
  31. f"However, got: {sample_rate}."
  32. )
  33. audio = audio[:, 0]
  34. audio = paddle.to_tensor(audio)
  35. return audio, sample_rate
  36. else:
  37. raise TypeError(
  38. f"ReadAudio only supports str, indicating an audio file path.\n"
  39. f"However, got type: {type(input).__name__}."
  40. )