batch.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
  2. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import inspect
  16. import functools
  17. import itertools
  18. __all__ = ["batchable_method", "apply_batch", "Batcher"]
  19. def batchable_method(func):
  20. """batchable"""
  21. @functools.wraps(func)
  22. def _wrapper(self, input_, *args, **kwargs):
  23. if isinstance(input_, list):
  24. output = []
  25. for ele in input_:
  26. out = func(self, ele, *args, **kwargs)
  27. output.append(out)
  28. return output
  29. else:
  30. return func(self, input_, *args, **kwargs)
  31. sig = inspect.signature(func)
  32. if not len(sig.parameters) >= 2:
  33. raise TypeError("The function to wrap should have at least two parameters.")
  34. return _wrapper
  35. def apply_batch(batch, callable_, *args, **kwargs):
  36. """apply batch"""
  37. output = []
  38. for ele in batch:
  39. out = callable_(ele, *args, **kwargs)
  40. output.append(out)
  41. return output
  42. class Batcher(object):
  43. """Batcher"""
  44. def __init__(self, iterable, batch_size=None):
  45. super().__init__()
  46. self.iterable = iterable
  47. self.batch_size = batch_size
  48. def __iter__(self):
  49. if self.batch_size is None:
  50. all_data = list(self.iterable)
  51. yield all_data
  52. else:
  53. iterator = iter(self.iterable)
  54. while True:
  55. batch = list(itertools.islice(iterator, self.batch_size))
  56. if not batch:
  57. break
  58. yield batch