predictor.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Any, Dict, List, Tuple, Union
  15. import numpy as np
  16. from ....modules.general_recognition.model_list import MODELS
  17. from ....utils.func_register import FuncRegister
  18. from ...common.batch_sampler import ImageBatchSampler
  19. from ...common.reader import ReadImage
  20. from ..base import BasePredictor
  21. from ..common import Normalize, Resize, ResizeByShort, ToBatch, ToCHWImage
  22. from .processors import NormalizeFeatures
  23. from .result import IdentityResult
  24. class ImageFeaturePredictor(BasePredictor):
  25. """ImageFeaturePredictor that inherits from BasePredictor."""
  26. entities = MODELS
  27. _FUNC_MAP = {}
  28. register = FuncRegister(_FUNC_MAP)
  29. def __init__(self, *args: List, **kwargs: Dict) -> None:
  30. """Initializes ClasPredictor.
  31. Args:
  32. *args: Arbitrary positional arguments passed to the superclass.
  33. **kwargs: Arbitrary keyword arguments passed to the superclass.
  34. """
  35. super().__init__(*args, **kwargs)
  36. self.preprocessors, self.infer, self.postprocessors = self._build()
  37. def _build_batch_sampler(self) -> ImageBatchSampler:
  38. """Builds and returns an ImageBatchSampler instance.
  39. Returns:
  40. ImageBatchSampler: An instance of ImageBatchSampler.
  41. """
  42. return ImageBatchSampler()
  43. def _get_result_class(self) -> type:
  44. """Returns the result class, IdentityResult.
  45. Returns:
  46. type: The IdentityResult class.
  47. """
  48. return IdentityResult
  49. def _build(self) -> Tuple:
  50. """Build the preprocessors, inference engine, and postprocessors based on the configuration.
  51. Returns:
  52. tuple: A tuple containing the preprocessors, inference engine, and postprocessors.
  53. """
  54. preprocessors = {"Read": ReadImage(format="RGB")}
  55. for cfg in self.config["PreProcess"]["transform_ops"]:
  56. tf_key = list(cfg.keys())[0]
  57. func = self._FUNC_MAP[tf_key]
  58. args = cfg.get(tf_key, {})
  59. if args is not None and "return_numpy" in args:
  60. args.pop("return_numpy")
  61. name, op = func(self, **args) if args else func(self)
  62. preprocessors[name] = op
  63. preprocessors["ToBatch"] = ToBatch()
  64. infer = self.create_static_infer()
  65. postprocessors = {}
  66. for key in self.config["PostProcess"]:
  67. func = self._FUNC_MAP.get(key)
  68. args = self.config["PostProcess"].get(key, {})
  69. name, op = func(self, **args) if args else func(self)
  70. postprocessors[name] = op
  71. return preprocessors, infer, postprocessors
  72. def process(self, batch_data: List[Union[str, np.ndarray]]) -> Dict[str, Any]:
  73. """
  74. Process a batch of data through the preprocessing, inference, and postprocessing.
  75. Args:
  76. batch_data (List[Union[str, np.ndarray], ...]): A batch of input data (e.g., image file paths).
  77. Returns:
  78. dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
  79. """
  80. batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
  81. batch_imgs = self.preprocessors["Resize"](imgs=batch_raw_imgs)
  82. batch_imgs = self.preprocessors["Normalize"](imgs=batch_imgs)
  83. batch_imgs = self.preprocessors["ToCHW"](imgs=batch_imgs)
  84. x = self.preprocessors["ToBatch"](imgs=batch_imgs)
  85. batch_preds = self.infer(x=x)
  86. features = self.postprocessors["NormalizeFeatures"](batch_preds)
  87. return {
  88. "input_path": batch_data.input_paths,
  89. "page_index": batch_data.page_indexes,
  90. "input_img": batch_raw_imgs,
  91. "feature": features,
  92. }
  93. @register("ResizeImage")
  94. # TODO(gaotingquan): backend & interpolation
  95. def build_resize(
  96. self, resize_short=None, size=None, backend="cv2", interpolation="LINEAR"
  97. ):
  98. assert resize_short or size
  99. if resize_short:
  100. op = ResizeByShort(
  101. target_short_edge=resize_short, size_divisor=None, interp="LINEAR"
  102. )
  103. else:
  104. op = Resize(target_size=size)
  105. return "Resize", op
  106. @register("NormalizeImage")
  107. def build_normalize(
  108. self,
  109. mean=[0.485, 0.456, 0.406],
  110. std=[0.229, 0.224, 0.225],
  111. scale=1 / 255,
  112. order="",
  113. channel_num=3,
  114. ):
  115. assert channel_num == 3
  116. return "Normalize", Normalize(scale=scale, mean=mean, std=std)
  117. @register("ToCHWImage")
  118. def build_to_chw(self):
  119. return "ToCHW", ToCHWImage()
  120. @register("NormalizeFeatures")
  121. def build_normalize_features(self):
  122. return "NormalizeFeatures", NormalizeFeatures()