det.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import numpy as np
  16. from ...utils.io import ImageReader
  17. from ..base import BaseComponent
  18. def restructured_boxes(boxes, labels, img_size):
  19. box_list = []
  20. w, h = img_size
  21. for box in boxes:
  22. xmin, ymin, xmax, ymax = list(map(int, box[2:]))
  23. xmin = max(0, xmin)
  24. ymin = max(0, ymin)
  25. xmax = min(w, xmax)
  26. ymax = min(h, ymax)
  27. box_list.append(
  28. {
  29. "cls_id": int(box[0]),
  30. "label": labels[int(box[0])],
  31. "score": float(box[1]),
  32. "coordinate": [xmin, ymin, xmax, ymax],
  33. }
  34. )
  35. return box_list
  36. class DetPostProcess(BaseComponent):
  37. """Save Result Transform"""
  38. INPUT_KEYS = ["img_path", "boxes", "img_size"]
  39. OUTPUT_KEYS = ["boxes"]
  40. DEAULT_INPUTS = {"boxes": "boxes", "img_size": "ori_img_size"}
  41. DEAULT_OUTPUTS = {"boxes": "boxes"}
  42. def __init__(self, threshold=0.5, labels=None):
  43. super().__init__()
  44. self.threshold = threshold
  45. self.labels = labels
  46. def apply(self, boxes, img_size):
  47. """apply"""
  48. expect_boxes = (boxes[:, 1] > self.threshold) & (boxes[:, 0] > -1)
  49. boxes = boxes[expect_boxes, :]
  50. boxes = restructured_boxes(boxes, self.labels, img_size)
  51. result = {"boxes": boxes}
  52. return result
  53. class CropByBoxes(BaseComponent):
  54. """Crop Image by Box"""
  55. YIELD_BATCH = False
  56. INPUT_KEYS = ["img_path", "boxes"]
  57. OUTPUT_KEYS = ["img", "box", "label"]
  58. DEAULT_INPUTS = {"img_path": "img_path", "boxes": "boxes"}
  59. DEAULT_OUTPUTS = {"img": "img", "box": "box", "label": "label"}
  60. def __init__(self):
  61. super().__init__()
  62. self._reader = ImageReader(backend="opencv")
  63. def apply(self, img_path, boxes):
  64. output_list = []
  65. img = self._reader.read(img_path)
  66. for bbox in boxes:
  67. label_id = bbox["cls_id"]
  68. box = bbox["coordinate"]
  69. label = bbox.get("label", label_id)
  70. xmin, ymin, xmax, ymax = [int(i) for i in box]
  71. img_crop = img[ymin:ymax, xmin:xmax]
  72. output_list.append({"img": img_crop, "box": box, "label": label})
  73. return output_list
  74. class DetPad(BaseComponent):
  75. INPUT_KEYS = "img"
  76. OUTPUT_KEYS = "img"
  77. DEAULT_INPUTS = {"img": "img"}
  78. DEAULT_OUTPUTS = {"img": "img"}
  79. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  80. """
  81. Pad image to a specified size.
  82. Args:
  83. size (list[int]): image target size
  84. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  85. """
  86. super().__init__()
  87. if isinstance(size, int):
  88. size = [size, size]
  89. self.size = size
  90. self.fill_value = fill_value
  91. def apply(self, img):
  92. im = img
  93. im_h, im_w = im.shape[:2]
  94. h, w = self.size
  95. if h == im_h and w == im_w:
  96. return {"img": im}
  97. canvas = np.ones((h, w, 3), dtype=np.float32)
  98. canvas *= np.array(self.fill_value, dtype=np.float32)
  99. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  100. return {"img": canvas}