result.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import copy
  15. import PIL
  16. from PIL import Image, ImageDraw, ImageFont
  17. from ....utils.fonts import PINGFANG_FONT
  18. from ...common.result import BaseCVResult, JsonMixin
  19. from ...utils.color_map import font_colormap, get_colormap
  20. def draw_box(img, boxes):
  21. """
  22. Args:
  23. img (PIL.Image.Image): PIL image
  24. boxes (list): a list of dictionaries representing detection box information.
  25. Returns:
  26. img (PIL.Image.Image): visualized image
  27. """
  28. img = Image.fromarray(img)
  29. font_size = int(0.018 * int(img.width)) + 2
  30. font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
  31. draw_thickness = int(max(img.size) * 0.002)
  32. draw = ImageDraw.Draw(img)
  33. label2color = {}
  34. catid2fontcolor = {}
  35. color_list = get_colormap(rgb=True)
  36. for i, dt in enumerate(boxes):
  37. # clsid = dt["cls_id"]
  38. label, bbox, score = dt["label"], dt["coordinate"], dt["score"]
  39. if label not in label2color:
  40. color_index = i % len(color_list)
  41. label2color[label] = color_list[color_index]
  42. catid2fontcolor[label] = font_colormap(color_index)
  43. color = tuple(label2color[label])
  44. font_color = tuple(catid2fontcolor[label])
  45. if len(bbox) == 4:
  46. # draw bbox of normal object detection
  47. xmin, ymin, xmax, ymax = bbox
  48. rectangle = [
  49. (xmin, ymin),
  50. (xmin, ymax),
  51. (xmax, ymax),
  52. (xmax, ymin),
  53. (xmin, ymin),
  54. ]
  55. elif len(bbox) == 8:
  56. # draw bbox of rotated object detection
  57. x1, y1, x2, y2, x3, y3, x4, y4 = bbox
  58. rectangle = [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)]
  59. xmin = min(x1, x2, x3, x4)
  60. ymin = min(y1, y2, y3, y4)
  61. else:
  62. raise ValueError(
  63. f"Only support bbox format of [xmin,ymin,xmax,ymax] or [x1,y1,x2,y2,x3,y3,x4,y4], got bbox of shape {len(bbox)}."
  64. )
  65. # draw bbox
  66. draw.line(
  67. rectangle,
  68. width=draw_thickness,
  69. fill=color,
  70. )
  71. # draw label
  72. if score is not None:
  73. text = "{} {:.2f}".format(dt["label"], score)
  74. else:
  75. text = "{}".format(dt["label"])
  76. if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
  77. tw, th = draw.textsize(text, font=font)
  78. else:
  79. left, top, right, bottom = draw.textbbox((0, 0), text, font)
  80. tw, th = right - left, bottom - top + 4
  81. if ymin < th:
  82. draw.rectangle([(xmin, ymin), (xmin + tw + 4, ymin + th + 1)], fill=color)
  83. draw.text((xmin + 2, ymin - 2), text, fill=font_color, font=font)
  84. else:
  85. draw.rectangle([(xmin, ymin - th), (xmin + tw + 4, ymin + 1)], fill=color)
  86. draw.text((xmin + 2, ymin - th - 2), text, fill=font_color, font=font)
  87. return img
  88. class ShiTuResult(BaseCVResult):
  89. def _to_img(self):
  90. """apply"""
  91. boxes = [
  92. {
  93. "coordinate": box["coordinate"],
  94. "label": box["labels"][0],
  95. "score": box["rec_scores"][0],
  96. }
  97. for box in self["boxes"]
  98. if box["rec_scores"] is not None
  99. ]
  100. image = draw_box(self["input_img"][..., ::-1], boxes)
  101. return {"res": image}
  102. def _to_str(self, *args, **kwargs):
  103. data = copy.deepcopy(self)
  104. data.pop("input_img")
  105. return JsonMixin._to_str(data, *args, **kwargs)
  106. def _to_json(self, *args, **kwargs):
  107. data = copy.deepcopy(self)
  108. data.pop("input_img")
  109. return JsonMixin._to_json(data, *args, **kwargs)