det.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. import os
  16. import os.path as osp
  17. import random
  18. import cv2
  19. import time
  20. import numpy as np
  21. import xml.etree.ElementTree as ET
  22. import paddlex.utils.logging as logging
  23. def write_xml(im_info, label_info, anno_dir):
  24. im_fname = im_info['file_name']
  25. im_h, im_w, im_c = im_info['image_shape']
  26. is_crowd = label_info['is_crowd']
  27. gt_class = label_info['gt_class']
  28. gt_bbox = label_info['gt_bbox']
  29. gt_score = label_info['gt_score']
  30. gt_poly = label_info['gt_poly']
  31. difficult = label_info['difficult']
  32. import xml.dom.minidom as minidom
  33. xml_doc = minidom.Document()
  34. root = xml_doc.createElement("annotation")
  35. xml_doc.appendChild(root)
  36. node_filename = xml_doc.createElement("filename")
  37. node_filename.appendChild(xml_doc.createTextNode(im_fname))
  38. root.appendChild(node_filename)
  39. node_size = xml_doc.createElement("size")
  40. node_width = xml_doc.createElement("width")
  41. node_width.appendChild(xml_doc.createTextNode(str(im_w)))
  42. node_size.appendChild(node_width)
  43. node_height = xml_doc.createElement("height")
  44. node_height.appendChild(xml_doc.createTextNode(str(im_h)))
  45. node_size.appendChild(node_height)
  46. node_depth = xml_doc.createElement("depth")
  47. node_depth.appendChild(xml_doc.createTextNode(str(im_c)))
  48. node_size.appendChild(node_depth)
  49. root.appendChild(node_size)
  50. for i in range(len(label_info['gt_class'])):
  51. node_obj = xml_doc.createElement("object")
  52. node_name = xml_doc.createElement("name")
  53. label = gt_class[i]
  54. node_name.appendChild(xml_doc.createTextNode(label))
  55. node_obj.appendChild(node_name)
  56. node_diff = xml_doc.createElement("difficult")
  57. node_diff.appendChild(xml_doc.createTextNode(str(difficult[i][0])))
  58. node_obj.appendChild(node_diff)
  59. node_box = xml_doc.createElement("bndbox")
  60. node_xmin = xml_doc.createElement("xmin")
  61. node_xmin.appendChild(xml_doc.createTextNode(str(gt_bbox[i][0])))
  62. node_box.appendChild(node_xmin)
  63. node_ymin = xml_doc.createElement("ymin")
  64. node_ymin.appendChild(xml_doc.createTextNode(str(gt_bbox[i][1])))
  65. node_box.appendChild(node_ymin)
  66. node_xmax = xml_doc.createElement("xmax")
  67. node_xmax.appendChild(xml_doc.createTextNode(str(gt_bbox[i][2])))
  68. node_box.appendChild(node_xmax)
  69. node_ymax = xml_doc.createElement("ymax")
  70. node_ymax.appendChild(xml_doc.createTextNode(str(gt_bbox[i][3])))
  71. node_box.appendChild(node_ymax)
  72. node_obj.appendChild(node_box)
  73. root.appendChild(node_obj)
  74. img_name_part = im_fname.split('.')[0]
  75. with open(osp.join(anno_dir, img_name_part + ".xml"), 'w') as fxml:
  76. xml_doc.writexml(
  77. fxml, indent='\t', addindent='\t', newl='\n', encoding="utf-8")
  78. def paste_objects(templates, background, save_dir='dataset_clone'):
  79. """将目标物体粘贴在背景图片上生成新的图片,并加入到数据集中
  80. Args:
  81. templates (list|tuple):可以将多张图像上的目标物体同时粘贴在同一个背景图片上,
  82. 因此templates是一个列表,其中每个元素是一个dict,表示一张图片的目标物体。
  83. 一张图片的目标物体有`image`和`annos`两个关键字,`image`的键值是图像的路径,
  84. 或者是解码后的排列格式为(H, W, C)且类型为uint8且为BGR格式的数组。
  85. 图像上可以有多个目标物体,因此`annos`的键值是一个列表,列表中每个元素是一个dict,
  86. 表示一个目标物体的信息。该dict包含`polygon`和`category`两个关键字,
  87. 其中`polygon`表示目标物体的边缘坐标,例如[[0, 0], [0, 1], [1, 1], [1, 0]],
  88. `category`表示目标物体的类别,例如'dog'。
  89. background (dict): 背景图片可以有真值,因此background是一个dict,包含`image`和`annos`
  90. 两个关键字,`image`的键值是背景图像的路径,或者是解码后的排列格式为(H, W, C)
  91. 且类型为uint8且为BGR格式的数组。若背景图片上没有真值,则`annos`的键值是空列表[],
  92. 若有,则`annos`的键值是由多个dict组成的列表,每个dict表示一个物体的信息,
  93. 包含`bbox`和`category`两个关键字,`bbox`的键值是物体框左上角和右下角的坐标,即
  94. [x1, y1, x2, y2],`category`表示目标物体的类别,例如'dog'。
  95. save_dir (str):新图片及其标注文件的存储目录。默认值为`dataset_clone`。
  96. """
  97. if not osp.exists(save_dir):
  98. os.makedirs(save_dir)
  99. image_dir = osp.join(save_dir, 'JPEGImages_clone')
  100. anno_dir = osp.join(save_dir, 'Annotations_clone')
  101. json_path = osp.join(save_dir, "annotations.json")
  102. if not osp.exists(image_dir):
  103. os.makedirs(image_dir)
  104. if not osp.exists(anno_dir):
  105. os.makedirs(anno_dir)
  106. num_objs = len(background['annos'])
  107. for temp in templates:
  108. num_objs += len(temp['annos'])
  109. gt_bbox = np.zeros((num_objs, 4), dtype=np.float32)
  110. gt_class = list()
  111. gt_score = np.ones((num_objs, 1), dtype=np.float32)
  112. is_crowd = np.zeros((num_objs, 1), dtype=np.int32)
  113. difficult = np.zeros((num_objs, 1), dtype=np.int32)
  114. i = -1
  115. for i, back_anno in enumerate(background['annos']):
  116. gt_bbox[i] = back_anno['bbox']
  117. gt_class.append(back_anno['category'])
  118. back_im = background['image']
  119. if isinstance(back_im, np.ndarray):
  120. if len(back_im.shape) != 3:
  121. raise Exception(
  122. "background image should be 3-dimensions, but now is {}-dimensions".
  123. format(len(back_im.shape)))
  124. else:
  125. try:
  126. back_im = cv2.imread(back_im, cv2.IMREAD_UNCHANGED)
  127. except:
  128. raise TypeError('Can\'t read The image file {}!'.format(back_im))
  129. back_annos = background['annos']
  130. im_h, im_w, im_c = back_im.shape
  131. for temp in templates:
  132. temp_im = temp['image']
  133. if isinstance(temp_im, np.ndarray):
  134. if len(temp_im.shape) != 3:
  135. raise Exception(
  136. "template image should be 3-dimensions, but now is {}-dimensions".
  137. format(len(temp_im.shape)))
  138. else:
  139. try:
  140. temp_im = cv2.imread(temp_im, cv2.IMREAD_UNCHANGED)
  141. except:
  142. raise TypeError('Can\'t read The image file {}!'.format(
  143. temp_im))
  144. if im_c != temp_im.shape[-1]:
  145. raise Exception(
  146. "The channels of template({}) and background({}) images are not same. Objects cannot be pasted normally! Please check your images.".
  147. format(temp_im.shape[-1], im_c))
  148. temp_annos = temp['annos']
  149. for temp_anno in temp_annos:
  150. temp_mask = np.zeros(temp_im.shape, temp_im.dtype)
  151. temp_poly = np.array(temp_anno['polygon'], np.int32)
  152. temp_category = temp_anno['category']
  153. cv2.fillPoly(temp_mask, [temp_poly], (255, 255, 255))
  154. x_list = [temp_poly[i][0] for i in range(len(temp_poly))]
  155. y_list = [temp_poly[i][1] for i in range(len(temp_poly))]
  156. temp_poly_w = max(x_list) - min(x_list)
  157. temp_poly_h = max(y_list) - min(y_list)
  158. found = False
  159. while not found:
  160. center_x = random.randint(1, im_w - 1)
  161. center_y = random.randint(1, im_h - 1)
  162. if center_x < temp_poly_w / 2 or center_x > im_w - temp_poly_w / 2 - 1 or \
  163. center_y < temp_poly_h / 2 or center_y > im_h - temp_poly_h / 2 - 1:
  164. found = False
  165. continue
  166. if len(back_annos) == 0:
  167. found = True
  168. for back_anno in back_annos:
  169. x1, y1, x2, y2 = back_anno['bbox']
  170. if center_x > x1 and center_x < x2 and center_y > y1 and center_y < y2:
  171. found = False
  172. continue
  173. found = True
  174. center = (center_x, center_y)
  175. back_im = cv2.seamlessClone(temp_im, back_im, temp_mask, center,
  176. cv2.MIXED_CLONE)
  177. i += 1
  178. x1 = center[0] - temp_poly_w / 2
  179. x2 = center[0] + temp_poly_w / 2
  180. y1 = center[1] - temp_poly_h / 2
  181. y2 = center[1] + temp_poly_h / 2
  182. gt_bbox[i] = [x1, y1, x2, y2]
  183. gt_class.append(temp_category)
  184. im_fname = str(int(time.time() * 1000)) + '.jpg'
  185. im_info = {
  186. 'file_name': im_fname,
  187. 'image_shape': [im_h, im_w, im_c],
  188. }
  189. label_info = {
  190. 'is_crowd': is_crowd,
  191. 'gt_class': gt_class,
  192. 'gt_bbox': gt_bbox,
  193. 'gt_score': gt_score,
  194. 'difficult': difficult,
  195. 'gt_poly': [],
  196. }
  197. cv2.imwrite(osp.join(image_dir, im_fname), back_im.astype('uint8'))
  198. write_xml(im_info, label_info, anno_dir)
  199. logging.info("Gegerated image is saved in {}".format(image_dir))
  200. logging.info("Generated Annotation is saved as xml files in {}".format(
  201. anno_dir))