keypoint_operators.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # function:
  15. # operators to process sample,
  16. # eg: decode/resize/crop image
  17. from __future__ import absolute_import
  18. try:
  19. from collections.abc import Sequence
  20. except Exception:
  21. from collections import Sequence
  22. import cv2
  23. import numpy as np
  24. import math
  25. import copy
  26. import os
  27. from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform
  28. from paddlex.ppdet.core.workspace import serializable
  29. from paddlex.ppdet.utils.logger import setup_logger
# Module-level logger, named after this module.
logger = setup_logger(__name__)

# NOTE(review): not referenced anywhere in the visible part of this file;
# confirm whether external code still relies on it before removing.
registered_ops = []

# Names of the operator classes exported by this module.
__all__ = [
    'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
    'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
    'TopDownAffine', 'ToHeatmapsTopDown', 'TopDownEvalAffine'
]
  37. def register_keypointop(cls):
  38. return serializable(cls)
  39. @register_keypointop
  40. class KeyPointFlip(object):
  41. """Get the fliped image by flip_prob. flip the coords also
  42. the left coords and right coords should exchange while flip, for the right keypoint will be left keypoint after image fliped
  43. Args:
  44. flip_permutation (list[17]): the left-right exchange order list corresponding to [0,1,2,...,16]
  45. hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
  46. flip_prob (float): the ratio whether to flip the image
  47. records(dict): the dict contained the image, mask and coords
  48. Returns:
  49. records(dict): contain the image, mask and coords after tranformed
  50. """
  51. def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
  52. super(KeyPointFlip, self).__init__()
  53. assert isinstance(flip_permutation, Sequence)
  54. self.flip_permutation = flip_permutation
  55. self.flip_prob = flip_prob
  56. self.hmsize = hmsize
  57. def __call__(self, records):
  58. image = records['image']
  59. kpts_lst = records['joints']
  60. mask_lst = records['mask']
  61. flip = np.random.random() < self.flip_prob
  62. if flip:
  63. image = image[:, ::-1]
  64. for idx, hmsize in enumerate(self.hmsize):
  65. if len(mask_lst) > idx:
  66. mask_lst[idx] = mask_lst[idx][:, ::-1]
  67. if kpts_lst[idx].ndim == 3:
  68. kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
  69. else:
  70. kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
  71. kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
  72. kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
  73. kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
  74. kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
  75. kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
  76. kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
  77. records['image'] = image
  78. records['joints'] = kpts_lst
  79. records['mask'] = mask_lst
  80. return records
  81. def get_warp_matrix(theta, size_input, size_dst, size_target):
  82. """Calculate the transformation matrix under the constraint of unbiased.
  83. Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
  84. Data Processing for Human Pose Estimation (CVPR 2020).
  85. Args:
  86. theta (float): Rotation angle in degrees.
  87. size_input (np.ndarray): Size of input image [w, h].
  88. size_dst (np.ndarray): Size of output image [w, h].
  89. size_target (np.ndarray): Size of ROI in input plane [w, h].
  90. Returns:
  91. matrix (np.ndarray): A matrix for transformation.
  92. """
  93. theta = np.deg2rad(theta)
  94. matrix = np.zeros((2, 3), dtype=np.float32)
  95. scale_x = size_dst[0] / size_target[0]
  96. scale_y = size_dst[1] / size_target[1]
  97. matrix[0, 0] = math.cos(theta) * scale_x
  98. matrix[0, 1] = -math.sin(theta) * scale_x
  99. matrix[0, 2] = scale_x * (
  100. -0.5 * size_input[0] * math.cos(theta) + 0.5 * size_input[1] *
  101. math.sin(theta) + 0.5 * size_target[0])
  102. matrix[1, 0] = math.sin(theta) * scale_y
  103. matrix[1, 1] = math.cos(theta) * scale_y
  104. matrix[1, 2] = scale_y * (
  105. -0.5 * size_input[0] * math.sin(theta) - 0.5 * size_input[1] *
  106. math.cos(theta) + 0.5 * size_target[1])
  107. return matrix
  108. @register_keypointop
  109. class RandomAffine(object):
  110. """apply affine transform to image, mask and coords
  111. to achieve the rotate, scale and shift effect for training image
  112. Args:
  113. max_degree (float): the max abslute rotate degree to apply, transform range is [-max_degree, max_degree]
  114. max_scale (list[2]): the scale range to apply, transform range is [min, max]
  115. max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
  116. hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
  117. trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
  118. scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
  119. records(dict): the dict contained the image, mask and coords
  120. Returns:
  121. records(dict): contain the image, mask and coords after tranformed
  122. """
  123. def __init__(self,
  124. max_degree=30,
  125. scale=[0.75, 1.5],
  126. max_shift=0.2,
  127. hmsize=[128, 256],
  128. trainsize=512,
  129. scale_type='short'):
  130. super(RandomAffine, self).__init__()
  131. self.max_degree = max_degree
  132. self.min_scale = scale[0]
  133. self.max_scale = scale[1]
  134. self.max_shift = max_shift
  135. self.hmsize = hmsize
  136. self.trainsize = trainsize
  137. self.scale_type = scale_type
  138. def _get_affine_matrix(self, center, scale, res, rot=0):
  139. """Generate transformation matrix."""
  140. h = scale
  141. t = np.zeros((3, 3), dtype=np.float32)
  142. t[0, 0] = float(res[1]) / h
  143. t[1, 1] = float(res[0]) / h
  144. t[0, 2] = res[1] * (-float(center[0]) / h + .5)
  145. t[1, 2] = res[0] * (-float(center[1]) / h + .5)
  146. t[2, 2] = 1
  147. if rot != 0:
  148. rot = -rot # To match direction of rotation from cropping
  149. rot_mat = np.zeros((3, 3), dtype=np.float32)
  150. rot_rad = rot * np.pi / 180
  151. sn, cs = np.sin(rot_rad), np.cos(rot_rad)
  152. rot_mat[0, :2] = [cs, -sn]
  153. rot_mat[1, :2] = [sn, cs]
  154. rot_mat[2, 2] = 1
  155. # Need to rotate around center
  156. t_mat = np.eye(3)
  157. t_mat[0, 2] = -res[1] / 2
  158. t_mat[1, 2] = -res[0] / 2
  159. t_inv = t_mat.copy()
  160. t_inv[:2, 2] *= -1
  161. t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
  162. return t
  163. def __call__(self, records):
  164. image = records['image']
  165. keypoints = records['joints']
  166. heatmap_mask = records['mask']
  167. degree = (np.random.random() * 2 - 1) * self.max_degree
  168. shape = np.array(image.shape[:2][::-1])
  169. center = center = np.array((np.array(shape) / 2))
  170. aug_scale = np.random.random() * (self.max_scale - self.min_scale
  171. ) + self.min_scale
  172. if self.scale_type == 'long':
  173. scale = max(shape[0], shape[1]) / 1.0
  174. elif self.scale_type == 'short':
  175. scale = min(shape[0], shape[1]) / 1.0
  176. else:
  177. raise ValueError('Unknown scale type: {}'.format(self.scale_type))
  178. roi_size = aug_scale * scale
  179. dx = int(0)
  180. dy = int(0)
  181. if self.max_shift > 0:
  182. dx = np.random.randint(-self.max_shift * roi_size,
  183. self.max_shift * roi_size)
  184. dy = np.random.randint(-self.max_shift * roi_size,
  185. self.max_shift * roi_size)
  186. center += np.array([dx, dy])
  187. input_size = 2 * center
  188. keypoints[..., :2] *= shape
  189. heatmap_mask *= 255
  190. kpts_lst = []
  191. mask_lst = []
  192. image_affine_mat = self._get_affine_matrix(
  193. center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
  194. image = cv2.warpAffine(
  195. image,
  196. image_affine_mat, (self.trainsize, self.trainsize),
  197. flags=cv2.INTER_LINEAR)
  198. for hmsize in self.hmsize:
  199. kpts = copy.deepcopy(keypoints)
  200. mask_affine_mat = self._get_affine_matrix(
  201. center, roi_size, (hmsize, hmsize), degree)[:2]
  202. if heatmap_mask is not None:
  203. mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
  204. (hmsize, hmsize))
  205. mask = ((mask / 255) > 0.5).astype(np.float32)
  206. kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
  207. mask_affine_mat)
  208. kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
  209. kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
  210. kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
  211. kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
  212. kpts_lst.append(kpts)
  213. mask_lst.append(mask)
  214. records['image'] = image
  215. records['joints'] = kpts_lst
  216. records['mask'] = mask_lst
  217. return records
  218. @register_keypointop
  219. class EvalAffine(object):
  220. """apply affine transform to image
  221. resize the short of [h,w] to standard size for eval
  222. Args:
  223. size (int): the standard length used to train, the 'short' of [h,w] will be resize to trainsize for standard
  224. records(dict): the dict contained the image, mask and coords
  225. Returns:
  226. records(dict): contain the image, mask and coords after tranformed
  227. """
  228. def __init__(self, size, stride=64):
  229. super(EvalAffine, self).__init__()
  230. self.size = size
  231. self.stride = stride
  232. def __call__(self, records):
  233. image = records['image']
  234. mask = records['mask'] if 'mask' in records else None
  235. s = self.size
  236. h, w, _ = image.shape
  237. trans, size_resized = get_affine_mat_kernel(h, w, s, inv=False)
  238. image_resized = cv2.warpAffine(image, trans, size_resized)
  239. if mask is not None:
  240. mask = cv2.warpAffine(mask, trans, size_resized)
  241. records['mask'] = mask
  242. if 'joints' in records:
  243. del records['joints']
  244. records['image'] = image_resized
  245. return records
  246. @register_keypointop
  247. class NormalizePermute(object):
  248. def __init__(self,
  249. mean=[123.675, 116.28, 103.53],
  250. std=[58.395, 57.120, 57.375],
  251. is_scale=True):
  252. super(NormalizePermute, self).__init__()
  253. self.mean = mean
  254. self.std = std
  255. self.is_scale = is_scale
  256. def __call__(self, records):
  257. image = records['image']
  258. image = image.astype(np.float32)
  259. if self.is_scale:
  260. image /= 255.
  261. image = image.transpose((2, 0, 1))
  262. mean = np.array(self.mean, dtype=np.float32)
  263. std = np.array(self.std, dtype=np.float32)
  264. invstd = 1. / std
  265. for v, m, s in zip(image, mean, invstd):
  266. v.__isub__(m).__imul__(s)
  267. records['image'] = image
  268. return records
  269. @register_keypointop
  270. class TagGenerate(object):
  271. """record gt coords for aeloss to sample coords value in tagmaps
  272. Args:
  273. num_joints (int): the keypoint numbers of dataset to train
  274. num_people (int): maxmum people to support for sample aeloss
  275. records(dict): the dict contained the image, mask and coords
  276. Returns:
  277. records(dict): contain the gt coords used in tagmap
  278. """
  279. def __init__(self, num_joints, max_people=30):
  280. super(TagGenerate, self).__init__()
  281. self.max_people = max_people
  282. self.num_joints = num_joints
  283. def __call__(self, records):
  284. kpts_lst = records['joints']
  285. kpts = kpts_lst[0]
  286. tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
  287. inds = np.where(kpts[..., 2] > 0)
  288. p, j = inds[0], inds[1]
  289. visible = kpts[inds]
  290. # tagmap is [p, j, 3], where last dim is j, y, x
  291. tagmap[p, j, 0] = j
  292. tagmap[p, j, 1] = visible[..., 1] # y
  293. tagmap[p, j, 2] = visible[..., 0] # x
  294. tagmap[p, j, 3] = 1
  295. records['tagmap'] = tagmap
  296. del records['joints']
  297. return records
  298. @register_keypointop
  299. class ToHeatmaps(object):
  300. """to generate the gaussin heatmaps of keypoint for heatmap loss
  301. Args:
  302. num_joints (int): the keypoint numbers of dataset to train
  303. hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
  304. sigma (float): the std of gaussin kernel genereted
  305. records(dict): the dict contained the image, mask and coords
  306. Returns:
  307. records(dict): contain the heatmaps used to heatmaploss
  308. """
  309. def __init__(self, num_joints, hmsize, sigma=None):
  310. super(ToHeatmaps, self).__init__()
  311. self.num_joints = num_joints
  312. self.hmsize = np.array(hmsize)
  313. if sigma is None:
  314. sigma = hmsize[0] // 64
  315. self.sigma = sigma
  316. r = 6 * sigma + 3
  317. x = np.arange(0, r, 1, np.float32)
  318. y = x[:, None]
  319. x0, y0 = 3 * sigma + 1, 3 * sigma + 1
  320. self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
  321. def __call__(self, records):
  322. kpts_lst = records['joints']
  323. mask_lst = records['mask']
  324. for idx, hmsize in enumerate(self.hmsize):
  325. mask = mask_lst[idx]
  326. kpts = kpts_lst[idx]
  327. heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
  328. inds = np.where(kpts[..., 2] > 0)
  329. visible = kpts[inds].astype(np.int64)[..., :2]
  330. ul = np.round(visible - 3 * self.sigma - 1)
  331. br = np.round(visible + 3 * self.sigma + 2)
  332. sul = np.maximum(0, -ul)
  333. sbr = np.minimum(hmsize, br) - ul
  334. dul = np.clip(ul, 0, hmsize - 1)
  335. dbr = np.clip(br, 0, hmsize)
  336. for i in range(len(visible)):
  337. dx1, dy1 = dul[i]
  338. dx2, dy2 = dbr[i]
  339. sx1, sy1 = sul[i]
  340. sx2, sy2 = sbr[i]
  341. heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
  342. self.gaussian[sy1:sy2, sx1:sx2],
  343. heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
  344. records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
  345. records['mask_{}x'.format(idx + 1)] = mask
  346. del records['mask']
  347. return records
  348. @register_keypointop
  349. class RandomFlipHalfBodyTransform(object):
  350. """apply data augment to image and coords
  351. to achieve the flip, scale, rotate and half body transform effect for training image
  352. Args:
  353. trainsize (list):[w, h], Image target size
  354. upper_body_ids (list): The upper body joint ids
  355. flip_pairs (list): The left-right joints exchange order list
  356. pixel_std (int): The pixel std of the scale
  357. scale (float): The scale factor to transform the image
  358. rot (int): The rotate factor to transform the image
  359. num_joints_half_body (int): The joints threshold of the half body transform
  360. prob_half_body (float): The threshold of the half body transform
  361. flip (bool): Whether to flip the image
  362. Returns:
  363. records(dict): contain the image and coords after tranformed
  364. """
  365. def __init__(self,
  366. trainsize,
  367. upper_body_ids,
  368. flip_pairs,
  369. pixel_std,
  370. scale=0.35,
  371. rot=40,
  372. num_joints_half_body=8,
  373. prob_half_body=0.3,
  374. flip=True,
  375. rot_prob=0.6):
  376. super(RandomFlipHalfBodyTransform, self).__init__()
  377. self.trainsize = trainsize
  378. self.upper_body_ids = upper_body_ids
  379. self.flip_pairs = flip_pairs
  380. self.pixel_std = pixel_std
  381. self.scale = scale
  382. self.rot = rot
  383. self.num_joints_half_body = num_joints_half_body
  384. self.prob_half_body = prob_half_body
  385. self.flip = flip
  386. self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
  387. self.rot_prob = rot_prob
  388. def halfbody_transform(self, joints, joints_vis):
  389. upper_joints = []
  390. lower_joints = []
  391. for joint_id in range(joints.shape[0]):
  392. if joints_vis[joint_id][0] > 0:
  393. if joint_id in self.upper_body_ids:
  394. upper_joints.append(joints[joint_id])
  395. else:
  396. lower_joints.append(joints[joint_id])
  397. if np.random.randn() < 0.5 and len(upper_joints) > 2:
  398. selected_joints = upper_joints
  399. else:
  400. selected_joints = lower_joints if len(
  401. lower_joints) > 2 else upper_joints
  402. if len(selected_joints) < 2:
  403. return None, None
  404. selected_joints = np.array(selected_joints, dtype=np.float32)
  405. center = selected_joints.mean(axis=0)[:2]
  406. left_top = np.amin(selected_joints, axis=0)
  407. right_bottom = np.amax(selected_joints, axis=0)
  408. w = right_bottom[0] - left_top[0]
  409. h = right_bottom[1] - left_top[1]
  410. if w > self.aspect_ratio * h:
  411. h = w * 1.0 / self.aspect_ratio
  412. elif w < self.aspect_ratio * h:
  413. w = h * self.aspect_ratio
  414. scale = np.array(
  415. [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
  416. dtype=np.float32)
  417. scale = scale * 1.5
  418. return center, scale
  419. def flip_joints(self, joints, joints_vis, width, matched_parts):
  420. joints[:, 0] = width - joints[:, 0] - 1
  421. for pair in matched_parts:
  422. joints[pair[0], :], joints[pair[1], :] = \
  423. joints[pair[1], :], joints[pair[0], :].copy()
  424. joints_vis[pair[0], :], joints_vis[pair[1], :] = \
  425. joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
  426. return joints * joints_vis, joints_vis
  427. def __call__(self, records):
  428. image = records['image']
  429. joints = records['joints']
  430. joints_vis = records['joints_vis']
  431. c = records['center']
  432. s = records['scale']
  433. r = 0
  434. if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
  435. np.random.rand() < self.prob_half_body):
  436. c_half_body, s_half_body = self.halfbody_transform(joints,
  437. joints_vis)
  438. if c_half_body is not None and s_half_body is not None:
  439. c, s = c_half_body, s_half_body
  440. sf = self.scale
  441. rf = self.rot
  442. s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
  443. r = np.clip(np.random.randn() * rf, -rf * 2,
  444. rf * 2) if np.random.random() <= self.rot_prob else 0
  445. if self.flip and np.random.random() <= 0.5:
  446. image = image[:, ::-1, :]
  447. joints, joints_vis = self.flip_joints(
  448. joints, joints_vis, image.shape[1], self.flip_pairs)
  449. c[0] = image.shape[1] - c[0] - 1
  450. records['image'] = image
  451. records['joints'] = joints
  452. records['joints_vis'] = joints_vis
  453. records['center'] = c
  454. records['scale'] = s
  455. records['rotate'] = r
  456. return records
  457. @register_keypointop
  458. class TopDownAffine(object):
  459. """apply affine transform to image and coords
  460. Args:
  461. trainsize (list): [w, h], the standard size used to train
  462. records(dict): the dict contained the image and coords
  463. Returns:
  464. records (dict): contain the image and coords after tranformed
  465. """
  466. def __init__(self, trainsize):
  467. self.trainsize = trainsize
  468. def __call__(self, records):
  469. image = records['image']
  470. joints = records['joints']
  471. joints_vis = records['joints_vis']
  472. rot = records['rotate'] if "rotate" in records else 0
  473. trans = get_affine_transform(records['center'], records['scale'] * 200,
  474. rot, self.trainsize)
  475. image = cv2.warpAffine(
  476. image,
  477. trans, (int(self.trainsize[0]), int(self.trainsize[1])),
  478. flags=cv2.INTER_LINEAR)
  479. for i in range(joints.shape[0]):
  480. if joints_vis[i, 0] > 0.0:
  481. joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
  482. records['image'] = image
  483. records['joints'] = joints
  484. return records
  485. @register_keypointop
  486. class TopDownEvalAffine(object):
  487. """apply affine transform to image and coords
  488. Args:
  489. trainsize (list): [w, h], the standard size used to train
  490. records(dict): the dict contained the image and coords
  491. Returns:
  492. records (dict): contain the image and coords after tranformed
  493. """
  494. def __init__(self, trainsize):
  495. self.trainsize = trainsize
  496. def __call__(self, records):
  497. image = records['image']
  498. rot = 0
  499. imshape = records['im_shape'][::-1]
  500. center = imshape / 2.
  501. scale = imshape
  502. trans = get_affine_transform(center, scale, rot, self.trainsize)
  503. image = cv2.warpAffine(
  504. image,
  505. trans, (int(self.trainsize[0]), int(self.trainsize[1])),
  506. flags=cv2.INTER_LINEAR)
  507. records['image'] = image
  508. return records
  509. @register_keypointop
  510. class ToHeatmapsTopDown(object):
  511. """to generate the gaussin heatmaps of keypoint for heatmap loss
  512. Args:
  513. hmsize (list): [w, h] output heatmap's size
  514. sigma (float): the std of gaussin kernel genereted
  515. records(dict): the dict contained the image and coords
  516. Returns:
  517. records (dict): contain the heatmaps used to heatmaploss
  518. """
  519. def __init__(self, hmsize, sigma):
  520. super(ToHeatmapsTopDown, self).__init__()
  521. self.hmsize = np.array(hmsize)
  522. self.sigma = sigma
  523. def __call__(self, records):
  524. joints = records['joints']
  525. joints_vis = records['joints_vis']
  526. num_joints = joints.shape[0]
  527. image_size = np.array(
  528. [records['image'].shape[1], records['image'].shape[0]])
  529. target_weight = np.ones((num_joints, 1), dtype=np.float32)
  530. target_weight[:, 0] = joints_vis[:, 0]
  531. target = np.zeros(
  532. (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
  533. tmp_size = self.sigma * 3
  534. for joint_id in range(num_joints):
  535. feat_stride = image_size / self.hmsize
  536. mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
  537. mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
  538. # Check that any part of the gaussian is in-bounds
  539. ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
  540. br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
  541. if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
  542. 0] < 0 or br[1] < 0:
  543. # If not, just return the image as is
  544. target_weight[joint_id] = 0
  545. continue
  546. # # Generate gaussian
  547. size = 2 * tmp_size + 1
  548. x = np.arange(0, size, 1, np.float32)
  549. y = x[:, np.newaxis]
  550. x0 = y0 = size // 2
  551. # The gaussian is not normalized, we want the center value to equal 1
  552. g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
  553. # Usable gaussian range
  554. g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
  555. g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
  556. # Image range
  557. img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
  558. img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
  559. v = target_weight[joint_id]
  560. if v > 0.5:
  561. target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
  562. 0]:g_y[1], g_x[0]:g_x[1]]
  563. records['target'] = target
  564. records['target_weight'] = target_weight
  565. del records['joints'], records['joints_vis']
  566. return records