jde_tracker.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
  16. """
  17. import numpy as np
  18. from collections import defaultdict
  19. from ..matching import jde_matching as matching
  20. from ..motion import KalmanFilter
  21. from .base_jde_tracker import TrackState, STrack
  22. from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
  23. from paddlex.ppdet.core.workspace import register, serializable
  24. from paddlex.ppdet.utils.logger import setup_logger
  25. logger = setup_logger(__name__)
  26. __all__ = ['JDETracker']
  @register
  @serializable
  class JDETracker(object):
      """
      JDE tracker, support single class and multi classes.

      Args:
          use_byte (bool): if True, tracks and detections are associated by
              IoU distance only (embeddings are not used), and a second IoU
              association is run on the detections whose score lies between
              `low_conf_thres` and `conf_thres`. In this mode `det_thresh`
              is replaced by `conf_thres + 0.1`.
          num_classes (int): the number of classes
          det_thresh (float): threshold of detection score; an unmatched
              detection scoring below it does not start a new track
          track_buffer (int): buffer for tracker (stored only, not read
              inside this class)
          min_box_area (int): min box area to filter out low quality boxes
              (stored only, not read inside this class)
          vertical_ratio (float): w/h, the vertical ratio of the bbox to
              filter bad results. If set <0 means no need to filter bboxes,
              usually set 1.6 for pedestrian tracking (stored only, not read
              inside this class).
          tracked_thresh (float): linear assignment threshold of tracked
              stracks and detections
          r_tracked_thresh (float): linear assignment threshold of
              tracked stracks and unmatched detections
          unconfirmed_thresh (float): linear assignment threshold of
              unconfirmed stracks and unmatched detections
          conf_thres (float): confidence threshold for tracking; only
              detections scoring strictly above it enter the first
              association
          match_thres (float): linear assignment threshold of the first
              association when `use_byte` is True
          low_conf_thres (float): lower score bound (exclusive) of the
              detections used by the second association when `use_byte`
              is True
          motion (str): motion model, only 'KalmanFilter' is supported
          metric_type (str): either "euclidean" or "cosine", the distance
              metric used for measurement to track association.

      Raises:
          ValueError: if `motion` is not 'KalmanFilter'.
      """
      __shared__ = ['num_classes']

      def __init__(self,
                   use_byte=False,
                   num_classes=1,
                   det_thresh=0.3,
                   track_buffer=30,
                   min_box_area=200,
                   vertical_ratio=1.6,
                   tracked_thresh=0.7,
                   r_tracked_thresh=0.5,
                   unconfirmed_thresh=0.7,
                   conf_thres=0,
                   match_thres=0.8,
                   low_conf_thres=0.2,
                   motion='KalmanFilter',
                   metric_type='euclidean'):
          # Fail early: without a motion model `update` cannot run at all.
          if motion != 'KalmanFilter':
              raise ValueError(
                  "Unsupported motion model {!r}, only 'KalmanFilter' is "
                  "available.".format(motion))

          self.use_byte = use_byte
          self.num_classes = num_classes
          self.det_thresh = det_thresh if not use_byte else conf_thres + 0.1
          self.track_buffer = track_buffer
          self.min_box_area = min_box_area
          self.vertical_ratio = vertical_ratio
          self.tracked_thresh = tracked_thresh
          self.r_tracked_thresh = r_tracked_thresh
          self.unconfirmed_thresh = unconfirmed_thresh
          self.conf_thres = conf_thres
          self.match_thres = match_thres
          self.low_conf_thres = low_conf_thres
          self.motion = KalmanFilter()
          self.metric_type = metric_type

          self.frame_id = 0
          self.tracked_tracks_dict = defaultdict(list)  # dict(list[STrack])
          self.lost_tracks_dict = defaultdict(list)  # dict(list[STrack])
          self.removed_tracks_dict = defaultdict(list)  # dict(list[STrack])

          self.max_time_lost = 0
          # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)

      @staticmethod
      def _build_detections(dets, cls_id, embs=None):
          """Wrap detection rows (and optional embeddings) into STrack objects."""
          # The literal 30 is the 4th STrack argument, kept from the original
          # code (presumably the feature buffer size - confirm against STrack).
          if embs is None:
              return [
                  STrack(
                      STrack.tlbr_to_tlwh(det[:4]),
                      det[4],
                      cls_id,
                      30,
                      temp_feat=None) for det in dets
              ]
          return [
              STrack(STrack.tlbr_to_tlwh(det[:4]), det[4], cls_id, 30, feat)
              for det, feat in zip(dets, embs)
          ]

      def update(self, pred_dets, pred_embs=None):
          """
          Processes the image frame and finds bounding box(detections).
          Associates the detection with corresponding tracklets and also handles
          lost, removed, refound and active tracklets.

          Args:
              pred_dets (np.array): Detection results of the image, the shape is
                  [N, 6], means 'x0, y0, x1, y1, score, cls_id'.
              pred_embs (np.array): Embedding results of the image, the shape is
                  [N, 128] or [N, 512]. Needed when `use_byte` is False.

          Return:
              output_tracks_dict (dict(list)): maps each class id to the list
                  of activated tracks that are being tracked after processing
                  the received frame.

          Raises:
              ValueError: if `use_byte` is False, `pred_embs` is None and at
                  least one detection scores above `conf_thres`.
          """
          self.frame_id += 1
          if self.frame_id == 1:
              STrack.init_count(self.num_classes)

          output_tracks_dict = defaultdict(list)

          for cls_id in range(self.num_classes):
              # Tracks collected for this class during the current frame.
              activated = []
              refined = []
              lost = []
              removed = []

              # Step 1: Get detections by class
              cls_mask = pred_dets[:, 5] == cls_id
              dets_cls = pred_dets[cls_mask]
              embs_cls = pred_embs[cls_mask] if pred_embs is not None else None

              high_mask = dets_cls[:, 4] > self.conf_thres
              if high_mask.any():
                  high_dets = dets_cls[high_mask]
                  if self.use_byte:
                      detections = self._build_detections(high_dets, cls_id)
                  else:
                      if embs_cls is None:
                          raise ValueError(
                              'pred_embs is required when use_byte is False.')
                      detections = self._build_detections(
                          high_dets, cls_id, embs_cls[high_mask])
              else:
                  detections = []

              # Split the tracks kept from earlier frames: tracks that are
              # not activated yet are "unconfirmed", the others are "tracked".
              unconfirmed = []
              tracked = []
              for track in self.tracked_tracks_dict[cls_id]:
                  if track.is_activated:
                      tracked.append(track)
                  else:
                      unconfirmed.append(track)

              # Step 2: First association, with embedding
              # (IoU only when use_byte is True)
              # building tracking pool for the current frame
              track_pool = joint_stracks(tracked,
                                         self.lost_tracks_dict[cls_id])

              # Predict the current location with KalmanFilter
              STrack.multi_predict(track_pool, self.motion)

              if self.use_byte:
                  dists = matching.iou_distance(track_pool, detections)
                  matches, u_track, u_detection = matching.linear_assignment(
                      dists, thresh=self.match_thres)
              else:
                  dists = matching.embedding_distance(
                      track_pool, detections, metric=self.metric_type)
                  dists = matching.fuse_motion(self.motion, dists, track_pool,
                                               detections)
                  matches, u_track, u_detection = matching.linear_assignment(
                      dists, thresh=self.tracked_thresh)

              for i_tracked, idet in matches:
                  # i_tracked indexes the track pool, idet the detections
                  track = track_pool[i_tracked]
                  det = detections[idet]
                  if track.state == TrackState.Tracked:
                      # If the track is active, add the detection to the track
                      track.update(det, self.frame_id)
                      activated.append(track)
                  else:
                      # The matched track is not active (it came from the lost
                      # list), hence re-activate it under its existing id.
                      track.re_activate(det, self.frame_id, new_id=False)
                      refined.append(track)

              # Step 3: Second association, with IOU
              if self.use_byte:
                  # Candidates are the low score detections of this class:
                  # low_conf_thres < score < conf_thres.
                  scores = dets_cls[:, 4]
                  low_mask = np.logical_and(scores > self.low_conf_thres,
                                            scores < self.conf_thres)
                  second_candidates = self._build_detections(
                      dets_cls[low_mask], cls_id)
                  r_tracked_stracks = [
                      track_pool[i] for i in u_track
                      if track_pool[i].state == TrackState.Tracked
                  ]
                  dists = matching.iou_distance(r_tracked_stracks,
                                                second_candidates)
                  # `u_detection` of the first association is kept on
                  # purpose: the leftover high score detections are still
                  # needed below.
                  matches, u_track, _ = matching.linear_assignment(
                      dists, thresh=0.4)  # not r_tracked_thresh
              else:
                  detections = [detections[i] for i in u_detection]
                  second_candidates = detections
                  r_tracked_stracks = [
                      track_pool[i] for i in u_track
                      if track_pool[i].state == TrackState.Tracked
                  ]
                  dists = matching.iou_distance(r_tracked_stracks, detections)
                  matches, u_track, u_detection = matching.linear_assignment(
                      dists, thresh=self.r_tracked_thresh)

              for i_tracked, idet in matches:
                  track = r_tracked_stracks[i_tracked]
                  det = second_candidates[idet]
                  if track.state == TrackState.Tracked:
                      track.update(det, self.frame_id)
                      activated.append(track)
                  else:
                      track.re_activate(det, self.frame_id, new_id=False)
                      refined.append(track)

              # Tracked tracks left unmatched by both associations become lost.
              for it in u_track:
                  track = r_tracked_stracks[it]
                  if not track.state == TrackState.Lost:
                      track.mark_lost()
                      lost.append(track)

              # Deal with unconfirmed tracks, usually tracks with only one
              # beginning frame
              detections = [detections[i] for i in u_detection]
              dists = matching.iou_distance(unconfirmed, detections)
              matches, u_unconfirmed, u_detection = matching.linear_assignment(
                  dists, thresh=self.unconfirmed_thresh)
              for i_tracked, idet in matches:
                  track = unconfirmed[i_tracked]
                  track.update(detections[idet], self.frame_id)
                  activated.append(track)
              for it in u_unconfirmed:
                  track = unconfirmed[it]
                  track.mark_removed()
                  removed.append(track)

              # Step 4: Init new stracks
              for inew in u_detection:
                  track = detections[inew]
                  if track.score < self.det_thresh:
                      continue
                  track.activate(self.motion, self.frame_id)
                  activated.append(track)

              # Step 5: Update state
              for track in self.lost_tracks_dict[cls_id]:
                  if self.frame_id - track.end_frame > self.max_time_lost:
                      track.mark_removed()
                      removed.append(track)

              self.tracked_tracks_dict[cls_id] = [
                  t for t in self.tracked_tracks_dict[cls_id]
                  if t.state == TrackState.Tracked
              ]
              self.tracked_tracks_dict[cls_id] = joint_stracks(
                  self.tracked_tracks_dict[cls_id], activated)
              self.tracked_tracks_dict[cls_id] = joint_stracks(
                  self.tracked_tracks_dict[cls_id], refined)
              self.lost_tracks_dict[cls_id] = sub_stracks(
                  self.lost_tracks_dict[cls_id],
                  self.tracked_tracks_dict[cls_id])
              self.lost_tracks_dict[cls_id].extend(lost)
              self.lost_tracks_dict[cls_id] = sub_stracks(
                  self.lost_tracks_dict[cls_id],
                  self.removed_tracks_dict[cls_id])
              # NOTE(review): the tracks removed in this frame are appended
              # only after the subtraction above, so they stay in
              # lost_tracks_dict until the next update. The order is kept
              # from the original code. removed_tracks_dict is only ever
              # appended to inside this class.
              self.removed_tracks_dict[cls_id].extend(removed)
              self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[
                  cls_id] = remove_duplicate_stracks(
                      self.tracked_tracks_dict[cls_id],
                      self.lost_tracks_dict[cls_id])

              # Only activated tracks are reported to the caller.
              output_tracks_dict[cls_id] = [
                  track for track in self.tracked_tracks_dict[cls_id]
                  if track.is_activated
              ]

              logger.debug('===========Frame {}=========='.format(
                  self.frame_id))
              logger.debug('Activated: {}'.format(
                  [track.track_id for track in activated]))
              logger.debug('Refind: {}'.format(
                  [track.track_id for track in refined]))
              logger.debug('Lost: {}'.format(
                  [track.track_id for track in lost]))
              logger.debug('Removed: {}'.format(
                  [track.track_id for track in removed]))

          return output_tracks_dict