# mot.py
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import sys
  16. import cv2
  17. import glob
  18. import numpy as np
  19. from collections import OrderedDict
  20. try:
  21. from collections.abc import Sequence
  22. except Exception:
  23. from collections import Sequence
  24. from .dataset import DetDataset, _make_dataset, _is_valid_file
  25. from paddlex.ppdet.core.workspace import register, serializable
  26. from paddlex.ppdet.utils.logger import setup_logger
  27. logger = setup_logger(__name__)
  28. @register
  29. @serializable
  30. class MOTDataSet(DetDataset):
  31. """
  32. Load dataset with MOT format.
  33. Args:
  34. dataset_dir (str): root directory for dataset.
  35. image_lists (str|list): mot data image lists, muiti-source mot dataset.
  36. data_fields (list): key name of data dictionary, at least have 'image'.
  37. sample_num (int): number of samples to load, -1 means all.
  38. Notes:
  39. MOT datasets root directory following this:
  40. dataset/mot
  41. |——————image_lists
  42. | |——————caltech.train
  43. | |——————caltech.val
  44. | |——————mot16.train
  45. | |——————mot17.train
  46. | ......
  47. |——————Caltech
  48. |——————MOT17
  49. |——————......
  50. All the MOT datasets have the following structure:
  51. Caltech
  52. |——————images
  53. | └——————00001.jpg
  54. | |—————— ...
  55. | └——————0000N.jpg
  56. └——————labels_with_ids
  57. └——————00001.txt
  58. |—————— ...
  59. └——————0000N.txt
  60. or
  61. MOT17
  62. |——————images
  63. | └——————train
  64. | └——————test
  65. └——————labels_with_ids
  66. └——————train
  67. """
  68. def __init__(self,
  69. dataset_dir=None,
  70. image_lists=[],
  71. data_fields=['image'],
  72. sample_num=-1):
  73. super(MOTDataSet, self).__init__(
  74. dataset_dir=dataset_dir,
  75. data_fields=data_fields,
  76. sample_num=sample_num)
  77. self.dataset_dir = dataset_dir
  78. self.image_lists = image_lists
  79. if isinstance(self.image_lists, str):
  80. self.image_lists = [self.image_lists]
  81. self.roidbs = None
  82. self.cname2cid = None
  83. def get_anno(self):
  84. if self.image_lists == []:
  85. return
  86. # only used to get categories and metric
  87. return os.path.join(self.dataset_dir, 'image_lists',
  88. self.image_lists[0])
  89. def parse_dataset(self):
  90. self.img_files = OrderedDict()
  91. self.img_start_index = OrderedDict()
  92. self.label_files = OrderedDict()
  93. self.tid_num = OrderedDict()
  94. self.tid_start_index = OrderedDict()
  95. img_index = 0
  96. for data_name in self.image_lists:
  97. # check every data image list
  98. image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
  99. assert os.path.isdir(image_lists_dir), \
  100. "The {} is not a directory.".format(image_lists_dir)
  101. list_path = os.path.join(image_lists_dir, data_name)
  102. assert os.path.exists(list_path), \
  103. "The list path {} does not exist.".format(list_path)
  104. # record img_files, filter out empty ones
  105. with open(list_path, 'r') as file:
  106. self.img_files[data_name] = file.readlines()
  107. self.img_files[data_name] = [
  108. os.path.join(self.dataset_dir, x.strip())
  109. for x in self.img_files[data_name]
  110. ]
  111. self.img_files[data_name] = list(
  112. filter(lambda x: len(x) > 0, self.img_files[data_name]))
  113. self.img_start_index[data_name] = img_index
  114. img_index += len(self.img_files[data_name])
  115. # record label_files
  116. self.label_files[data_name] = [
  117. x.replace('images', 'labels_with_ids').replace(
  118. '.png', '.txt').replace('.jpg', '.txt')
  119. for x in self.img_files[data_name]
  120. ]
  121. for data_name, label_paths in self.label_files.items():
  122. max_index = -1
  123. for lp in label_paths:
  124. lb = np.loadtxt(lp)
  125. if len(lb) < 1:
  126. continue
  127. if len(lb.shape) < 2:
  128. img_max = lb[1]
  129. else:
  130. img_max = np.max(lb[:, 1])
  131. if img_max > max_index:
  132. max_index = img_max
  133. self.tid_num[data_name] = int(max_index + 1)
  134. last_index = 0
  135. for i, (k, v) in enumerate(self.tid_num.items()):
  136. self.tid_start_index[k] = last_index
  137. last_index += v
  138. self.total_identities = int(last_index + 1)
  139. self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
  140. self.total_imgs = sum(self.num_imgs_each_data)
  141. logger.info('=' * 80)
  142. logger.info('MOT dataset summary: ')
  143. logger.info(self.tid_num)
  144. logger.info('total images: {}'.format(self.total_imgs))
  145. logger.info('image start index: {}'.format(self.img_start_index))
  146. logger.info('total identities: {}'.format(self.total_identities))
  147. logger.info('identity start index: {}'.format(self.tid_start_index))
  148. logger.info('=' * 80)
  149. records = []
  150. cname2cid = mot_label()
  151. for img_index in range(self.total_imgs):
  152. for i, (k, v) in enumerate(self.img_start_index.items()):
  153. if img_index >= v:
  154. data_name = list(self.label_files.keys())[i]
  155. start_index = v
  156. img_file = self.img_files[data_name][img_index - start_index]
  157. lbl_file = self.label_files[data_name][img_index - start_index]
  158. if not os.path.exists(img_file):
  159. logger.warning(
  160. 'Illegal image file: {}, and it will be ignored'.format(
  161. img_file))
  162. continue
  163. if not os.path.isfile(lbl_file):
  164. logger.warning(
  165. 'Illegal label file: {}, and it will be ignored'.format(
  166. lbl_file))
  167. continue
  168. labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
  169. # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
  170. cx, cy = labels[:, 2], labels[:, 3]
  171. w, h = labels[:, 4], labels[:, 5]
  172. gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
  173. gt_class = labels[:, 0:1].astype('int32')
  174. gt_score = np.ones((len(labels), 1)).astype('float32')
  175. gt_ide = labels[:, 1:2].astype('int32')
  176. for i, _ in enumerate(gt_ide):
  177. if gt_ide[i] > -1:
  178. gt_ide[i] += self.tid_start_index[data_name]
  179. mot_rec = {
  180. 'im_file': img_file,
  181. 'im_id': img_index,
  182. } if 'image' in self.data_fields else {}
  183. gt_rec = {
  184. 'gt_class': gt_class,
  185. 'gt_score': gt_score,
  186. 'gt_bbox': gt_bbox,
  187. 'gt_ide': gt_ide,
  188. }
  189. for k, v in gt_rec.items():
  190. if k in self.data_fields:
  191. mot_rec[k] = v
  192. records.append(mot_rec)
  193. if self.sample_num > 0 and img_index >= self.sample_num:
  194. break
  195. assert len(records) > 0, 'not found any mot record in %s' % (
  196. self.image_lists)
  197. self.roidbs, self.cname2cid = records, cname2cid
  198. def mot_label():
  199. labels_map = {'person': 0}
  200. return labels_map
  201. @register
  202. @serializable
  203. class MOTImageFolder(DetDataset):
  204. """
  205. Load MOT dataset with MOT format from image folder or video .
  206. Args:
  207. video_file (str): path of the video file, default ''.
  208. frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
  209. dataset_dir (str): root directory for dataset.
  210. keep_ori_im (bool): whether to keep original image, default False.
  211. Set True when used during MOT model inference while saving
  212. images or video, or used in DeepSORT.
  213. """
  214. def __init__(self,
  215. video_file=None,
  216. frame_rate=-1,
  217. dataset_dir=None,
  218. data_root=None,
  219. image_dir=None,
  220. sample_num=-1,
  221. keep_ori_im=False,
  222. **kwargs):
  223. super(MOTImageFolder, self).__init__(
  224. dataset_dir, image_dir, sample_num=sample_num)
  225. self.video_file = video_file
  226. self.data_root = data_root
  227. self.keep_ori_im = keep_ori_im
  228. self._imid2path = {}
  229. self.roidbs = None
  230. self.frame_rate = frame_rate
  231. def check_or_download_dataset(self):
  232. return
  233. def parse_dataset(self, ):
  234. if not self.roidbs:
  235. if self.video_file is None:
  236. self.frame_rate = 30 # set as default if infer image folder
  237. self.roidbs = self._load_images()
  238. else:
  239. self.roidbs = self._load_video_images()
  240. def _load_video_images(self):
  241. if self.frame_rate == -1:
  242. # if frame_rate is not set for video, use cv2.VideoCapture
  243. cap = cv2.VideoCapture(self.video_file)
  244. self.frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
  245. extension = self.video_file.split('.')[-1]
  246. output_path = self.video_file.replace('.{}'.format(extension), '')
  247. frames_path = video2frames(self.video_file, output_path,
  248. self.frame_rate)
  249. self.video_frames = sorted(
  250. glob.glob(os.path.join(frames_path, '*.png')))
  251. self.video_length = len(self.video_frames)
  252. logger.info('Length of the video: {:d} frames.'.format(
  253. self.video_length))
  254. ct = 0
  255. records = []
  256. for image in self.video_frames:
  257. assert image != '' and os.path.isfile(image), \
  258. "Image {} not found".format(image)
  259. if self.sample_num > 0 and ct >= self.sample_num:
  260. break
  261. rec = {'im_id': np.array([ct]), 'im_file': image}
  262. if self.keep_ori_im:
  263. rec.update({'keep_ori_im': 1})
  264. self._imid2path[ct] = image
  265. ct += 1
  266. records.append(rec)
  267. assert len(records) > 0, "No image file found"
  268. return records
  269. def _find_images(self):
  270. image_dir = self.image_dir
  271. if not isinstance(image_dir, Sequence):
  272. image_dir = [image_dir]
  273. images = []
  274. for im_dir in image_dir:
  275. if os.path.isdir(im_dir):
  276. im_dir = os.path.join(self.dataset_dir, im_dir)
  277. images.extend(_make_dataset(im_dir))
  278. elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
  279. images.append(im_dir)
  280. return images
  281. def _load_images(self):
  282. images = self._find_images()
  283. ct = 0
  284. records = []
  285. for image in images:
  286. assert image != '' and os.path.isfile(image), \
  287. "Image {} not found".format(image)
  288. if self.sample_num > 0 and ct >= self.sample_num:
  289. break
  290. rec = {'im_id': np.array([ct]), 'im_file': image}
  291. if self.keep_ori_im:
  292. rec.update({'keep_ori_im': 1})
  293. self._imid2path[ct] = image
  294. ct += 1
  295. records.append(rec)
  296. assert len(records) > 0, "No image file found"
  297. return records
  298. def get_imid2path(self):
  299. return self._imid2path
  300. def set_images(self, images):
  301. self.image_dir = images
  302. self.roidbs = self._load_images()
  303. def set_video(self, video_file, frame_rate):
  304. # update video_file and frame_rate by command line of tools/infer_mot.py
  305. self.video_file = video_file
  306. self.frame_rate = frame_rate
  307. assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
  308. "wrong or unsupported file format: {}".format(self.video_file)
  309. self.roidbs = self._load_video_images()
  310. def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
  311. return f.lower().endswith(extensions)
  312. def video2frames(video_path, outpath, frame_rate, **kargs):
  313. def _dict2str(kargs):
  314. cmd_str = ''
  315. for k, v in kargs.items():
  316. cmd_str += (' ' + str(k) + ' ' + str(v))
  317. return cmd_str
  318. ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
  319. vid_name = os.path.basename(video_path).split('.')[0]
  320. out_full_path = os.path.join(outpath, vid_name)
  321. if not os.path.exists(out_full_path):
  322. os.makedirs(out_full_path)
  323. # video file name
  324. outformat = os.path.join(out_full_path, '%08d.png')
  325. cmd = ffmpeg
  326. cmd = ffmpeg + [
  327. ' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
  328. ]
  329. cmd = ''.join(cmd) + _dict2str(kargs)
  330. if os.system(cmd) != 0:
  331. raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
  332. sys.exit(-1)
  333. sys.stdout.flush()
  334. return out_full_path