detection.py

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path as osp
import math
import pickle
import os


def build_yolo_transforms(params):
    """Build train/eval data transforms for the YOLOv3/PPYOLO models."""
    from paddlex.det import transforms
    target_size = params.image_shape[0]
    use_mixup = params.use_mixup
    dt_list = []
    if use_mixup:
        # Mixup is only applied during roughly the first 25/27 of the epochs.
        dt_list.append(
            transforms.MixupImage(
                alpha=params.mixup_alpha,
                beta=params.mixup_beta,
                mixup_epoch=int(params.num_epochs * 25. / 27)))
    dt_list.extend([
        transforms.RandomDistort(
            brightness_range=params.brightness_range,
            brightness_prob=params.brightness_prob,
            contrast_range=params.contrast_range,
            contrast_prob=params.contrast_prob,
            saturation_range=params.saturation_range,
            saturation_prob=params.saturation_prob,
            hue_range=params.hue_range,
            hue_prob=params.hue_prob),
        transforms.RandomExpand(
            prob=params.expand_prob,
            fill_value=[float(int(x * 255)) for x in params.image_mean])
    ])
    crop_image = params.crop_image
    if crop_image:
        dt_list.append(transforms.RandomCrop())
    dt_list.extend([
        transforms.Resize(
            target_size=target_size, interp='RANDOM'),
        transforms.RandomHorizontalFlip(prob=params.horizontal_flip_prob),
        transforms.Normalize(
            mean=params.image_mean, std=params.image_std)
    ])
    train_transforms = transforms.Compose(dt_list)
    eval_transforms = transforms.Compose([
        transforms.Resize(
            target_size=target_size, interp='CUBIC'),
        transforms.Normalize(
            mean=params.image_mean, std=params.image_std),
    ])
    return train_transforms, eval_transforms
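

# Illustrative sketch: a hypothetical attribute-style params object showing the
# fields that build_yolo_transforms reads. Every field name comes from the
# function above; the concrete values are placeholder assumptions, not the
# project's defaults.
def _example_yolo_transform_params():
    from types import SimpleNamespace
    return SimpleNamespace(
        image_shape=[608, 608],  # target_size is taken from image_shape[0]
        use_mixup=True,
        mixup_alpha=1.5,
        mixup_beta=1.5,
        num_epochs=270,
        brightness_range=0.5,
        brightness_prob=0.5,
        contrast_range=0.5,
        contrast_prob=0.5,
        saturation_range=0.5,
        saturation_prob=0.5,
        hue_range=18,
        hue_prob=0.5,
        expand_prob=0.5,
        crop_image=True,
        horizontal_flip_prob=0.5,
        image_mean=[0.485, 0.456, 0.406],
        image_std=[0.229, 0.224, 0.225])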


def build_rcnn_transforms(params):
    """Build train/eval data transforms for the FasterRCNN/MaskRCNN models."""
    from paddlex.det import transforms
    short_size = min(params.image_shape)
    max_size = max(params.image_shape)
    train_transforms = transforms.Compose([
        transforms.RandomDistort(
            brightness_range=params.brightness_range,
            brightness_prob=params.brightness_prob,
            contrast_range=params.contrast_range,
            contrast_prob=params.contrast_prob,
            saturation_range=params.saturation_range,
            saturation_prob=params.saturation_prob,
            hue_range=params.hue_range,
            hue_prob=params.hue_prob),
        transforms.RandomHorizontalFlip(prob=params.horizontal_flip_prob),
        transforms.Normalize(
            mean=params.image_mean, std=params.image_std),
        transforms.ResizeByShort(
            short_size=short_size, max_size=max_size),
        # Pad to a multiple of 32 when FPN is used so feature maps align.
        transforms.Padding(coarsest_stride=32 if params.with_fpn else 1),
    ])
    eval_transforms = transforms.Compose([
        transforms.Normalize(),
        transforms.ResizeByShort(
            short_size=short_size, max_size=max_size),
        transforms.Padding(coarsest_stride=32 if params.with_fpn else 1)
    ])
    return train_transforms, eval_transforms
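

# Note on the RCNN pipelines above: the resize policy is derived from
# params.image_shape, e.g. a hypothetical image_shape of [800, 1333] yields
# short_size=800 and max_size=1333 for ResizeByShort in both the train and
# eval transforms.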


def build_voc_datasets(dataset_path, train_transforms, eval_transforms):
    import paddlex as pdx
    train_file_list = osp.join(dataset_path, 'train_list.txt')
    eval_file_list = osp.join(dataset_path, 'val_list.txt')
    label_list = osp.join(dataset_path, 'labels.txt')
    train_dataset = pdx.datasets.VOCDetection(
        data_dir=dataset_path,
        file_list=train_file_list,
        label_list=label_list,
        transforms=train_transforms,
        shuffle=True)
    eval_dataset = pdx.datasets.VOCDetection(
        data_dir=dataset_path,
        file_list=eval_file_list,
        label_list=label_list,
        transforms=eval_transforms)
    return train_dataset, eval_dataset
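

# Sketch of the VOC-style layout that build_voc_datasets expects under
# dataset_path (the file names are taken from the paths joined above; the
# per-line file_list format follows the usual PaddleX
# "image_path annotation_path" convention):
#   dataset_path/
#       train_list.txt
#       val_list.txt
#       labels.txt       # one class name per line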


def build_coco_datasets(dataset_path, train_transforms, eval_transforms):
    import paddlex as pdx
    data_dir = osp.join(dataset_path, 'JPEGImages')
    train_ann_file = osp.join(dataset_path, 'train.json')
    eval_ann_file = osp.join(dataset_path, 'val.json')
    train_dataset = pdx.datasets.CocoDetection(
        data_dir=data_dir,
        ann_file=train_ann_file,
        transforms=train_transforms,
        shuffle=True)
    eval_dataset = pdx.datasets.CocoDetection(
        data_dir=data_dir, ann_file=eval_ann_file, transforms=eval_transforms)
    return train_dataset, eval_dataset
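

# Sketch of the COCO-style layout that build_coco_datasets expects under
# dataset_path (the names are taken from the paths joined above):
#   dataset_path/
#       JPEGImages/      # image files
#       train.json       # COCO-format training annotations
#       val.json         # COCO-format evaluation annotations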


def build_optimizer(step_each_epoch, params):
    """Build a Momentum optimizer with linear warmup and piecewise LR decay."""
    import paddle.fluid as fluid
    from paddle.fluid.regularizer import L2Decay
    learning_rate = params.learning_rate
    num_epochs = params.num_epochs
    lr_decay_epochs = params.lr_decay_epochs
    warmup_steps = params.warmup_steps
    warmup_start_lr = params.warmup_start_lr
    # Convert epoch-based decay points into step-based boundaries; the learning
    # rate is divided by 10 at each boundary.
    boundaries = [b * step_each_epoch for b in lr_decay_epochs]
    values = [
        learning_rate * (0.1**i) for i in range(len(lr_decay_epochs) + 1)
    ]
    lr = fluid.layers.piecewise_decay(boundaries=boundaries, values=values)
    lr = fluid.layers.linear_lr_warmup(
        learning_rate=lr,
        warmup_steps=warmup_steps,
        start_lr=warmup_start_lr,
        end_lr=learning_rate)
    # RCNN-style models use a smaller weight decay than the YOLO family.
    factor = 1e-04 if params.model in ['FasterRCNN', 'MaskRCNN'] else 5e-04
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr, momentum=0.9, regularization=L2Decay(factor))
    return optimizer
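

# Worked example of the schedule built above, using assumed numbers: with
# learning_rate=0.001, lr_decay_epochs=[8, 11], step_each_epoch=500 and
# warmup_steps=1000, the piecewise boundaries become [4000, 5500] with values
# [0.001, 0.0001, 0.00001], and the first 1000 steps ramp linearly from
# warmup_start_lr up to 0.001 before that schedule takes effect.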


def train(task_path, dataset_path, params):
    """Train a detection model for the given task, dataset and params."""
    import paddlex as pdx
    pdx.log_level = 3
    if params.model in ['YOLOv3', 'PPYOLO']:
        train_transforms, eval_transforms = build_yolo_transforms(params)
    elif params.model in ['FasterRCNN', 'MaskRCNN']:
        train_transforms, eval_transforms = build_rcnn_transforms(params)
    # A COCO-style dataset is detected by its JPEGImages/train.json/val.json
    # layout, a VOC-style dataset by its train_list/val_list/labels files.
    if osp.exists(osp.join(dataset_path, 'JPEGImages')) and \
            osp.exists(osp.join(dataset_path, 'train.json')) and \
            osp.exists(osp.join(dataset_path, 'val.json')):
        train_dataset, eval_dataset = build_coco_datasets(
            dataset_path=dataset_path,
            train_transforms=train_transforms,
            eval_transforms=eval_transforms)
    elif osp.exists(osp.join(dataset_path, 'train_list.txt')) and \
            osp.exists(osp.join(dataset_path, 'val_list.txt')) and \
            osp.exists(osp.join(dataset_path, 'labels.txt')):
        train_dataset, eval_dataset = build_voc_datasets(
            dataset_path=dataset_path,
            train_transforms=train_transforms,
            eval_transforms=eval_transforms)
    step_each_epoch = train_dataset.num_samples // params.batch_size
    train_batch_size = params.batch_size
    save_interval_epochs = params.save_interval_epochs
    save_dir = osp.join(task_path, 'output')
    pretrain_weights = params.pretrain_weights
    optimizer = build_optimizer(step_each_epoch, params)
    detector = getattr(pdx.cv.models, params.model)
    # RCNN models count the background as an extra class.
    num_classes = len(train_dataset.labels) if params.model in ['YOLOv3', 'PPYOLO'] else \
        len(train_dataset.labels) + 1
    sensitivities_path = params.sensitivities_path
    eval_metric_loss = params.eval_metric_loss
    if eval_metric_loss is None:
        eval_metric_loss = 0.05
    model = detector(num_classes=num_classes, backbone=params.backbone)
    if params.model in ['YOLOv3', 'PPYOLO']:
        model.train_random_shapes = params.random_shape_sizes
    # Only the YOLOv3 branch passes the pruning-related arguments
    # (sensitivities_file / eval_metric_loss) to model.train().
    if params.model == 'YOLOv3':
        model.train(
            num_epochs=params.num_epochs,
            train_dataset=train_dataset,
            train_batch_size=train_batch_size,
            eval_dataset=eval_dataset,
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=2,
            save_dir=save_dir,
            pretrain_weights=pretrain_weights,
            optimizer=optimizer,
            use_vdl=True,
            sensitivities_file=sensitivities_path,
            eval_metric_loss=eval_metric_loss,
            resume_checkpoint=params.resume_checkpoint)
    else:
        model.train(
            num_epochs=params.num_epochs,
            train_dataset=train_dataset,
            train_batch_size=train_batch_size,
            eval_dataset=eval_dataset,
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=2,
            save_dir=save_dir,
            pretrain_weights=pretrain_weights,
            optimizer=optimizer,
            use_vdl=True,
            resume_checkpoint=params.resume_checkpoint)
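

# Usage sketch (hypothetical; every path and value below is a placeholder):
# train() is the module's entry point, and `params` can be any attribute-style
# object carrying the fields read above, e.g. model, backbone, batch_size,
# num_epochs, learning_rate, lr_decay_epochs, warmup_steps, warmup_start_lr,
# save_interval_epochs, pretrain_weights, sensitivities_path, eval_metric_loss,
# random_shape_sizes, resume_checkpoint, with_fpn, and the transform settings
# illustrated in _example_yolo_transform_params().
#
#     params = _example_yolo_transform_params()
#     params.model = 'YOLOv3'
#     params.backbone = 'DarkNet53'
#     params.batch_size = 8
#     # ... fill in the remaining fields listed above, then:
#     train(task_path='./my_task', dataset_path='./my_dataset', params=params)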