# train.py
  1. import numpy as np
  2. import paddlex as pdx
  3. from paddlex import transforms as T
  4. # 定义训练和验证时的transforms
  5. # API说明:https://github.com/PaddlePaddle/PaddleX/blob/release/2.0.0/paddlex/cv/transforms/operators.py
  6. train_transforms = T.Compose([
  7. T.MixupImage(mixup_epoch=-1), T.RandomDistort(),
  8. T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]), T.RandomCrop(),
  9. T.RandomHorizontalFlip(), T.BatchRandomResize(
  10. target_sizes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608],
  11. interp='RANDOM'), T.Normalize(
  12. mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  13. ])
  14. eval_transforms = T.Compose([
  15. T.Resize(
  16. target_size=480, interp='CUBIC'), T.Normalize(
  17. mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  18. ])
  19. # 定义训练和验证所用的数据集
  20. # API说明:https://github.com/PaddlePaddle/PaddleX/blob/release/2.0.0/paddlex/cv/datasets/voc.py
  21. train_dataset = pdx.datasets.VOCDetection(
  22. data_dir='work',
  23. file_list='work/train_list.txt',
  24. label_list='work/label_list.txt',
  25. transforms=train_transforms,
  26. shuffle=True)
  27. eval_dataset = pdx.datasets.VOCDetection(
  28. data_dir='work',
  29. file_list='work/val_list.txt',
  30. label_list='work/label_list.txt',
  31. transforms=eval_transforms,
  32. shuffle=False)
  33. # YOLO检测模型的预置anchor生成
  34. # API说明: https://github.com/PaddlePaddle/PaddleX/blob/release/2.0.0/paddlex/tools/anchor_clustering/yolo_cluster.py
  35. anchors = train_dataset.cluster_yolo_anchor(num_anchors=9, image_size=480)
  36. anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  37. # 初始化模型,并进行训练
  38. # 可使用VisualDL查看训练指标,参考https://github.com/PaddlePaddle/PaddleX/tree/release/2.0.0/tutorials/train#visualdl可视化训练指标
  39. num_classes = len(train_dataset.labels)
  40. model = pdx.det.YOLOv3(
  41. num_classes=num_classes,
  42. backbone='DarkNet53',
  43. anchors=anchors.tolist() if isinstance(anchors, np.ndarray) else anchors,
  44. anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
  45. label_smooth=True,
  46. ignore_threshold=0.6)
  47. # API说明:https://github.com/PaddlePaddle/PaddleX/blob/release/2.0.0/paddlex/cv/models/detector.py
  48. # 各参数介绍与调整说明:https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
  49. model.train(
  50. num_epochs=200, # 训练轮次
  51. train_dataset=train_dataset, # 训练数据
  52. eval_dataset=eval_dataset, # 验证数据
  53. train_batch_size=16, # 批大小
  54. pretrain_weights='COCO', # 预训练权重
  55. learning_rate=0.005 / 12, # 学习率
  56. warmup_steps=500, # 预热步数
  57. warmup_start_lr=0.0, # 预热起始学习率
  58. save_interval_epochs=5, # 每5个轮次保存一次,有验证数据时,自动评估
  59. lr_decay_epochs=[85, 135], # step学习率衰减
  60. save_dir='output/yolov3_darknet53', # 保存路径
  61. use_vdl=True) # 其用visuadl进行可视化训练记录