# PP-TSM-R50_8frames_uniform.yaml (5.3 KB)

  # Global field: run-wide settings (weights, output locations, device).
  Global:
    checkpoints: null
    pretrained_model: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-TSM-R50_8frames_uniform_pretrained.pdparams
    output_dir: ./output/
    device: gpu
    use_visualdl: false
    save_inference_dir: ./inference
    # training model under @to_static
    to_static: false
    algorithm: PP-TSM-R50_8frames_uniform
  # MODEL field
  MODEL:
    # Mandatory: the type of network; associated with
    # 'paddlevideo/modeling/framework/'.
    framework: "Recognizer2D"
    # Mandatory: the type of backbone; associated with
    # 'paddlevideo/modeling/backbones/'.
    backbone:
      name: "ResNetTweaksTSM"  # Mandatory: the name of the backbone.
      pretrained: null
      depth: 50  # Optional: the depth of the backbone architecture.
    head:
      # Mandatory: the type of head; associated with
      # 'paddlevideo/modeling/heads'.
      name: "ppTSMHead"
      num_classes: 400  # Optional: the number of classes to be classified.
      in_channels: 2048  # Input channels of the extracted feature.
      drop_ratio: 0.5  # The dropout ratio.
      std: 0.01  # Std value used in parameter initialization.
      ls_eps: 0.1  # Label-smoothing factor.
  # DATASET field
  DATASET:
    batch_size: 16  # Mandatory: batch size.
    num_workers: 4  # Mandatory: the number of subprocesses on each GPU.
    # test_batch_size: 1
    train:
      # Mandatory: the type of dataset; associated with
      # 'paddlevideo/loader/dataset'.
      format: "VideoDataset"
      # Mandatory: train data root path.
      data_prefix: "K400_dataset/K400/videos"
      # Mandatory: train data index file path.
      file_path: "K400_dataset/K400/train.txt"
    valid:
      # Mandatory: the type of dataset; associated with
      # 'paddlevideo/loader/dataset'.
      format: "VideoDataset"
      # Mandatory: valid data root path.
      data_prefix: "K400_dataset/K400/videos"
      # Mandatory: valid data index file path.
      file_path: "K400_dataset/K400/val.txt"
    test:
      # Mandatory: the type of dataset; associated with
      # 'paddlevideo/loader/dataset'.
      format: "VideoDataset"
      # Mandatory: test data root path.
      data_prefix: "K400_dataset/K400/videos"
      # Mandatory: test data index file path (the same val.txt list that
      # the valid split uses).
      file_path: "K400_dataset/K400/val.txt"
  # PIPELINE field
  # The three pipelines share the decode and sample settings. They differ in
  # valid_mode and in the crop/flip transforms: train uses MultiScaleCrop,
  # RandomCrop and RandomFlip, while valid and test use CenterCrop only.
  PIPELINE:
    # Mandatory: the pipeline that handles the training data; associated
    # with 'paddlevideo/loader/pipelines/'.
    train:
      decode:
        name: "VideoDecoder"
        backend: "decord"
      sample:
        name: "Sampler"
        num_seg: 8
        seg_len: 1
        valid_mode: false
      # Mandatory: image transform operators.
      transform:
        - Scale:
            short_size: 256
        - MultiScaleCrop:
            target_size: 256
        - RandomCrop:
            target_size: 224
        - RandomFlip:
        - Image2Array:
        - Normalization:
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
    # Mandatory: the pipeline that handles the validation data; associated
    # with 'paddlevideo/loader/pipelines/'.
    valid:
      decode:
        name: "VideoDecoder"
        backend: "decord"
      sample:
        name: "Sampler"
        num_seg: 8
        seg_len: 1
        valid_mode: true
      transform:
        - Scale:
            short_size: 256
        - CenterCrop:
            target_size: 224
        - Image2Array:
        - Normalization:
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
    # Mandatory: the pipeline that handles the test data; associated with
    # 'paddlevideo/loader/pipelines/'.
    test:
      decode:
        name: "VideoDecoder"
        backend: "decord"
      sample:
        name: "Sampler"
        num_seg: 8
        seg_len: 1
        valid_mode: true
      transform:
        - Scale:
            short_size: 256
        - CenterCrop:
            target_size: 224
        - Image2Array:
        - Normalization:
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
  # OPTIMIZER field
  OPTIMIZER:
    name: 'Momentum'
    momentum: 0.9
    learning_rate:
      iter_step: true
      name: 'CustomWarmupCosineDecay'
      max_epoch: 80
      warmup_epochs: 10
      warmup_start_lr: 0.005
      cosine_base_lr: 0.01
    weight_decay:
      name: 'L2'
      # Written with an explicit mantissa dot: YAML 1.1 resolvers such as
      # PyYAML load a dotless `1e-4` as a string rather than a float.
      value: 1.0e-4
    use_nesterov: true
  # MIX field
  MIX:
    name: "Mixup"
    alpha: 0.2
  # PRECISEBN field
  PRECISEBN:
    # Epoch interval at which preciseBN is run, default 1.
    preciseBN_interval: 5
    # How many batches are used for preciseBN, default 200.
    num_iters_preciseBN: 200
  # METRIC field
  METRIC:
    name: 'CenterCropMetric'
  # INFERENCE field
  # num_seg and target_size repeat the values used by the sampler and the
  # CenterCrop transform elsewhere in this file (8 and 224).
  INFERENCE:
    name: 'ppTSM_Inference_helper'
    num_seg: 8
    target_size: 224
  # Infer field: preprocessing transform list and post-processing settings.
  # NOTE(review): nesting was rebuilt from a flattened listing; PostProcess
  # is placed under Infer as a sibling of transforms. Confirm against the
  # upstream file.
  Infer:
    transforms:
      - ReadVideo:
          num_seg: 8
          sample_type: 'uniform'
      - Scale:
          short_size: 256
      - CenterCrop:
          target_size: 224
      - Image2Array:
          data_format: 'tchw'
      - NormalizeVideo:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
    PostProcess:
      name: Topk
      topk: 1
      class_id_map_file: data/k400/Kinetics-400_label_list.txt
  model_name: "ppTSM"
  log_interval: 10  # Optional: the logger interval, default: 10.
  epochs: 80  # Mandatory: total number of epochs.
  log_level: "INFO"  # Optional: the logger level, default: "INFO".