ds_config.json 1006 B

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  {
    "fp16": {
      "enabled": "auto",
      "loss_scale": 0,
      "loss_scale_window": 1000,
      "initial_scale_power": 16,
      "hysteresis": 2,
      "min_loss_scale": 1
    },
    "bf16": {
      "enabled": "auto"
    },
    "optimizer": {
      "type": "AdamW",
      "params": {
        "lr": "auto",
        "betas": "auto",
        "eps": "auto",
        "weight_decay": "auto"
      }
    },
    "scheduler": {
      "type": "WarmupDecayLR",
      "params": {
        "warmup_min_lr": "auto",
        "warmup_max_lr": "auto",
        "warmup_num_steps": "auto",
        "total_num_steps": "auto"
      }
    },
    "zero_optimization": {
      "stage": 2,
      "allgather_partitions": true,
      "allgather_bucket_size": 200000000,
      "overlap_comm": true,
      "reduce_scatter": true,
      "reduce_bucket_size": 200000000,
      "contiguous_gradients": true
    },
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
  }