# learning_rate.py
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from paddle.optimizer import lr
from paddle.optimizer.lr import LRScheduler

from paddlex.ppcls.utils import logger

class Linear(object):
    """
    Linear (polynomial) learning rate decay.

    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        epochs (int): Total training epochs; the decay runs over the epochs left after warmup.
        step_each_epoch (int): Number of iterations per epoch.
        end_lr (float, optional): The minimum final learning rate. Default: 0.0.
        power (float, optional): Power of polynomial. Default: 1.0.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of warm up. Default: 0.0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 epochs,
                 step_each_epoch,
                 end_lr=0.0,
                 power=1.0,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        self.learning_rate = learning_rate
        # The polynomial decay covers the iterations that remain after warmup.
        self.steps = (epochs - warmup_epoch) * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.steps,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch) if self.steps > 0 else self.learning_rate
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
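
# Usage sketch: each class in this module is a builder; calling the instance is
# assumed to return a paddle LRScheduler that is stepped once per iteration
# (the *_steps fields above are counted in iterations). ``model`` and
# ``train_loader`` are hypothetical placeholders, and Momentum is just one
# possible optimizer.
#
#     import paddle
#
#     scheduler = Linear(learning_rate=0.1, epochs=120, step_each_epoch=500,
#                        warmup_epoch=5)()
#     optimizer = paddle.optimizer.Momentum(learning_rate=scheduler,
#                                           parameters=model.parameters())
#     for batch in train_loader:
#         ...
#         optimizer.step()
#         optimizer.clear_grad()
#         scheduler.step()  # per-iteration step, matching decay_steps/warmup_steps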


class Cosine(object):
    """
    Cosine learning rate decay (with eta_min = 0.0):

        lr = 0.5 * learning_rate * (math.cos(epoch * (math.pi / epochs)) + 1)

    Args:
        learning_rate (float): Initial learning rate.
        step_each_epoch (int): Number of iterations per epoch.
        epochs (int): Total training epochs.
        eta_min (float): Minimum learning rate. Default: 0.0.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of warm up. Default: 0.0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 eta_min=0.0,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        self.learning_rate = learning_rate
        # Cosine annealing covers the iterations that remain after warmup.
        self.T_max = (epochs - warmup_epoch) * step_each_epoch
        self.eta_min = eta_min
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.CosineAnnealingDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            eta_min=self.eta_min,
            last_epoch=self.last_epoch) if self.T_max > 0 else self.learning_rate
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
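
# Worked example (illustrative numbers, not taken from any config): with
# epochs=100, warmup_epoch=5 and step_each_epoch=200, the warmup phase and the
# cosine phase together span exactly epochs * step_each_epoch iterations:
#
#     cosine = Cosine(learning_rate=0.1, step_each_epoch=200, epochs=100,
#                     warmup_epoch=5)
#     assert cosine.warmup_steps == 5 * 200       # 1000 linear-warmup iterations
#     assert cosine.T_max == (100 - 5) * 200      # 19000 cosine-annealing iterations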


class Step(object):
    """
    Step learning rate decay: the learning rate is multiplied by ``gamma`` every
    ``step_size`` epochs.

    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        step_size (int): The decay interval, in epochs.
        step_each_epoch (int): Number of iterations per epoch.
        epochs (int): Total training epochs.
        gamma (float): The ratio by which the learning rate is reduced: ``new_lr = origin_lr * gamma``.
            It should be less than 1.0.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of warm up. Default: 0.0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_size,
                 step_each_epoch,
                 epochs,
                 gamma,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        # Convert the decay interval from epochs to iterations.
        self.step_size = step_each_epoch * step_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.StepDecay(
            learning_rate=self.learning_rate,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
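
# Quick check (hypothetical numbers): the epoch interval is converted to an
# iteration interval before it reaches paddle's StepDecay, so with step_size=30
# and step_each_epoch=100 the learning rate drops by a factor of gamma every
# 3000 scheduler steps:
#
#     step_lr = Step(learning_rate=0.1, step_size=30, step_each_epoch=100,
#                    epochs=120, gamma=0.1)
#     assert step_lr.step_size == 3000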


class Piecewise(object):
    """
    Piecewise learning rate decay.

    Args:
        step_each_epoch (int): Number of iterations per epoch.
        decay_epochs (list): A list of epoch indices at which the learning rate changes. The type of element in the list is python int.
        values (list): A list of learning rate values that will be picked during different epoch boundaries.
            The type of element in the list is python float.
        epochs (int): Total training epochs.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of warm up. Default: 0.0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 step_each_epoch,
                 decay_epochs,
                 values,
                 epochs,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        # Convert epoch boundaries to iteration boundaries.
        self.boundaries = [step_each_epoch * e for e in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.PiecewiseDecay(
            boundaries=self.boundaries,
            values=self.values,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.values[0],
                last_epoch=self.last_epoch)
        return learning_rate
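
# Sketch of the epoch-to-iteration mapping (illustrative values): paddle's
# PiecewiseDecay expects len(values) == len(boundaries) + 1, and the i-th value
# is used until iteration decay_epochs[i] * step_each_epoch:
#
#     piecewise = Piecewise(step_each_epoch=100, decay_epochs=[30, 60, 90],
#                           values=[0.1, 0.01, 0.001, 0.0001], epochs=120)
#     assert piecewise.boundaries == [3000, 6000, 9000]
#     scheduler = piecewise()  # plain lr.PiecewiseDecay, since warmup_epoch defaults to 0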


class MultiStepDecay(LRScheduler):
    """
    Update the learning rate by ``gamma`` once ``epoch`` reaches one of the milestones.
    The algorithm can be described as the code below.

    .. code-block:: text

        learning_rate = 0.5
        milestones = [30, 50]
        gamma = 0.1
        if epoch < 30:
            learning_rate = 0.5
        elif epoch < 50:
            learning_rate = 0.05
        else:
            learning_rate = 0.005

    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        milestones (tuple|list): List or tuple of milestone epochs. Must be increasing.
        epochs (int): Total training epochs. Accepted for interface consistency with the other schedulers; not used here.
        step_each_epoch (int): Number of iterations per epoch; milestones are converted from epochs to iterations.
        gamma (float, optional): The ratio by which the learning rate is reduced: ``new_lr = origin_lr * gamma``.
            It should be less than 1.0. Default: 0.1.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
        verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False``.

    Returns:
        ``MultiStepDecay`` instance to schedule learning rate.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            # train on default dynamic graph mode
            linear = paddle.nn.Linear(10, 10)
            scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            for epoch in range(20):
                for batch_id in range(5):
                    x = paddle.uniform([10, 10])
                    out = linear(x)
                    loss = paddle.mean(out)
                    loss.backward()
                    sgd.step()
                    sgd.clear_gradients()
                    scheduler.step()    # If you update learning rate each step
                # scheduler.step()      # If you update learning rate each epoch

            # train on static graph mode
            paddle.enable_static()
            main_prog = paddle.static.Program()
            start_prog = paddle.static.Program()
            with paddle.static.program_guard(main_prog, start_prog):
                x = paddle.static.data(name='x', shape=[None, 4, 5])
                y = paddle.static.data(name='y', shape=[None, 4, 5])
                z = paddle.static.nn.fc(x, 100)
                loss = paddle.mean(z)
                scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
                sgd.minimize(loss)

            exe = paddle.static.Executor()
            exe.run(start_prog)
            for epoch in range(20):
                for batch_id in range(5):
                    out = exe.run(
                        main_prog,
                        feed={
                            'x': np.random.randn(3, 4, 5).astype('float32'),
                            'y': np.random.randn(3, 4, 5).astype('float32')
                        },
                        fetch_list=loss.name)
                    scheduler.step()    # If you update learning rate each step
                # scheduler.step()      # If you update learning rate each epoch
    """

    def __init__(self,
                 learning_rate,
                 milestones,
                 epochs,
                 step_each_epoch,
                 gamma=0.1,
                 last_epoch=-1,
                 verbose=False):
        if not isinstance(milestones, (tuple, list)):
            raise TypeError(
                "The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received %s."
                % type(milestones))
        if not all([
                milestones[i] < milestones[i + 1]
                for i in range(len(milestones) - 1)
        ]):
            raise ValueError('The elements of milestones must be incremented')
        if gamma >= 1.0:
            raise ValueError('gamma should be < 1.0.')
        # Milestones are given in epochs; convert them to iteration counts.
        self.milestones = [x * step_each_epoch for x in milestones]
        self.gamma = gamma
        super().__init__(learning_rate, last_epoch, verbose)

    def get_lr(self):
        # ``self.last_epoch`` counts scheduler steps; apply one factor of
        # ``gamma`` for every milestone that has already been passed.
        for i in range(len(self.milestones)):
            if self.last_epoch < self.milestones[i]:
                return self.base_lr * (self.gamma**i)
        return self.base_lr * (self.gamma**len(self.milestones))
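
# get_lr() walkthrough (hypothetical numbers): with milestones=[30, 60] given in
# epochs and step_each_epoch=100, ``self.milestones`` becomes [3000, 6000], so
# for base_lr=0.5 and gamma=0.1 the schedule returns 0.5 while last_epoch < 3000,
# 0.05 up to 6000, and 0.005 afterwards, i.e. the epoch-level pseudocode from the
# docstring measured in scheduler steps instead of epochs.
#
#     sched = MultiStepDecay(learning_rate=0.5, milestones=[30, 60], epochs=100,
#                            step_each_epoch=100, gamma=0.1)
#     assert sched.milestones == [3000, 6000]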