iou_aware_loss.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import numpy as np
  18. from paddle.fluid.param_attr import ParamAttr
  19. from paddle.fluid.initializer import NumpyArrayInitializer
  20. from paddle import fluid
  21. from .iou_loss import IouLoss
  22. class IouAwareLoss(IouLoss):
  23. """
  24. iou aware loss, see https://arxiv.org/abs/1912.05992
  25. Args:
  26. loss_weight (float): iou aware loss weight, default is 1.0
  27. max_height (int): max height of input to support random shape input
  28. max_width (int): max width of input to support random shape input
  29. """
  30. def __init__(self, loss_weight=1.0, max_height=608, max_width=608):
  31. super(IouAwareLoss, self).__init__(
  32. loss_weight=loss_weight,
  33. max_height=max_height,
  34. max_width=max_width)
  35. def __call__(self,
  36. ioup,
  37. x,
  38. y,
  39. w,
  40. h,
  41. tx,
  42. ty,
  43. tw,
  44. th,
  45. anchors,
  46. downsample_ratio,
  47. batch_size,
  48. scale_x_y,
  49. eps=1.e-10):
  50. '''
  51. Args:
  52. ioup ([Variables]): the predicted iou
  53. x | y | w | h ([Variables]): the output of yolov3 for encoded x|y|w|h
  54. tx |ty |tw |th ([Variables]): the target of yolov3 for encoded x|y|w|h
  55. anchors ([float]): list of anchors for current output layer
  56. downsample_ratio (float): the downsample ratio for current output layer
  57. batch_size (int): training batch size
  58. eps (float): the decimal to prevent the denominator eqaul zero
  59. '''
  60. pred = self._bbox_transform(x, y, w, h, anchors, downsample_ratio,
  61. batch_size, False, scale_x_y, eps)
  62. gt = self._bbox_transform(tx, ty, tw, th, anchors, downsample_ratio,
  63. batch_size, True, scale_x_y, eps)
  64. iouk = self._iou(pred, gt, ioup, eps)
  65. iouk.stop_gradient = True
  66. loss_iou_aware = fluid.layers.cross_entropy(
  67. ioup, iouk, soft_label=True)
  68. loss_iou_aware = loss_iou_aware * self._loss_weight
  69. return loss_iou_aware