# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from numbers import Integral
import math
import six

import paddle
from paddle import fluid


def bbox_overlaps(boxes_1, boxes_2):
    '''
    bbox_overlaps
        boxes_1: x1, y1, x2, y2
        boxes_2: x1, y1, x2, y2
    '''
    assert boxes_1.shape[1] == 4 and boxes_2.shape[1] == 4

    num_1 = boxes_1.shape[0]
    num_2 = boxes_2.shape[0]

    x1_1 = boxes_1[:, 0:1]
    y1_1 = boxes_1[:, 1:2]
    x2_1 = boxes_1[:, 2:3]
    y2_1 = boxes_1[:, 3:4]
    area_1 = (x2_1 - x1_1 + 1) * (y2_1 - y1_1 + 1)

    x1_2 = boxes_2[:, 0].transpose()
    y1_2 = boxes_2[:, 1].transpose()
    x2_2 = boxes_2[:, 2].transpose()
    y2_2 = boxes_2[:, 3].transpose()
    area_2 = (x2_2 - x1_2 + 1) * (y2_2 - y1_2 + 1)

    xx1 = np.maximum(x1_1, x1_2)
    yy1 = np.maximum(y1_1, y1_2)
    xx2 = np.minimum(x2_1, x2_2)
    yy2 = np.minimum(y2_1, y2_2)

    w = np.maximum(0.0, xx2 - xx1 + 1)
    h = np.maximum(0.0, yy2 - yy1 + 1)
    inter = w * h

    ovr = inter / (area_1 + area_2 - inter)
    return ovr


def box_to_delta(ex_boxes, gt_boxes, weights):
    """ box_to_delta """
    ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
    ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
    ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w
    ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h

    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
    gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
    gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h

    dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
    dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
    dw = (np.log(gt_w / ex_w)) / weights[2]
    dh = (np.log(gt_h / ex_h)) / weights[3]

    targets = np.vstack([dx, dy, dw, dh]).transpose()
    return targets


def DropBlock(input, block_size, keep_prob, is_test):
    if is_test:
        return input

    def CalculateGamma(input, block_size, keep_prob):
        input_shape = fluid.layers.shape(input)
        feat_shape_tmp = fluid.layers.slice(input_shape, [0], [3], [4])
        feat_shape_tmp = fluid.layers.cast(feat_shape_tmp, dtype="float32")
        feat_shape_t = fluid.layers.reshape(feat_shape_tmp, [1, 1, 1, 1])
        feat_area = fluid.layers.pow(feat_shape_t, factor=2)

        block_shape_t = fluid.layers.fill_constant(
            shape=[1, 1, 1, 1], value=block_size, dtype='float32')
        block_area = fluid.layers.pow(block_shape_t, factor=2)

        useful_shape_t = feat_shape_t - block_shape_t + 1
        useful_area = fluid.layers.pow(useful_shape_t, factor=2)

        upper_t = feat_area * (1 - keep_prob)
        bottom_t = block_area * useful_area
        output = upper_t / bottom_t
        return output

    gamma = CalculateGamma(input, block_size=block_size, keep_prob=keep_prob)
    input_shape = fluid.layers.shape(input)
    p = fluid.layers.expand_as(gamma, input)

    input_shape_tmp = fluid.layers.cast(input_shape, dtype="int64")
    random_matrix = fluid.layers.uniform_random(
        input_shape_tmp, dtype='float32', min=0.0, max=1.0)
    one_zero_m = fluid.layers.less_than(random_matrix, p)
    one_zero_m.stop_gradient = True
    one_zero_m = fluid.layers.cast(one_zero_m, dtype="float32")

    mask_flag = fluid.layers.pool2d(
        one_zero_m,
        pool_size=block_size,
        pool_type='max',
        pool_stride=1,
        pool_padding=block_size // 2)
    mask = 1.0 - mask_flag
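    # Re-normalize the surviving activations so their expected magnitude matches
    # the pre-drop feature map: scale by (total element count / kept element
    # count), as in the DropBlock paper (Ghiasi et al., 2018).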
    elem_numel = fluid.layers.reduce_prod(input_shape)
    elem_numel_m = fluid.layers.cast(elem_numel, dtype="float32")
    elem_numel_m.stop_gradient = True

    elem_sum = fluid.layers.reduce_sum(mask)
    elem_sum_m = fluid.layers.cast(elem_sum, dtype="float32")
    elem_sum_m.stop_gradient = True

    output = input * mask * elem_numel_m / elem_sum_m
    return output


class MultiClassNMS(object):
    def __init__(self,
                 score_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 nms_threshold=.5,
                 normalized=False,
                 nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        self.score_threshold = score_threshold
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        self.nms_threshold = nms_threshold
        self.normalized = normalized
        self.nms_eta = nms_eta
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        return fluid.layers.multiclass_nms(
            bboxes=bboxes,
            scores=scores,
            score_threshold=self.score_threshold,
            nms_top_k=self.nms_top_k,
            keep_top_k=self.keep_top_k,
            normalized=self.normalized,
            nms_threshold=self.nms_threshold,
            nms_eta=self.nms_eta,
            background_label=self.background_label)


class MatrixNMS(object):
    def __init__(self,
                 score_threshold=.05,
                 post_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 use_gaussian=False,
                 gaussian_sigma=2.,
                 normalized=False,
                 background_label=0):
        super(MatrixNMS, self).__init__()
        self.score_threshold = score_threshold
        self.post_threshold = post_threshold
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.use_gaussian = use_gaussian
        self.gaussian_sigma = gaussian_sigma
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        return paddle.fluid.layers.matrix_nms(
            bboxes=bboxes,
            scores=scores,
            score_threshold=self.score_threshold,
            post_threshold=self.post_threshold,
            nms_top_k=self.nms_top_k,
            keep_top_k=self.keep_top_k,
            normalized=self.normalized,
            use_gaussian=self.use_gaussian,
            gaussian_sigma=self.gaussian_sigma,
            background_label=self.background_label)


class MultiClassSoftNMS(object):
    def __init__(self,
                 score_threshold=0.01,
                 keep_top_k=300,
                 softnms_sigma=0.5,
                 normalized=False,
                 background_label=0):
        super(MultiClassSoftNMS, self).__init__()
        self.score_threshold = score_threshold
        self.keep_top_k = keep_top_k
        self.softnms_sigma = softnms_sigma
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _soft_nms_for_cls(dets, sigma, thres):
            """soft_nms_for_cls"""
            dets_final = []
            while len(dets) > 0:
                maxpos = np.argmax(dets[:, 0])
                dets_final.append(dets[maxpos].copy())
                ts, tx1, ty1, tx2, ty2 = dets[maxpos]
                scores = dets[:, 0]
                # force remove bbox at maxpos
                scores[maxpos] = -1
                x1 = dets[:, 1]
                y1 = dets[:, 2]
                x2 = dets[:, 3]
                y2 = dets[:, 4]
                eta = 0 if self.normalized else 1
                areas = (x2 - x1 + eta) * (y2 - y1 + eta)
                xx1 = np.maximum(tx1, x1)
                yy1 = np.maximum(ty1, y1)
                xx2 = np.minimum(tx2, x2)
                yy2 = np.minimum(ty2, y2)
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas + areas[maxpos] - inter)
                weight = np.exp(-(ovr * ovr) / sigma)
                scores = scores * weight
                idx_keep = np.where(scores >= thres)
                dets[:, 0] = scores
                dets = dets[idx_keep]
            dets_final = np.array(dets_final).reshape(-1, 5)
            return dets_final

        def _soft_nms(bboxes, scores):
            class_nums = scores.shape[-1]

            softnms_thres = self.score_threshold
            softnms_sigma = self.softnms_sigma
            keep_top_k = self.keep_top_k
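            # Run soft-NMS independently for each foreground class: decayed
            # scores below the threshold are dropped, and the surviving
            # per-class detections are merged and truncated to keep_top_k below.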
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]

            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= softnms_thres)[0]
                scores_j = scores[inds, j]
                rois_j = bboxes[inds, j, :] if len(
                    bboxes.shape) > 2 else bboxes[inds, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _soft_nms_for_cls(
                    dets_j, sigma=softnms_sigma, thres=softnms_thres)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1, 1)

            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes])

            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]

            return pred_result

        def _batch_softnms(bboxes, scores):
            batch_offsets = bboxes.lod()
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            out_offsets = [0]
            pred_res = []
            if len(batch_offsets) > 0:
                batch_offset = batch_offsets[0]
                for i in range(len(batch_offset) - 1):
                    s, e = batch_offset[i], batch_offset[i + 1]
                    pred = _soft_nms(bboxes[s:e], scores[s:e])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            else:
                assert len(bboxes.shape) == 3
                assert len(scores.shape) == 3
                for i in range(bboxes.shape[0]):
                    pred = _soft_nms(bboxes[i], scores[i])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)

            res = fluid.LoDTensor()
            res.set_lod([out_offsets])
            if len(pred_res) == 0:
                pred_res = np.array([[1]], dtype=np.float32)
            res.set(np.vstack(pred_res).astype(np.float32), fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='softnms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=1)
        fluid.layers.py_func(
            func=_batch_softnms, x=[bboxes, scores], out=pred_result)
        return pred_result


class MultiClassDiouNMS(object):
    def __init__(self,
                 score_threshold=0.05,
                 keep_top_k=100,
                 nms_threshold=0.5,
                 normalized=False,
                 background_label=0):
        super(MultiClassDiouNMS, self).__init__()
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _calc_diou_term(dets1, dets2):
            eps = 1.e-10
            eta = 0 if self.normalized else 1

            x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
            x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]

            cx = (x1 + x2) / 2
            cy = (y1 + y2) / 2
            w = x2 - x1 + eta
            h = y2 - y1 + eta

            cxg = (x1g + x2g) / 2
            cyg = (y1g + y2g) / 2
            wg = x2g - x1g + eta
            hg = y2g - y1g + eta

            x2 = np.maximum(x1, x2)
            y2 = np.maximum(y1, y2)

            # A or B
            xc1 = np.minimum(x1, x1g)
            yc1 = np.minimum(y1, y1g)
            xc2 = np.maximum(x2, x2g)
            yc2 = np.maximum(y2, y2g)

            # DIOU term
            dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
            dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
            diou_term = (dist_intersection + eps) / (dist_union + eps)
            return diou_term

        def _diou_nms_for_cls(dets, thres):
            """_diou_nms_for_cls"""
            scores = dets[:, 0]
            x1 = dets[:, 1]
            y1 = dets[:, 2]
            x2 = dets[:, 3]
            y2 = dets[:, 4]
            eta = 0 if self.normalized else 1
            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
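            # Greedy DIoU-NMS: keep the highest-scoring box, then suppress the
            # remaining boxes whose IoU, reduced by the normalized center-distance
            # penalty (diou_term), still exceeds thres.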
            dt_num = dets.shape[0]
            order = np.array(range(dt_num))
            keep = []
            while order.size > 0:
                i = order[0]
                keep.append(i)
                xx1 = np.maximum(x1[i], x1[order[1:]])
                yy1 = np.maximum(y1[i], y1[order[1:]])
                xx2 = np.minimum(x2[i], x2[order[1:]])
                yy2 = np.minimum(y2[i], y2[order[1:]])

                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas[i] + areas[order[1:]] - inter)

                diou_term = _calc_diou_term([x1[i], y1[i], x2[i], y2[i]], [
                    x1[order[1:]], y1[order[1:]], x2[order[1:]], y2[order[1:]]
                ])

                inds = np.where(ovr - diou_term <= thres)[0]
                order = order[inds + 1]

            dets_final = dets[keep]
            return dets_final

        def _diou_nms(bboxes, scores):
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            class_nums = scores.shape[-1]

            score_threshold = self.score_threshold
            nms_threshold = self.nms_threshold
            keep_top_k = self.keep_top_k

            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]

            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= score_threshold)[0]
                scores_j = scores[inds, j]
                rois_j = bboxes[inds, j, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _diou_nms_for_cls(dets_j, thres=nms_threshold)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1, 1)

            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes]).astype(np.float32)

            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]

            res = fluid.LoDTensor()
            res.set_lod([[0, pred_result.shape[0]]])
            if pred_result.shape[0] == 0:
                pred_result = np.array([[1]], dtype=np.float32)
            res.set(pred_result, fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='diou_nms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=0)
        fluid.layers.py_func(
            func=_diou_nms, x=[bboxes, scores], out=pred_result)
        return pred_result


class LibraBBoxAssigner(object):
    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 shuffle_before_sample=True,
                 is_cls_agnostic=False,
                 num_bins=3):
        super(LibraBBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = num_classes
        self.use_random = shuffle_before_sample
        self.is_cls_agnostic = is_cls_agnostic
        self.num_bins = num_bins

    def __call__(self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info):
        return self.generate_proposal_label_libra(
            rpn_rois=rpn_rois,
            gt_classes=gt_classes,
            is_crowd=is_crowd,
            gt_boxes=gt_boxes,
            im_info=im_info,
            batch_size_per_im=self.batch_size_per_im,
            fg_fraction=self.fg_fraction,
            fg_thresh=self.fg_thresh,
            bg_thresh_hi=self.bg_thresh_hi,
            bg_thresh_lo=self.bg_thresh_lo,
            bbox_reg_weights=self.bbox_reg_weights,
            class_nums=self.class_nums,
            use_random=self.use_random,
            is_cls_agnostic=self.is_cls_agnostic,
            is_cascade_rcnn=False)
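    # Libra R-CNN style sampling: positives are drawn evenly across the matched
    # ground-truth classes, and negatives are drawn from IoU intervals so that
    # hard negatives are not drowned out by easy ones.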
    def generate_proposal_label_libra(
            self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
            batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
            bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
            is_cls_agnostic, is_cascade_rcnn):
        num_bins = self.num_bins

        def create_tmp_var(program, name, dtype, shape, lod_level=None):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
            if len(pos_inds) <= num_expected:
                return pos_inds
            else:
                unique_gt_inds = np.unique(max_classes[pos_inds])
                num_gts = len(unique_gt_inds)
                num_per_gt = int(round(num_expected / float(num_gts)) + 1)

                sampled_inds = []
                for i in unique_gt_inds:
                    inds = np.nonzero(max_classes == i)[0]
                    before_len = len(inds)
                    inds = list(set(inds) & set(pos_inds))
                    after_len = len(inds)
                    if len(inds) > num_per_gt:
                        inds = np.random.choice(
                            inds, size=num_per_gt, replace=False)
                    sampled_inds.extend(list(inds))  # combine as a new sampler
                if len(sampled_inds) < num_expected:
                    num_extra = num_expected - len(sampled_inds)
                    extra_inds = np.array(
                        list(set(pos_inds) - set(sampled_inds)))
                    assert len(sampled_inds) + len(extra_inds) == len(pos_inds), \
                        "sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
                            len(sampled_inds), len(extra_inds), len(pos_inds))
                    if len(extra_inds) > num_extra:
                        extra_inds = np.random.choice(
                            extra_inds, size=num_extra, replace=False)
                    sampled_inds.extend(extra_inds.tolist())
                elif len(sampled_inds) > num_expected:
                    sampled_inds = np.random.choice(
                        sampled_inds, size=num_expected, replace=False)
                return sampled_inds

        def sample_via_interval(max_overlaps, full_set, num_expected,
                                floor_thr, num_bins, bg_thresh_hi):
            max_iou = max_overlaps.max()
            iou_interval = (max_iou - floor_thr) / num_bins
            per_num_expected = int(num_expected / num_bins)

            sampled_inds = []
            for i in range(num_bins):
                start_iou = floor_thr + i * iou_interval
                end_iou = floor_thr + (i + 1) * iou_interval

                tmp_set = set(
                    np.where(
                        np.logical_and(max_overlaps >= start_iou,
                                       max_overlaps < end_iou))[0])
                tmp_inds = list(tmp_set & full_set)

                if len(tmp_inds) > per_num_expected:
                    tmp_sampled_set = np.random.choice(
                        tmp_inds, size=per_num_expected, replace=False)
                else:
                    tmp_sampled_set = np.array(tmp_inds, dtype=np.int)
                sampled_inds.append(tmp_sampled_set)

            sampled_inds = np.concatenate(sampled_inds)
            if len(sampled_inds) < num_expected:
                num_extra = num_expected - len(sampled_inds)
                extra_inds = np.array(list(full_set - set(sampled_inds)))
                assert len(sampled_inds) + len(extra_inds) == len(full_set), \
                    "sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
                        len(sampled_inds), len(extra_inds), len(full_set))
                if len(extra_inds) > num_extra:
                    extra_inds = np.random.choice(
                        extra_inds, num_extra, replace=False)
                sampled_inds = np.concatenate([sampled_inds, extra_inds])

            return sampled_inds

        def _sample_neg(max_overlaps,
                        max_classes,
                        neg_inds,
                        num_expected,
                        floor_thr=-1,
                        floor_fraction=0,
                        num_bins=3,
                        bg_thresh_hi=0.5):
            if len(neg_inds) <= num_expected:
                return neg_inds
            else:
                # balance sampling for negative samples
                neg_set = set(neg_inds)
                if floor_thr > 0:
                    floor_set = set(
                        np.where(
                            np.logical_and(max_overlaps >= 0,
                                           max_overlaps < floor_thr))[0])
                    iou_sampling_set = set(
                        np.where(max_overlaps >= floor_thr)[0])
                elif floor_thr == 0:
                    floor_set = set(np.where(max_overlaps == 0)[0])
                    iou_sampling_set = set(
                        np.where(max_overlaps > floor_thr)[0])
                else:
                    floor_set = set()
                    iou_sampling_set = set(
                        np.where(max_overlaps > floor_thr)[0])
                    floor_thr = 0

                floor_neg_inds = list(floor_set & neg_set)
                iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
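                # Split the negative budget: a floor_fraction share comes from
                # low-IoU "floor" negatives, and the remainder is drawn from the
                # IoU-balanced intervals via sample_via_interval.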
                num_expected_iou_sampling = int(num_expected *
                                                (1 - floor_fraction))
                if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
                    if num_bins >= 2:
                        iou_sampled_inds = sample_via_interval(
                            max_overlaps,
                            set(iou_sampling_neg_inds),
                            num_expected_iou_sampling, floor_thr, num_bins,
                            bg_thresh_hi)
                    else:
                        iou_sampled_inds = np.random.choice(
                            iou_sampling_neg_inds,
                            size=num_expected_iou_sampling,
                            replace=False)
                else:
                    iou_sampled_inds = np.array(
                        iou_sampling_neg_inds, dtype=np.int)

                num_expected_floor = num_expected - len(iou_sampled_inds)
                if len(floor_neg_inds) > num_expected_floor:
                    sampled_floor_inds = np.random.choice(
                        floor_neg_inds, size=num_expected_floor, replace=False)
                else:
                    sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int)

                sampled_inds = np.concatenate(
                    (sampled_floor_inds, iou_sampled_inds))
                if len(sampled_inds) < num_expected:
                    num_extra = num_expected - len(sampled_inds)
                    extra_inds = np.array(list(neg_set - set(sampled_inds)))
                    if len(extra_inds) > num_extra:
                        extra_inds = np.random.choice(
                            extra_inds, size=num_extra, replace=False)
                    sampled_inds = np.concatenate((sampled_inds, extra_inds))
                return sampled_inds

        def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
                         batch_size_per_im, fg_fraction, fg_thresh,
                         bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
                         class_nums, use_random, is_cls_agnostic,
                         is_cascade_rcnn):
            rois_per_image = int(batch_size_per_im)
            fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))

            # Roidb
            im_scale = im_info[2]
            inv_im_scale = 1. / im_scale
            rpn_rois = rpn_rois * inv_im_scale
            if is_cascade_rcnn:
                rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
            boxes = np.vstack([gt_boxes, rpn_rois])
            gt_overlaps = np.zeros((boxes.shape[0], class_nums))
            box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
            if len(gt_boxes) > 0:
                proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)

                overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
                overlaps_max = proposal_to_gt_overlaps.max(axis=1)
                # Boxes which with non-zero overlap with gt boxes
                overlapped_boxes_ind = np.where(overlaps_max > 0)[0]

                overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
                    overlapped_boxes_ind]]

                for idx in range(len(overlapped_boxes_ind)):
                    gt_overlaps[overlapped_boxes_ind[
                        idx], overlapped_boxes_gt_classes[idx]] = overlaps_max[
                            overlapped_boxes_ind[idx]]
                    box_to_gt_ind_map[overlapped_boxes_ind[
                        idx]] = overlaps_argmax[overlapped_boxes_ind[idx]]

            crowd_ind = np.where(is_crowd)[0]
            gt_overlaps[crowd_ind] = -1

            max_overlaps = gt_overlaps.max(axis=1)
            max_classes = gt_overlaps.argmax(axis=1)

            # Cascade RCNN Decode Filter
            if is_cascade_rcnn:
                ws = boxes[:, 2] - boxes[:, 0] + 1
                hs = boxes[:, 3] - boxes[:, 1] + 1
                keep = np.where((ws > 0) & (hs > 0))[0]
                boxes = boxes[keep]
                max_overlaps = max_overlaps[keep]
                fg_inds = np.where(max_overlaps >= fg_thresh)[0]
                bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                    max_overlaps >= bg_thresh_lo))[0]
                fg_rois_per_this_image = fg_inds.shape[0]
                bg_rois_per_this_image = bg_inds.shape[0]
            else:
                # Foreground
                fg_inds = np.where(max_overlaps >= fg_thresh)[0]
                fg_rois_per_this_image = np.minimum(fg_rois_per_im,
                                                    fg_inds.shape[0])
                # Sample foreground if there are too many
                if fg_inds.shape[0] > fg_rois_per_this_image:
                    if use_random:
                        fg_inds = _sample_pos(max_overlaps, max_classes,
                                              fg_inds, fg_rois_per_this_image)
                fg_inds = fg_inds[:fg_rois_per_this_image]

                # Background
                bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                    max_overlaps >= bg_thresh_lo))[0]
                bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
                bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                                    bg_inds.shape[0])
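                # Background RoIs fill the remainder of the per-image batch;
                # when use_random is set they are picked with the IoU-balanced
                # sampler, otherwise the candidate list is simply truncated.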
                assert bg_rois_per_this_image >= 0, "bg_rois_per_this_image must be >= 0 but got {}".format(
                    bg_rois_per_this_image)

                # Sample background if there are too many
                if bg_inds.shape[0] > bg_rois_per_this_image:
                    if use_random:
                        # libra neg sample
                        bg_inds = _sample_neg(
                            max_overlaps,
                            max_classes,
                            bg_inds,
                            bg_rois_per_this_image,
                            num_bins=num_bins,
                            bg_thresh_hi=bg_thresh_hi)
                bg_inds = bg_inds[:bg_rois_per_this_image]

            keep_inds = np.append(fg_inds, bg_inds)
            sampled_labels = max_classes[keep_inds]  # N x 1
            sampled_labels[fg_rois_per_this_image:] = 0
            sampled_boxes = boxes[keep_inds]  # N x 324
            sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
            sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]

            bbox_label_targets = _compute_targets(
                sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights)
            bbox_targets, bbox_inside_weights = _expand_bbox_targets(
                bbox_label_targets, class_nums, is_cls_agnostic)
            bbox_outside_weights = np.array(
                bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)

            # Scale rois
            sampled_rois = sampled_boxes * im_scale

            # Faster RCNN blobs
            frcn_blobs = dict(
                rois=sampled_rois,
                labels_int32=sampled_labels,
                bbox_targets=bbox_targets,
                bbox_inside_weights=bbox_inside_weights,
                bbox_outside_weights=bbox_outside_weights)
            return frcn_blobs

        def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
            assert roi_boxes.shape[0] == gt_boxes.shape[0]
            assert roi_boxes.shape[1] == 4
            assert gt_boxes.shape[1] == 4

            targets = np.zeros(roi_boxes.shape)
            bbox_reg_weights = np.asarray(bbox_reg_weights)
            targets = box_to_delta(
                ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights)

            return np.hstack([labels[:, np.newaxis], targets]).astype(
                np.float32, copy=False)

        def _expand_bbox_targets(bbox_targets_input, class_nums,
                                 is_cls_agnostic):
            class_labels = bbox_targets_input[:, 0]
            fg_inds = np.where(class_labels > 0)[0]
            bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums
                                     if not is_cls_agnostic else 4 * 2))
            bbox_inside_weights = np.zeros(bbox_targets.shape)
            for ind in fg_inds:
                class_label = int(class_labels[
                    ind]) if not is_cls_agnostic else 1
                start_ind = class_label * 4
                end_ind = class_label * 4 + 4
                bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind,
                                                                          1:]
                bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0,
                                                               1.0)
            return bbox_targets, bbox_inside_weights

        def generate_func(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info):
            rpn_rois_lod = rpn_rois.lod()[0]
            gt_classes_lod = gt_classes.lod()[0]

            # convert
            rpn_rois = np.array(rpn_rois)
            gt_classes = np.array(gt_classes)
            is_crowd = np.array(is_crowd)
            gt_boxes = np.array(gt_boxes)
            im_info = np.array(im_info)

            rois = []
            labels_int32 = []
            bbox_targets = []
            bbox_inside_weights = []
            bbox_outside_weights = []
            lod = [0]

            for idx in range(len(rpn_rois_lod) - 1):
                rois_si = rpn_rois_lod[idx]
                rois_ei = rpn_rois_lod[idx + 1]

                gt_si = gt_classes_lod[idx]
                gt_ei = gt_classes_lod[idx + 1]
                frcn_blobs = _sample_rois(
                    rpn_rois[rois_si:rois_ei], gt_classes[gt_si:gt_ei],
                    is_crowd[gt_si:gt_ei], gt_boxes[gt_si:gt_ei], im_info[idx],
                    batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
                    bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
                    is_cls_agnostic, is_cascade_rcnn)
                lod.append(frcn_blobs['rois'].shape[0] + lod[-1])

                rois.append(frcn_blobs['rois'])
                labels_int32.append(frcn_blobs['labels_int32'].reshape(-1, 1))
                bbox_targets.append(frcn_blobs['bbox_targets'])
                bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
                bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])
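            # Stack the per-image blobs; `lod` holds the cumulative RoI counts
            # so each output can be wrapped as a LoD tensor below.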
            rois = np.vstack(rois)
            labels_int32 = np.vstack(labels_int32)
            bbox_targets = np.vstack(bbox_targets)
            bbox_inside_weights = np.vstack(bbox_inside_weights)
            bbox_outside_weights = np.vstack(bbox_outside_weights)

            # create lod-tensor for return
            # notice that the func create_lod_tensor does not work well here
            ret_rois = fluid.LoDTensor()
            ret_rois.set_lod([lod])
            ret_rois.set(rois.astype("float32"), fluid.CPUPlace())

            ret_labels_int32 = fluid.LoDTensor()
            ret_labels_int32.set_lod([lod])
            ret_labels_int32.set(labels_int32.astype("int32"), fluid.CPUPlace())

            ret_bbox_targets = fluid.LoDTensor()
            ret_bbox_targets.set_lod([lod])
            ret_bbox_targets.set(
                bbox_targets.astype("float32"), fluid.CPUPlace())

            ret_bbox_inside_weights = fluid.LoDTensor()
            ret_bbox_inside_weights.set_lod([lod])
            ret_bbox_inside_weights.set(
                bbox_inside_weights.astype("float32"), fluid.CPUPlace())

            ret_bbox_outside_weights = fluid.LoDTensor()
            ret_bbox_outside_weights.set_lod([lod])
            ret_bbox_outside_weights.set(
                bbox_outside_weights.astype("float32"), fluid.CPUPlace())

            return ret_rois, ret_labels_int32, ret_bbox_targets, \
                ret_bbox_inside_weights, ret_bbox_outside_weights

        rois = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'rois',
            dtype='float32',
            shape=[-1, 4])
        bbox_inside_weights = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'bbox_inside_weights',
            dtype='float32',
            shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4])
        bbox_outside_weights = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'bbox_outside_weights',
            dtype='float32',
            shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4])
        bbox_targets = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'bbox_targets',
            dtype='float32',
            shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4])
        labels_int32 = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'labels_int32',
            dtype='int32',
            shape=[-1, 1])

        outs = [
            rois, labels_int32, bbox_targets, bbox_inside_weights,
            bbox_outside_weights
        ]

        fluid.layers.py_func(
            func=generate_func,
            x=[rpn_rois, gt_classes, is_crowd, gt_boxes, im_info],
            out=outs)
        return outs


class BBoxAssigner(object):
    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = num_classes
        self.use_random = shuffle_before_sample

    def __call__(self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info):
        return fluid.layers.generate_proposal_labels(
            rpn_rois=rpn_rois,
            gt_classes=gt_classes,
            is_crowd=is_crowd,
            gt_boxes=gt_boxes,
            im_info=im_info,
            batch_size_per_im=self.batch_size_per_im,
            fg_fraction=self.fg_fraction,
            fg_thresh=self.fg_thresh,
            bg_thresh_hi=self.bg_thresh_hi,
            bg_thresh_lo=self.bg_thresh_lo,
            bbox_reg_weights=self.bbox_reg_weights,
            class_nums=self.class_nums,
            use_random=self.use_random)
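# Minimal usage sketch (hypothetical variable names, static-graph mode assumed):
# the assigners above are called inside a program with RPN proposals and
# ground-truth LoD tensors, e.g.
#   assigner = BBoxAssigner(num_classes=81)
#   rois, labels_int32, bbox_targets, bbox_inside_w, bbox_outside_w = assigner(
#       rpn_rois, gt_classes, is_crowd, gt_boxes, im_info)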