| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966 |
- # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import numpy as np
- from numbers import Integral
- import math
- import six
- import paddle
- from paddle import fluid
def bbox_overlaps(boxes_1, boxes_2):
    """Compute the pairwise IoU between two sets of boxes.

    Every row of both inputs is ``[x1, y1, x2, y2]``. Boxes are treated as
    inclusive pixel boxes: widths and heights are computed with ``+ 1``.

    Args:
        boxes_1: array of shape (N, 4).
        boxes_2: array of shape (M, 4).

    Returns:
        Array of shape (N, M); entry ``[i, j]`` is the IoU of
        ``boxes_1[i]`` and ``boxes_2[j]``.

    Raises:
        AssertionError: if either input does not have 4 columns.
    """
    assert boxes_1.shape[1] == 4 and boxes_2.shape[1] == 4

    # Coordinates of boxes_1 are kept as (N, 1) columns so that they
    # broadcast against the (M,) coordinate vectors of boxes_2 below.
    x1_1 = boxes_1[:, 0:1]
    y1_1 = boxes_1[:, 1:2]
    x2_1 = boxes_1[:, 2:3]
    y2_1 = boxes_1[:, 3:4]
    area_1 = (x2_1 - x1_1 + 1) * (y2_1 - y1_1 + 1)

    x1_2 = boxes_2[:, 0].transpose()
    y1_2 = boxes_2[:, 1].transpose()
    x2_2 = boxes_2[:, 2].transpose()
    y2_2 = boxes_2[:, 3].transpose()
    area_2 = (x2_2 - x1_2 + 1) * (y2_2 - y1_2 + 1)

    # Intersection rectangle; non-overlapping pairs clamp to zero extent.
    xx1 = np.maximum(x1_1, x1_2)
    yy1 = np.maximum(y1_1, y1_2)
    xx2 = np.minimum(x2_1, x2_2)
    yy2 = np.minimum(y2_1, y2_2)

    w = np.maximum(0.0, xx2 - xx1 + 1)
    h = np.maximum(0.0, yy2 - yy1 + 1)
    inter = w * h

    ovr = inter / (area_1 + area_2 - inter)
    return ovr
def box_to_delta(ex_boxes, gt_boxes, weights):
    """Encode ``gt_boxes`` as regression deltas relative to ``ex_boxes``.

    Both box arrays hold ``[x1, y1, x2, y2]`` rows; extents are computed
    with the inclusive ``+ 1`` pixel convention. Each delta component is
    divided by the matching entry of ``weights``.

    Args:
        ex_boxes: array of shape (N, 4), the reference boxes.
        gt_boxes: array of shape (N, 4), the boxes to encode.
        weights: indexable with at least four entries, the divisors for
            (dx, dy, dw, dh).

    Returns:
        Array of shape (N, 4) with columns (dx, dy, dw, dh).
    """
    src_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
    src_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
    src_cx = ex_boxes[:, 0] + 0.5 * src_w
    src_cy = ex_boxes[:, 1] + 0.5 * src_h

    dst_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
    dst_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
    dst_cx = gt_boxes[:, 0] + 0.5 * dst_w
    dst_cy = gt_boxes[:, 1] + 0.5 * dst_h

    # Centre offsets are normalised by the reference size; sizes are encoded
    # as log-ratios.
    components = [
        (dst_cx - src_cx) / src_w / weights[0],
        (dst_cy - src_cy) / src_h / weights[1],
        (np.log(dst_w / src_w)) / weights[2],
        (np.log(dst_h / src_h)) / weights[3],
    ]
    return np.vstack(components).transpose()
def DropBlock(input, block_size, keep_prob, is_test):
    """Apply DropBlock to ``input`` (a no-op when ``is_test`` is true).

    In training mode a random seed mask is drawn, grown into
    ``block_size``-sized blocks with a stride-1 max-pool, inverted into a
    keep mask, and the masked input is rescaled by
    (number of elements) / (sum of the keep mask).

    Args:
        input: feature-map variable. Only dimension 3 of its shape is read
            as the spatial size (presumably NCHW with square maps -- TODO
            confirm against callers).
        block_size: side length of a dropped block.
        keep_prob: target keep probability used to derive the seed rate.
        is_test: when true, ``input`` is returned unchanged.

    Returns:
        ``input`` itself in test mode, otherwise the masked, rescaled variable.
    """
    if is_test:
        return input

    def _seed_rate(feat, block_size, keep_prob):
        # gamma = (1 - keep_prob) * feat^2 / (block^2 * (feat - block + 1)^2)
        dyn_shape = fluid.layers.shape(feat)
        side = fluid.layers.slice(dyn_shape, [0], [3], [4])
        side = fluid.layers.cast(side, dtype="float32")
        side = fluid.layers.reshape(side, [1, 1, 1, 1])
        side_sq = fluid.layers.pow(side, factor=2)

        block = fluid.layers.fill_constant(
            shape=[1, 1, 1, 1], value=block_size, dtype='float32')
        block_sq = fluid.layers.pow(block, factor=2)

        # Number of positions where a whole block still fits.
        valid = side - block + 1
        valid_sq = fluid.layers.pow(valid, factor=2)

        numerator = side_sq * (1 - keep_prob)
        denominator = block_sq * valid_sq
        return numerator / denominator

    gamma = _seed_rate(input, block_size=block_size, keep_prob=keep_prob)
    dyn_shape = fluid.layers.shape(input)
    drop_prob = fluid.layers.expand_as(gamma, input)

    # Bernoulli(gamma) seeds, drawn by thresholding uniform noise.
    shape_i64 = fluid.layers.cast(dyn_shape, dtype="int64")
    noise = fluid.layers.uniform_random(
        shape_i64, dtype='float32', min=0.0, max=1.0)
    seeds = fluid.layers.less_than(noise, drop_prob)
    seeds.stop_gradient = True
    seeds = fluid.layers.cast(seeds, dtype="float32")

    # The max-pool spreads every seed over a block_size neighbourhood.
    dropped = fluid.layers.pool2d(
        seeds,
        pool_size=block_size,
        pool_type='max',
        pool_stride=1,
        pool_padding=block_size // 2)
    keep_mask = 1.0 - dropped

    total = fluid.layers.reduce_prod(dyn_shape)
    total_f = fluid.layers.cast(total, dtype="float32")
    total_f.stop_gradient = True

    kept = fluid.layers.reduce_sum(keep_mask)
    kept_f = fluid.layers.cast(kept, dtype="float32")
    kept_f.stop_gradient = True

    return input * keep_mask * total_f / kept_f
class MultiClassNMS(object):
    """Callable holder of settings for ``fluid.layers.multiclass_nms``.

    The constructor only records its arguments; calling the instance
    forwards them, together with the boxes and scores, to the Paddle op.
    """

    def __init__(self,
                 score_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 nms_threshold=.5,
                 normalized=False,
                 nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        self.score_threshold = score_threshold
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        self.nms_threshold = nms_threshold
        self.normalized = normalized
        self.nms_eta = nms_eta
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Run multi-class NMS on ``bboxes``/``scores`` with the stored settings."""
        settings = dict(
            score_threshold=self.score_threshold,
            nms_top_k=self.nms_top_k,
            keep_top_k=self.keep_top_k,
            normalized=self.normalized,
            nms_threshold=self.nms_threshold,
            nms_eta=self.nms_eta,
            background_label=self.background_label)
        return fluid.layers.multiclass_nms(
            bboxes=bboxes, scores=scores, **settings)
class MatrixNMS(object):
    """Callable holder of settings for ``paddle.fluid.layers.matrix_nms``.

    The constructor only records its arguments; calling the instance
    forwards them, together with the boxes and scores, to the Paddle op.
    """

    def __init__(self,
                 score_threshold=.05,
                 post_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 use_gaussian=False,
                 gaussian_sigma=2.,
                 normalized=False,
                 background_label=0):
        super(MatrixNMS, self).__init__()
        self.score_threshold = score_threshold
        self.post_threshold = post_threshold
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.use_gaussian = use_gaussian
        self.gaussian_sigma = gaussian_sigma
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Run Matrix NMS on ``bboxes``/``scores`` with the stored settings."""
        settings = dict(
            score_threshold=self.score_threshold,
            post_threshold=self.post_threshold,
            nms_top_k=self.nms_top_k,
            keep_top_k=self.keep_top_k,
            normalized=self.normalized,
            use_gaussian=self.use_gaussian,
            gaussian_sigma=self.gaussian_sigma,
            background_label=self.background_label)
        return paddle.fluid.layers.matrix_nms(
            bboxes=bboxes, scores=scores, **settings)
class MultiClassSoftNMS(object):
    """Per-class Soft-NMS computed in NumPy and wired in through ``py_func``.

    Args:
        score_threshold: detections whose (decayed) score falls below this
            value are discarded.
        keep_top_k: maximum number of detections kept per image over all
            classes. Values ``<= 0`` keep every detection.
        softnms_sigma: divisor in the Gaussian decay ``exp(-iou^2 / sigma)``.
        normalized: when true, box extents are computed without the ``+ 1``
            pixel offset.
        background_label: when 0, class column 0 is skipped.
    """

    def __init__(
            self,
            score_threshold=0.01,
            keep_top_k=300,
            softnms_sigma=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassSoftNMS, self).__init__()
        self.score_threshold = score_threshold
        self.keep_top_k = keep_top_k
        self.softnms_sigma = softnms_sigma
        self.normalized = normalized
        self.background_label = background_label

    def _soft_nms_for_cls(self, dets, sigma, thres):
        """Run Soft-NMS over the detections of one class.

        Args:
            dets: float array of shape (K, 5) with rows
                ``[score, x1, y1, x2, y2]``; it is modified in place.
            sigma: Gaussian decay divisor.
            thres: detections whose decayed score drops below this are removed.

        Returns:
            Array of shape (K', 5) with the selected detections, in
            selection order.
        """
        eta = 0 if self.normalized else 1
        dets_final = []
        while len(dets) > 0:
            maxpos = np.argmax(dets[:, 0])
            dets_final.append(dets[maxpos].copy())
            _, tx1, ty1, tx2, ty2 = dets[maxpos]
            scores = dets[:, 0]
            # force remove bbox at maxpos
            scores[maxpos] = -1
            x1 = dets[:, 1]
            y1 = dets[:, 2]
            x2 = dets[:, 3]
            y2 = dets[:, 4]
            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
            xx1 = np.maximum(tx1, x1)
            yy1 = np.maximum(ty1, y1)
            xx2 = np.minimum(tx2, x2)
            yy2 = np.minimum(ty2, y2)
            w = np.maximum(0.0, xx2 - xx1 + eta)
            h = np.maximum(0.0, yy2 - yy1 + eta)
            inter = w * h
            ovr = inter / (areas + areas[maxpos] - inter)
            # Gaussian decay: the more a box overlaps the selected one, the
            # more its score is reduced.
            weight = np.exp(-(ovr * ovr) / sigma)
            scores = scores * weight
            idx_keep = np.where(scores >= thres)
            dets[:, 0] = scores
            dets = dets[idx_keep]
        dets_final = np.array(dets_final).reshape(-1, 5)
        return dets_final

    def _soft_nms(self, bboxes, scores):
        """Run Soft-NMS for every foreground class of one image.

        Args:
            bboxes: array of shape (N, C, 4), or (N, 4) when boxes are shared
                by all classes.
            scores: array of shape (N, C).

        Returns:
            Array of shape (D, 6) with rows
            ``[class_id, score, x1, y1, x2, y2]``.
        """
        class_nums = scores.shape[-1]
        softnms_thres = self.score_threshold
        softnms_sigma = self.softnms_sigma
        keep_top_k = self.keep_top_k

        cls_boxes = [[] for _ in range(class_nums)]
        cls_ids = [[] for _ in range(class_nums)]

        start_idx = 1 if self.background_label == 0 else 0
        for j in range(start_idx, class_nums):
            inds = np.where(scores[:, j] >= softnms_thres)[0]
            scores_j = scores[inds, j]
            rois_j = bboxes[inds, j, :] if len(
                bboxes.shape) > 2 else bboxes[inds, :]
            dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                np.float32, copy=False)
            cls_rank = np.argsort(-dets_j[:, 0])
            dets_j = dets_j[cls_rank]

            cls_boxes[j] = self._soft_nms_for_cls(
                dets_j, sigma=softnms_sigma, thres=softnms_thres)
            cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1, 1)

        cls_boxes = np.vstack(cls_boxes[start_idx:])
        cls_ids = np.vstack(cls_ids[start_idx:])
        pred_result = np.hstack([cls_ids, cls_boxes])

        # Limit to max_per_image detections **over all classes**.
        # A non-positive keep_top_k means "no limit"; without this guard,
        # -keep_top_k would index the sorted scores from the wrong end.
        image_scores = cls_boxes[:, 0]
        if keep_top_k > 0 and len(image_scores) > keep_top_k:
            image_thresh = np.sort(image_scores)[-keep_top_k]
            keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
            pred_result = pred_result[keep, :]

        return pred_result

    def __call__(self, bboxes, scores):
        """Append a ``py_func`` op that applies Soft-NMS to ``bboxes``/``scores``.

        Returns:
            The output variable (declared shape ``[-1, 6]``, ``lod_level=1``)
            that receives the detections.
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        # NOTE(review): kept as a plain two-argument closure, exactly as the
        # callable originally handed to py_func; a bound method would expose
        # an extra ``self`` argument to whatever inspects the callable.
        def _batch_softnms(bboxes, scores):
            batch_offsets = bboxes.lod()
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            out_offsets = [0]
            pred_res = []
            if len(batch_offsets) > 0:
                # LoD input: rows of all images are concatenated and the
                # offsets delimit each image.
                batch_offset = batch_offsets[0]
                for i in range(len(batch_offset) - 1):
                    s, e = batch_offset[i], batch_offset[i + 1]
                    pred = self._soft_nms(bboxes[s:e], scores[s:e])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            else:
                # Dense input: the leading axis enumerates the images.
                assert len(bboxes.shape) == 3
                assert len(scores.shape) == 3
                for i in range(bboxes.shape[0]):
                    pred = self._soft_nms(bboxes[i], scores[i])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)

            res = fluid.LoDTensor()
            res.set_lod([out_offsets])
            if len(pred_res) == 0:
                # Placeholder payload when the batch contained no image.
                pred_res = np.array([[1]], dtype=np.float32)
            res.set(np.vstack(pred_res).astype(np.float32), fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='softnms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=1)
        fluid.layers.py_func(
            func=_batch_softnms, x=[bboxes, scores], out=pred_result)
        return pred_result
class MultiClassDiouNMS(object):
    """Per-class DIoU-NMS computed in NumPy and wired in through ``py_func``.

    Args:
        score_threshold: detections scoring below this value are ignored.
        keep_top_k: maximum number of detections kept over all classes.
            Values ``<= 0`` keep every detection.
        nms_threshold: a box is suppressed when ``IoU - DIoU term`` with an
            already kept box exceeds this value.
        normalized: when true, box extents are computed without the ``+ 1``
            pixel offset.
        background_label: when 0, class column 0 is skipped.
    """

    def __init__(
            self,
            score_threshold=0.05,
            keep_top_k=100,
            nms_threshold=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassDiouNMS, self).__init__()
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.background_label = background_label

    @staticmethod
    def _calc_diou_term(dets1, dets2):
        """Return squared centre distance over squared enclosing-box diagonal.

        Args:
            dets1: ``[x1, y1, x2, y2]`` of the reference box.
            dets2: ``[x1, y1, x2, y2]`` of the compared boxes (scalars or
                arrays).
        """
        eps = 1.e-10

        x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
        x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]

        # Centres are taken from the raw corners, before the clamp below.
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        cxg = (x1g + x2g) / 2
        cyg = (y1g + y2g) / 2

        x2 = np.maximum(x1, x2)
        y2 = np.maximum(y1, y2)

        # Smallest box enclosing both A and B.
        xc1 = np.minimum(x1, x1g)
        yc1 = np.minimum(y1, y1g)
        xc2 = np.maximum(x2, x2g)
        yc2 = np.maximum(y2, y2g)

        # DIoU term
        dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
        dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
        diou_term = (dist_intersection + eps) / (dist_union + eps)
        return diou_term

    def _diou_nms_for_cls(self, dets, thres):
        """Run greedy DIoU-NMS over one class.

        Args:
            dets: array of shape (K, 5) with rows
                ``[score, x1, y1, x2, y2]``. Rows are visited in the given
                order, so the caller sorts them by descending score.
            thres: suppression threshold on ``IoU - DIoU term``.

        Returns:
            The kept rows of ``dets``.
        """
        x1 = dets[:, 1]
        y1 = dets[:, 2]
        x2 = dets[:, 3]
        y2 = dets[:, 4]
        eta = 0 if self.normalized else 1
        areas = (x2 - x1 + eta) * (y2 - y1 + eta)
        order = np.arange(dets.shape[0])

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            rest = order[1:]
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])

            w = np.maximum(0.0, xx2 - xx1 + eta)
            h = np.maximum(0.0, yy2 - yy1 + eta)
            inter = w * h
            ovr = inter / (areas[i] + areas[rest] - inter)

            diou_term = self._calc_diou_term(
                [x1[i], y1[i], x2[i], y2[i]],
                [x1[rest], y1[rest], x2[rest], y2[rest]])

            inds = np.where(ovr - diou_term <= thres)[0]
            # ``inds`` indexes ``rest``; +1 maps it back into ``order``.
            order = order[inds + 1]

        dets_final = dets[keep]
        return dets_final

    def _diou_nms_np(self, bboxes, scores):
        """Run DIoU-NMS for every foreground class and apply ``keep_top_k``.

        Args:
            bboxes: array-like of shape (N, C, 4).
            scores: array-like of shape (N, C).

        Returns:
            float32 array of shape (D, 6) with rows
            ``[class_id, score, x1, y1, x2, y2]``.
        """
        bboxes = np.array(bboxes)
        scores = np.array(scores)
        class_nums = scores.shape[-1]

        score_threshold = self.score_threshold
        nms_threshold = self.nms_threshold
        keep_top_k = self.keep_top_k

        cls_boxes = [[] for _ in range(class_nums)]
        cls_ids = [[] for _ in range(class_nums)]

        start_idx = 1 if self.background_label == 0 else 0
        for j in range(start_idx, class_nums):
            inds = np.where(scores[:, j] >= score_threshold)[0]
            scores_j = scores[inds, j]
            rois_j = bboxes[inds, j, :]
            dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                np.float32, copy=False)
            cls_rank = np.argsort(-dets_j[:, 0])
            dets_j = dets_j[cls_rank]

            cls_boxes[j] = self._diou_nms_for_cls(dets_j, thres=nms_threshold)
            cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1, 1)

        cls_boxes = np.vstack(cls_boxes[start_idx:])
        cls_ids = np.vstack(cls_ids[start_idx:])
        pred_result = np.hstack([cls_ids, cls_boxes]).astype(np.float32)

        # Limit to max_per_image detections **over all classes**.
        # A non-positive keep_top_k means "no limit"; without this guard,
        # -keep_top_k would index the sorted scores from the wrong end.
        image_scores = cls_boxes[:, 0]
        if keep_top_k > 0 and len(image_scores) > keep_top_k:
            image_thresh = np.sort(image_scores)[-keep_top_k]
            keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
            pred_result = pred_result[keep, :]

        return pred_result

    def __call__(self, bboxes, scores):
        """Append a ``py_func`` op that applies DIoU-NMS to ``bboxes``/``scores``.

        Returns:
            The output variable (declared shape ``[-1, 6]``) that receives
            the detections.
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        # NOTE(review): kept as a plain two-argument closure, exactly as the
        # callable originally handed to py_func; a bound method would expose
        # an extra ``self`` argument to whatever inspects the callable.
        def _diou_nms(bboxes, scores):
            pred_result = self._diou_nms_np(bboxes, scores)

            res = fluid.LoDTensor()
            res.set_lod([[0, pred_result.shape[0]]])
            if pred_result.shape[0] == 0:
                # Placeholder payload when nothing was detected.
                pred_result = np.array([[1]], dtype=np.float32)
            res.set(pred_result, fluid.CPUPlace())

            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='diou_nms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=0)
        fluid.layers.py_func(
            func=_diou_nms, x=[bboxes, scores], out=pred_result)
        return pred_result
class LibraBBoxAssigner(object):
    """Assign RPN proposals to targets with class- and IoU-balanced sampling.

    The sampling runs in NumPy inside a ``fluid.layers.py_func`` op.
    Positives are spread over the matched classes and negatives over
    ``num_bins`` equal-width IoU intervals.

    Args:
        batch_size_per_im: number of RoIs sampled per image.
        fg_fraction: fraction of ``batch_size_per_im`` reserved for
            foreground RoIs.
        fg_thresh: minimum max-overlap for a foreground RoI.
        bg_thresh_hi: exclusive upper overlap bound for a background RoI.
        bg_thresh_lo: inclusive lower overlap bound for a background RoI.
        bbox_reg_weights: divisors applied to the (dx, dy, dw, dh) targets.
            The list default is only read, never mutated.
        num_classes: number of classes (stored as ``class_nums``).
        shuffle_before_sample: stored as ``use_random``; when false, the
            first RoIs are taken instead of a balanced random sample.
        is_cls_agnostic: when true, regression targets use 2 slots
            (background / foreground) instead of one per class.
        num_bins: number of IoU intervals for negative sampling.
    """

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 shuffle_before_sample=True,
                 is_cls_agnostic=False,
                 num_bins=3):
        super(LibraBBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = num_classes
        self.use_random = shuffle_before_sample
        self.is_cls_agnostic = is_cls_agnostic
        self.num_bins = num_bins

    def __call__(
            self,
            rpn_rois,
            gt_classes,
            is_crowd,
            gt_boxes,
            im_info, ):
        """Build the sampling op using the settings stored on the instance."""
        return self.generate_proposal_label_libra(
            rpn_rois=rpn_rois,
            gt_classes=gt_classes,
            is_crowd=is_crowd,
            gt_boxes=gt_boxes,
            im_info=im_info,
            batch_size_per_im=self.batch_size_per_im,
            fg_fraction=self.fg_fraction,
            fg_thresh=self.fg_thresh,
            bg_thresh_hi=self.bg_thresh_hi,
            bg_thresh_lo=self.bg_thresh_lo,
            bbox_reg_weights=self.bbox_reg_weights,
            class_nums=self.class_nums,
            use_random=self.use_random,
            is_cls_agnostic=self.is_cls_agnostic,
            is_cascade_rcnn=False)

    @staticmethod
    def _sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
        """Sample ``num_expected`` positives, spread over the matched classes.

        ``max_overlaps`` is accepted for signature symmetry with
        ``_sample_neg`` but is not read.
        """
        if len(pos_inds) <= num_expected:
            return pos_inds

        pos_set = set(pos_inds)
        unique_gt_inds = np.unique(max_classes[pos_inds])
        num_gts = len(unique_gt_inds)
        num_per_gt = int(round(num_expected / float(num_gts)) + 1)

        sampled_inds = []
        for i in unique_gt_inds:
            inds = np.nonzero(max_classes == i)[0]
            inds = list(set(inds) & pos_set)
            if len(inds) > num_per_gt:
                inds = np.random.choice(inds, size=num_per_gt, replace=False)
            sampled_inds.extend(list(inds))  # combine as a new sampler

        if len(sampled_inds) < num_expected:
            # Top up with positives not picked by the per-class pass.
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(pos_set - set(sampled_inds)))
            assert len(sampled_inds)+len(extra_inds) == len(pos_inds), \
                "sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
                    len(sampled_inds), len(extra_inds), len(pos_inds))
            if len(extra_inds) > num_extra:
                extra_inds = np.random.choice(
                    extra_inds, size=num_extra, replace=False)
            sampled_inds.extend(extra_inds.tolist())
        elif len(sampled_inds) > num_expected:
            sampled_inds = np.random.choice(
                sampled_inds, size=num_expected, replace=False)
        return sampled_inds

    @staticmethod
    def _sample_via_interval(max_overlaps, full_set, num_expected, floor_thr,
                             num_bins, bg_thresh_hi):
        """Sample from ``full_set`` evenly over ``num_bins`` IoU intervals.

        The intervals split ``[floor_thr, max_overlaps.max())`` into equal
        widths. ``bg_thresh_hi`` is accepted but not read.
        """
        max_iou = max_overlaps.max()
        iou_interval = (max_iou - floor_thr) / num_bins
        per_num_expected = int(num_expected / num_bins)

        sampled_inds = []
        for i in range(num_bins):
            start_iou = floor_thr + i * iou_interval
            end_iou = floor_thr + (i + 1) * iou_interval

            tmp_set = set(
                np.where(
                    np.logical_and(max_overlaps >= start_iou, max_overlaps <
                                   end_iou))[0])
            tmp_inds = list(tmp_set & full_set)

            if len(tmp_inds) > per_num_expected:
                tmp_sampled_set = np.random.choice(
                    tmp_inds, size=per_num_expected, replace=False)
            else:
                # Explicit integer dtype keeps an empty bin from turning the
                # concatenated index array into floats. (``np.int`` was an
                # alias of ``int`` and no longer exists in recent NumPy.)
                tmp_sampled_set = np.array(tmp_inds, dtype=int)
            sampled_inds.append(tmp_sampled_set)

        sampled_inds = np.concatenate(sampled_inds)
        if len(sampled_inds) < num_expected:
            # Bins that were short are topped up from the remaining candidates.
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(full_set - set(sampled_inds)))
            assert len(sampled_inds)+len(extra_inds) == len(full_set), \
                "sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
                    len(sampled_inds), len(extra_inds), len(full_set))

            if len(extra_inds) > num_extra:
                extra_inds = np.random.choice(
                    extra_inds, num_extra, replace=False)
            sampled_inds = np.concatenate([sampled_inds, extra_inds])

        return sampled_inds

    @staticmethod
    def _sample_neg(max_overlaps,
                    max_classes,
                    neg_inds,
                    num_expected,
                    floor_thr=-1,
                    floor_fraction=0,
                    num_bins=3,
                    bg_thresh_hi=0.5):
        """Sample ``num_expected`` negatives with IoU-balanced sampling.

        Negatives are split into a "floor" set (overlap below ``floor_thr``)
        and an IoU-sampling set; the latter is sampled per IoU interval.
        ``max_classes`` is accepted but not read.
        """
        if len(neg_inds) <= num_expected:
            return neg_inds

        # balance sampling for negative samples
        neg_set = set(neg_inds)

        if floor_thr > 0:
            floor_set = set(
                np.where(
                    np.logical_and(max_overlaps >= 0, max_overlaps <
                                   floor_thr))[0])
            iou_sampling_set = set(np.where(max_overlaps >= floor_thr)[0])
        elif floor_thr == 0:
            floor_set = set(np.where(max_overlaps == 0)[0])
            iou_sampling_set = set(np.where(max_overlaps > floor_thr)[0])
        else:
            floor_set = set()
            iou_sampling_set = set(np.where(max_overlaps > floor_thr)[0])
            floor_thr = 0

        floor_neg_inds = list(floor_set & neg_set)
        iou_sampling_neg_inds = list(iou_sampling_set & neg_set)

        num_expected_iou_sampling = int(num_expected * (1 - floor_fraction))
        if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
            if num_bins >= 2:
                iou_sampled_inds = LibraBBoxAssigner._sample_via_interval(
                    max_overlaps,
                    set(iou_sampling_neg_inds), num_expected_iou_sampling,
                    floor_thr, num_bins, bg_thresh_hi)
            else:
                iou_sampled_inds = np.random.choice(
                    iou_sampling_neg_inds,
                    size=num_expected_iou_sampling,
                    replace=False)
        else:
            iou_sampled_inds = np.array(iou_sampling_neg_inds, dtype=int)

        num_expected_floor = num_expected - len(iou_sampled_inds)
        if len(floor_neg_inds) > num_expected_floor:
            sampled_floor_inds = np.random.choice(
                floor_neg_inds, size=num_expected_floor, replace=False)
        else:
            sampled_floor_inds = np.array(floor_neg_inds, dtype=int)

        sampled_inds = np.concatenate((sampled_floor_inds, iou_sampled_inds))
        if len(sampled_inds) < num_expected:
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(neg_set - set(sampled_inds)))
            if len(extra_inds) > num_extra:
                extra_inds = np.random.choice(
                    extra_inds, size=num_extra, replace=False)
            sampled_inds = np.concatenate((sampled_inds, extra_inds))
        return sampled_inds

    @staticmethod
    def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
        """Return rows ``[label, dx, dy, dw, dh]`` (float32) for each RoI."""
        assert roi_boxes.shape[0] == gt_boxes.shape[0]
        assert roi_boxes.shape[1] == 4
        assert gt_boxes.shape[1] == 4

        bbox_reg_weights = np.asarray(bbox_reg_weights)
        targets = box_to_delta(
            ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights)

        return np.hstack([labels[:, np.newaxis], targets]).astype(
            np.float32, copy=False)

    @staticmethod
    def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic):
        """Scatter the 4 deltas of every foreground row into its class slot.

        Args:
            bbox_targets_input: array of shape (R, 5) with rows
                ``[label, dx, dy, dw, dh]``.
            class_nums: number of class slots when not class-agnostic.
            is_cls_agnostic: when true, 2 slots are used and every foreground
                row is written to slot 1.

        Returns:
            ``(bbox_targets, bbox_inside_weights)``, both of shape
            (R, 4 * slots). Weights are 1 where targets were written and
            0 elsewhere.
        """
        class_labels = bbox_targets_input[:, 0]
        fg_inds = np.where(class_labels > 0)[0]
        bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums
                                 if not is_cls_agnostic else 4 * 2))
        bbox_inside_weights = np.zeros(bbox_targets.shape)
        for ind in fg_inds:
            class_label = int(class_labels[ind]) if not is_cls_agnostic else 1
            start_ind = class_label * 4
            end_ind = class_label * 4 + 4
            bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:]
            bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0)
        return bbox_targets, bbox_inside_weights

    @staticmethod
    def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
                     batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
                     bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
                     is_cls_agnostic, is_cascade_rcnn, num_bins):
        """Sample fg/bg RoIs for one image and build their training blobs.

        Returns:
            dict with keys ``rois``, ``labels_int32``, ``bbox_targets``,
            ``bbox_inside_weights`` and ``bbox_outside_weights``.
        """
        rois_per_image = int(batch_size_per_im)
        fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))

        # Roidb: proposals are brought back to the unscaled image frame,
        # using entry 2 of im_info as the scale.
        im_scale = im_info[2]
        inv_im_scale = 1. / im_scale
        rpn_rois = rpn_rois * inv_im_scale
        if is_cascade_rcnn:
            rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
        # Ground-truth boxes are prepended, so they are candidates too.
        boxes = np.vstack([gt_boxes, rpn_rois])
        gt_overlaps = np.zeros((boxes.shape[0], class_nums))
        box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
        if len(gt_boxes) > 0:
            proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)

            overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
            overlaps_max = proposal_to_gt_overlaps.max(axis=1)
            # Boxes which with non-zero overlap with gt boxes
            overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
            overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
                overlapped_boxes_ind]]

            for idx in range(len(overlapped_boxes_ind)):
                gt_overlaps[overlapped_boxes_ind[
                    idx], overlapped_boxes_gt_classes[idx]] = overlaps_max[
                        overlapped_boxes_ind[idx]]
                box_to_gt_ind_map[overlapped_boxes_ind[
                    idx]] = overlaps_argmax[overlapped_boxes_ind[idx]]

        # Rows flagged as crowd get overlap -1, below every threshold.
        crowd_ind = np.where(is_crowd)[0]
        gt_overlaps[crowd_ind] = -1

        max_overlaps = gt_overlaps.max(axis=1)
        max_classes = gt_overlaps.argmax(axis=1)

        # Cascade RCNN Decode Filter
        if is_cascade_rcnn:
            ws = boxes[:, 2] - boxes[:, 0] + 1
            hs = boxes[:, 3] - boxes[:, 1] + 1
            keep = np.where((ws > 0) & (hs > 0))[0]
            boxes = boxes[keep]
            max_overlaps = max_overlaps[keep]
            # Every per-box array indexed by keep_inds below must be
            # filtered alike, or labels and matched GTs would be read from
            # the wrong rows.
            max_classes = max_classes[keep]
            box_to_gt_ind_map = box_to_gt_ind_map[keep]
            fg_inds = np.where(max_overlaps >= fg_thresh)[0]
            bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                max_overlaps >= bg_thresh_lo))[0]
            fg_rois_per_this_image = fg_inds.shape[0]
            bg_rois_per_this_image = bg_inds.shape[0]
        else:
            # Foreground
            fg_inds = np.where(max_overlaps >= fg_thresh)[0]
            fg_rois_per_this_image = np.minimum(fg_rois_per_im,
                                                fg_inds.shape[0])
            # Sample foreground if there are too many
            if fg_inds.shape[0] > fg_rois_per_this_image:
                if use_random:
                    fg_inds = LibraBBoxAssigner._sample_pos(
                        max_overlaps, max_classes, fg_inds,
                        fg_rois_per_this_image)
            fg_inds = fg_inds[:fg_rois_per_this_image]

            # Background
            bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                max_overlaps >= bg_thresh_lo))[0]
            bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
            bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                                bg_inds.shape[0])
            assert bg_rois_per_this_image >= 0, "bg_rois_per_this_image must be >= 0 but got {}".format(
                bg_rois_per_this_image)

            # Sample background if there are too many
            if bg_inds.shape[0] > bg_rois_per_this_image:
                if use_random:
                    # libra neg sample
                    bg_inds = LibraBBoxAssigner._sample_neg(
                        max_overlaps,
                        max_classes,
                        bg_inds,
                        bg_rois_per_this_image,
                        num_bins=num_bins,
                        bg_thresh_hi=bg_thresh_hi)
            bg_inds = bg_inds[:bg_rois_per_this_image]

        keep_inds = np.append(fg_inds, bg_inds)
        sampled_labels = max_classes[keep_inds]  # N x 1
        # Everything after the foreground part is labelled background (0).
        sampled_labels[fg_rois_per_this_image:] = 0
        sampled_boxes = boxes[keep_inds]  # N x 324
        sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
        # Background rows get an arbitrary GT; their targets carry no weight.
        sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
        bbox_label_targets = LibraBBoxAssigner._compute_targets(
            sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights)
        bbox_targets, bbox_inside_weights = (
            LibraBBoxAssigner._expand_bbox_targets(
                bbox_label_targets, class_nums, is_cls_agnostic))
        bbox_outside_weights = np.array(
            bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
        # Scale rois
        sampled_rois = sampled_boxes * im_scale

        # Faster RCNN blobs
        frcn_blobs = dict(
            rois=sampled_rois,
            labels_int32=sampled_labels,
            bbox_targets=bbox_targets,
            bbox_inside_weights=bbox_inside_weights,
            bbox_outside_weights=bbox_outside_weights)
        return frcn_blobs

    def generate_proposal_label_libra(
            self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
            batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
            bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
            is_cls_agnostic, is_cascade_rcnn):
        """Append a ``py_func`` op that samples RoIs and builds their targets.

        Returns:
            ``[rois, labels_int32, bbox_targets, bbox_inside_weights,
            bbox_outside_weights]`` -- the output variables of the op.
        """
        num_bins = self.num_bins
        sample_rois = self._sample_rois

        def create_tmp_var(program, name, dtype, shape, lod_level=None):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def to_lod_tensor(array, dtype, lod):
            # notice that the func create_lod_tensor does not work well here
            tensor = fluid.LoDTensor()
            tensor.set_lod([lod])
            tensor.set(array.astype(dtype), fluid.CPUPlace())
            return tensor

        # NOTE(review): kept as a plain closure with the original argument
        # names, exactly as the callable originally handed to py_func.
        def generate_func(
                rpn_rois,
                gt_classes,
                is_crowd,
                gt_boxes,
                im_info, ):
            rpn_rois_lod = rpn_rois.lod()[0]
            gt_classes_lod = gt_classes.lod()[0]

            # convert
            rpn_rois = np.array(rpn_rois)
            gt_classes = np.array(gt_classes)
            is_crowd = np.array(is_crowd)
            gt_boxes = np.array(gt_boxes)
            im_info = np.array(im_info)

            rois = []
            labels_int32 = []
            bbox_targets = []
            bbox_inside_weights = []
            bbox_outside_weights = []
            lod = [0]

            # One image per LoD segment.
            for idx in range(len(rpn_rois_lod) - 1):
                rois_si = rpn_rois_lod[idx]
                rois_ei = rpn_rois_lod[idx + 1]

                gt_si = gt_classes_lod[idx]
                gt_ei = gt_classes_lod[idx + 1]
                frcn_blobs = sample_rois(
                    rpn_rois[rois_si:rois_ei], gt_classes[gt_si:gt_ei],
                    is_crowd[gt_si:gt_ei], gt_boxes[gt_si:gt_ei], im_info[idx],
                    batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
                    bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
                    is_cls_agnostic, is_cascade_rcnn, num_bins)
                lod.append(frcn_blobs['rois'].shape[0] + lod[-1])
                rois.append(frcn_blobs['rois'])
                labels_int32.append(frcn_blobs['labels_int32'].reshape(-1, 1))
                bbox_targets.append(frcn_blobs['bbox_targets'])
                bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
                bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])

            rois = np.vstack(rois)
            labels_int32 = np.vstack(labels_int32)
            bbox_targets = np.vstack(bbox_targets)
            bbox_inside_weights = np.vstack(bbox_inside_weights)
            bbox_outside_weights = np.vstack(bbox_outside_weights)

            # create lod-tensor for return
            ret_rois = to_lod_tensor(rois, "float32", lod)
            ret_labels_int32 = to_lod_tensor(labels_int32, "int32", lod)
            ret_bbox_targets = to_lod_tensor(bbox_targets, "float32", lod)
            ret_bbox_inside_weights = to_lod_tensor(bbox_inside_weights,
                                                    "float32", lod)
            ret_bbox_outside_weights = to_lod_tensor(bbox_outside_weights,
                                                     "float32", lod)

            return ret_rois, ret_labels_int32, ret_bbox_targets, ret_bbox_inside_weights, ret_bbox_outside_weights

        # Width of the regression outputs; read from the instance settings.
        reg_width = 8 if self.is_cls_agnostic else self.class_nums * 4

        rois = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'rois',
            dtype='float32',
            shape=[-1, 4], )
        bbox_inside_weights = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'bbox_inside_weights',
            dtype='float32',
            shape=[-1, reg_width], )
        bbox_outside_weights = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'bbox_outside_weights',
            dtype='float32',
            shape=[-1, reg_width], )
        bbox_targets = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'bbox_targets',
            dtype='float32',
            shape=[-1, reg_width], )
        labels_int32 = create_tmp_var(
            fluid.default_main_program(),
            name=None,  #'labels_int32',
            dtype='int32',
            shape=[-1, 1], )

        outs = [
            rois, labels_int32, bbox_targets, bbox_inside_weights,
            bbox_outside_weights
        ]

        fluid.layers.py_func(
            func=generate_func,
            x=[rpn_rois, gt_classes, is_crowd, gt_boxes, im_info],
            out=outs)
        return outs
class BBoxAssigner(object):
    """Callable holder of settings for ``fluid.layers.generate_proposal_labels``.

    The constructor only records its arguments (``num_classes`` is stored as
    ``class_nums`` and ``shuffle_before_sample`` as ``use_random``). Calling
    the instance forwards them, together with the per-batch inputs, to the
    Paddle op.
    """

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = num_classes
        self.use_random = shuffle_before_sample

    def __call__(self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info):
        """Generate proposal labels for the given inputs with the stored settings."""
        settings = dict(
            batch_size_per_im=self.batch_size_per_im,
            fg_fraction=self.fg_fraction,
            fg_thresh=self.fg_thresh,
            bg_thresh_hi=self.bg_thresh_hi,
            bg_thresh_lo=self.bg_thresh_lo,
            bbox_reg_weights=self.bbox_reg_weights,
            class_nums=self.class_nums,
            use_random=self.use_random)
        return fluid.layers.generate_proposal_labels(
            rpn_rois=rpn_rois,
            gt_classes=gt_classes,
            is_crowd=is_crowd,
            gt_boxes=gt_boxes,
            im_info=im_info,
            **settings)
|