Spaces:

scfive
/

samewind

Build error

samewind / mmdet /models /dense_heads /paa_head.py

scfive

Resolve README.md conflict and continue rebase

e8f2571 10 days ago

33.3 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	from typing import List, Optional, Tuple

	import numpy as np
	import torch
	from mmengine.structures import InstanceData
	from torch import Tensor

	from mmdet.registry import MODELS
	from mmdet.structures.bbox import bbox_overlaps
	from mmdet.utils import (ConfigType, InstanceList, OptConfigType,
	OptInstanceList)
	from ..layers import multiclass_nms
	from ..utils import levels_to_images, multi_apply
	from . import ATSSHead

	# Small positive floor; used in this file to clamp IoU-based loss weights
	# away from zero.
	EPS = 1e-12

	# scikit-learn is an optional dependency: ``skm`` is left as ``None`` when
	# the import fails, and ``paa_reassign`` raises at the point it needs it.
	try:
	    import sklearn.mixture as skm
	except ImportError:
	    skm = None


	@MODELS.register_module()
	class PAAHead(ATSSHead):
	    """Head of PAAAssignment: Probabilistic Anchor Assignment with IoU
	    Prediction for Object Detection.

	    Code is modified from the `official github repo
	    <https://github.com/kkhoot/PAA/blob/master/paa_core
	    /modeling/rpn/paa/loss.py>`_.

	    More details can be found in the `paper
	    <https://arxiv.org/abs/2007.08103>`_ .

	    Args:
	        topk (int): Select topk samples with smallest loss in
	            each level. Defaults to 9.
	        score_voting (bool): Whether to use score voting in post-process.
	            Defaults to True.
	        covariance_type (str): String describing the type of covariance
	            parameters to be used in
	            :class:`sklearn.mixture.GaussianMixture`.
	            It must be one of:

	            - 'full': each component has its own general covariance matrix
	            - 'tied': all components share the same general covariance
	              matrix
	            - 'diag': each component has its own diagonal covariance matrix
	            - 'spherical': each component has its own single variance

	            Defaults to 'diag'. From 'full' to 'spherical', the gmm fitting
	            process is faster yet the performance could be influenced. For
	            most cases, 'diag' should be a good choice.
	    """

	def __init__(self,
	             *args,
	             topk: int = 9,
	             score_voting: bool = True,
	             covariance_type: str = 'diag',
	             **kwargs):
	    """Record the PAA-specific options, then initialise the parent head.

	    Args:
	        *args: Positional arguments forwarded unchanged to the parent
	            constructor.
	        topk (int): Number of lowest-loss candidates kept per level and
	            per ground truth during reassignment. Defaults to 9.
	        score_voting (bool): Whether ``_bbox_post_process`` applies score
	            voting after NMS. Defaults to True.
	        covariance_type (str): ``covariance_type`` handed to
	            ``sklearn.mixture.GaussianMixture``. Defaults to 'diag'.
	        **kwargs: Keyword arguments forwarded unchanged to the parent
	            constructor.
	    """
	    # topk used in paa reassign process
	    self.topk = topk
	    self.with_score_voting = score_voting
	    self.covariance_type = covariance_type
	    # NOTE(review): the attributes are set before the parent constructor
	    # runs, presumably because it may already rely on them - confirm.
	    # Unpack both collections so the parent receives the caller's
	    # arguments individually; passing ``args, *kwargs`` would hand over
	    # one tuple followed by the keyword *names* as positionals.
	    super().__init__(*args, **kwargs)

	def loss_by_feat(
	        self,
	        cls_scores: List[Tensor],
	        bbox_preds: List[Tensor],
	        iou_preds: List[Tensor],
	        batch_gt_instances: InstanceList,
	        batch_img_metas: List[dict],
	        batch_gt_instances_ignore: OptInstanceList = None) -> dict:
	    """Compute the classification, box and IoU losses of the head.

	    An initial assignment from ``get_targets`` is refined per image by
	    ``paa_reassign`` (under ``torch.no_grad``) before the final losses are
	    evaluated on the flattened batch.

	    Args:
	        cls_scores (list[Tensor]): Box scores for each scale level
	            Has shape (N, num_anchors * num_classes, H, W)
	        bbox_preds (list[Tensor]): Box energies / deltas for each scale
	            level with shape (N, num_anchors * 4, H, W)
	        iou_preds (list[Tensor]): iou_preds for each scale
	            level with shape (N, num_anchors * 1, H, W)
	        batch_gt_instances (list[:obj:`InstanceData`]): Batch of
	            gt_instance. It usually includes ``bboxes`` and ``labels``
	            attributes.
	        batch_img_metas (list[dict]): Meta information of each image,
	            e.g., image size, scaling factor, etc.
	        batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
	            Batch of gt_instances_ignore. It includes ``bboxes`` attribute
	            data that is ignored during training and testing.
	            Defaults to None.

	    Returns:
	        dict[str, Tensor]: Loss terms under the keys ``loss_cls``,
	        ``loss_bbox`` and ``loss_iou``.
	    """
	    featmap_sizes = [level_map.size()[-2:] for level_map in cls_scores]
	    assert len(featmap_sizes) == self.prior_generator.num_levels

	    anchor_list, valid_flag_list = self.get_anchors(
	        featmap_sizes, batch_img_metas, device=cls_scores[0].device)
	    (labels, labels_weight, bboxes_target, bboxes_weight, pos_inds,
	     pos_gt_index) = self.get_targets(
	         anchor_list,
	         valid_flag_list,
	         batch_gt_instances,
	         batch_img_metas,
	         batch_gt_instances_ignore=batch_gt_instances_ignore,
	     )

	    # Regroup the per-level predictions with ``levels_to_images`` and
	    # flatten every entry to one row per anchor.
	    cls_scores = [
	        per_img.reshape(-1, self.cls_out_channels)
	        for per_img in levels_to_images(cls_scores)
	    ]
	    bbox_preds = [
	        per_img.reshape(-1, 4) for per_img in levels_to_images(bbox_preds)
	    ]
	    iou_preds = [
	        per_img.reshape(-1, 1) for per_img in levels_to_images(iou_preds)
	    ]

	    # ``get_pos_loss`` returns a 1-tuple, hence the trailing comma.
	    pos_losses_list, = multi_apply(self.get_pos_loss, anchor_list,
	                                   cls_scores, bbox_preds, labels,
	                                   labels_weight, bboxes_target,
	                                   bboxes_weight, pos_inds)

	    # The reassignment only produces targets, so no gradient is tracked.
	    with torch.no_grad():
	        (reassign_labels, reassign_label_weight, reassign_bbox_weights,
	         num_pos) = multi_apply(
	             self.paa_reassign,
	             pos_losses_list,
	             labels,
	             labels_weight,
	             bboxes_weight,
	             pos_inds,
	             pos_gt_index,
	             anchor_list)
	        num_pos = sum(num_pos)

	    # convert all tensor list to a flatten tensor
	    flat_cls = torch.cat(cls_scores, 0).view(-1, cls_scores[0].size(-1))
	    flat_bbox = torch.cat(bbox_preds, 0).view(-1, bbox_preds[0].size(-1))
	    flat_iou = torch.cat(iou_preds, 0).view(-1, iou_preds[0].size(-1))
	    labels = torch.cat(reassign_labels, 0).view(-1)
	    flatten_anchors = torch.cat(
	        [torch.cat(img_anchors, 0) for img_anchors in anchor_list])
	    labels_weight = torch.cat(reassign_label_weight, 0).view(-1)
	    bboxes_target = torch.cat(bboxes_target,
	                              0).view(-1, bboxes_target[0].size(-1))

	    # Foreground labels lie in [0, num_classes).
	    is_foreground = (labels >= 0) & (labels < self.num_classes)
	    pos_inds_flatten = is_foreground.nonzero().reshape(-1)

	    losses_cls = self.loss_cls(
	        flat_cls,
	        labels,
	        labels_weight,
	        avg_factor=max(num_pos, len(batch_img_metas)))  # avoid num_pos=0

	    if not num_pos:
	        # No positives: emit zero losses that stay attached to the graph.
	        losses_iou = flat_iou.sum() * 0
	        losses_bbox = flat_bbox.sum() * 0
	    else:
	        pos_bbox_pred = self.bbox_coder.decode(
	            flatten_anchors[pos_inds_flatten],
	            flat_bbox[pos_inds_flatten])
	        pos_bbox_target = bboxes_target[pos_inds_flatten]
	        # IoU between the detached decoded boxes and their targets is the
	        # regression target of the IoU branch and the weight of the box
	        # loss.
	        iou_target = bbox_overlaps(
	            pos_bbox_pred.detach(), pos_bbox_target, is_aligned=True)
	        losses_iou = self.loss_centerness(
	            flat_iou[pos_inds_flatten],
	            iou_target.unsqueeze(-1),
	            avg_factor=num_pos)
	        losses_bbox = self.loss_bbox(
	            pos_bbox_pred,
	            pos_bbox_target,
	            iou_target.clamp(min=EPS),
	            avg_factor=iou_target.sum())

	    return dict(
	        loss_cls=losses_cls, loss_bbox=losses_bbox, loss_iou=losses_iou)

	def get_pos_loss(self, anchors: List[Tensor], cls_score: Tensor,
	                 bbox_pred: Tensor, label: Tensor, label_weight: Tensor,
	                 bbox_target: dict, bbox_weight: Tensor,
	                 pos_inds: Tensor) -> Tensor:
	    """Per-sample loss of the positives found by the first assignment.

	    Args:
	        anchors (list[Tensor]): Anchors of each scale.
	        cls_score (Tensor): Box scores of single image with shape
	            (num_anchors, num_classes)
	        bbox_pred (Tensor): Box energies / deltas of single image
	            with shape (num_anchors, 4)
	        label (Tensor): classification target of each anchor with
	            shape (num_anchors,)
	        label_weight (Tensor): Classification loss weight of each
	            anchor with shape (num_anchors).
	        bbox_target (dict): Regression target of each anchor with
	            shape (num_anchors, 4). NOTE(review): annotated as ``dict``
	            but indexed with ``pos_inds`` like a tensor - confirm.
	        bbox_weight (Tensor): Bbox weight of each anchor with shape
	            (num_anchors, 4).
	        pos_inds (Tensor): Index of all positive samples got from
	            first assign process.

	    Returns:
	        tuple[Tensor]: A 1-tuple (the form ``multi_apply`` is used with
	        in ``loss_by_feat``) holding the losses of all positive samples
	        in the image; an empty tensor when there are no positives.
	    """
	    if len(pos_inds) == 0:
	        return cls_score.new([]),

	    pos_anchors = torch.cat(anchors, 0)[pos_inds]
	    decoded_pred = self.bbox_coder.decode(pos_anchors, bbox_pred[pos_inds])

	    # to keep loss dimension
	    cls_term = self.loss_cls(
	        cls_score[pos_inds],
	        label[pos_inds],
	        label_weight[pos_inds],
	        avg_factor=1.0,
	        reduction_override='none')

	    box_term = self.loss_bbox(
	        decoded_pred,
	        bbox_target[pos_inds],
	        bbox_weight[pos_inds],
	        avg_factor=1.0,  # keep same loss weight before reassign
	        reduction_override='none')

	    # Collapse the class axis so both terms have one value per sample.
	    return (box_term + cls_term.sum(-1), )

	def paa_reassign(self, pos_losses: Tensor, label: Tensor,
	                 label_weight: Tensor, bbox_weight: Tensor,
	                 pos_inds: Tensor, pos_gt_inds: Tensor,
	                 anchors: List[Tensor]) -> tuple:
	    """Fit loss to GMM distribution and separate positive, ignore, negative
	    samples again with GMM model.

	    Args:
	        pos_losses (Tensor): Losses of all positive samples in
	            single image.
	        label (Tensor): classification target of each anchor with
	            shape (num_anchors,)
	        label_weight (Tensor): Classification loss weight of each
	            anchor with shape (num_anchors).
	        bbox_weight (Tensor): Bbox weight of each anchor with shape
	            (num_anchors, 4).
	        pos_inds (Tensor): Index of all positive samples got from
	            first assign process.
	        pos_gt_inds (Tensor): Gt_index of all positive samples got
	            from first assign process.
	        anchors (list[Tensor]): Anchors of each scale.

	    Returns:
	        tuple: Usually returns a tuple containing learning targets.

	        - label (Tensor): classification target of each anchor after
	          paa assign, with shape (num_anchors,)
	        - label_weight (Tensor): Classification loss weight of each
	          anchor after paa assign, with shape (num_anchors).
	        - bbox_weight (Tensor): Bbox weight of each anchor with shape
	          (num_anchors, 4).
	        - num_pos (int): The number of positive samples after paa
	          assign.

	    Raises:
	        ImportError: If a GMM has to be fitted but scikit-learn is not
	            installed.
	    """
	    # Nothing to reassign: hand the inputs back untouched.
	    if not len(pos_inds):
	        return label, label_weight, bbox_weight, 0
	    # Work on copies so the caller's targets are not modified in place.
	    label = label.clone()
	    label_weight = label_weight.clone()
	    bbox_weight = bbox_weight.clone()
	    num_gt = pos_gt_inds.max() + 1
	    num_level = len(anchors)
	    # Cumulative anchor counts give each level's [start, end) range in the
	    # concatenated anchor index space.
	    num_anchors_each_level = [item.size(0) for item in anchors]
	    num_anchors_each_level.insert(0, 0)
	    inds_level_interval = np.cumsum(num_anchors_each_level)
	    pos_level_mask = []
	    for i in range(num_level):
	        mask = (pos_inds >= inds_level_interval[i]) & (
	            pos_inds < inds_level_interval[i + 1])
	        pos_level_mask.append(mask)
	    # Seeded with an empty tensor so ``torch.cat`` below always has input.
	    pos_inds_after_paa = [label.new_tensor([])]
	    ignore_inds_after_paa = [label.new_tensor([])]
	    for gt_ind in range(num_gt):
	        pos_inds_gmm = []
	        pos_loss_gmm = []
	        gt_mask = pos_gt_inds == gt_ind
	        # Per level, keep at most ``self.topk`` lowest-loss candidates of
	        # this ground truth.
	        for level in range(num_level):
	            level_mask = pos_level_mask[level]
	            level_gt_mask = level_mask & gt_mask
	            value, topk_inds = pos_losses[level_gt_mask].topk(
	                min(level_gt_mask.sum(), self.topk), largest=False)
	            pos_inds_gmm.append(pos_inds[level_gt_mask][topk_inds])
	            pos_loss_gmm.append(value)
	        pos_inds_gmm = torch.cat(pos_inds_gmm)
	        pos_loss_gmm = torch.cat(pos_loss_gmm)
	        # fix gmm need at least two sample
	        if len(pos_inds_gmm) < 2:
	            continue
	        device = pos_inds_gmm.device
	        # Sort by ascending loss; the separation scheme slices a prefix
	        # of this ordering.
	        pos_loss_gmm, sort_inds = pos_loss_gmm.sort()
	        pos_inds_gmm = pos_inds_gmm[sort_inds]
	        pos_loss_gmm = pos_loss_gmm.view(-1, 1).cpu().numpy()
	        # Component 0 starts at the smallest loss, component 1 at the
	        # largest.
	        min_loss, max_loss = pos_loss_gmm.min(), pos_loss_gmm.max()
	        means_init = np.array([min_loss, max_loss]).reshape(2, 1)
	        weights_init = np.array([0.5, 0.5])
	        precisions_init = np.array([1.0, 1.0]).reshape(2, 1, 1)  # full
	        # The expected shape of ``precisions_init`` depends on the
	        # covariance type.
	        if self.covariance_type == 'spherical':
	            precisions_init = precisions_init.reshape(2)
	        elif self.covariance_type == 'diag':
	            precisions_init = precisions_init.reshape(2, 1)
	        elif self.covariance_type == 'tied':
	            precisions_init = np.array([[1.0]])
	        if skm is None:
	            # The PyPI distribution is named ``scikit-learn``.
	            raise ImportError('Please run "pip install scikit-learn" '
	                              'to install sklearn first.')
	        gmm = skm.GaussianMixture(
	            2,
	            weights_init=weights_init,
	            means_init=means_init,
	            precisions_init=precisions_init,
	            covariance_type=self.covariance_type)
	        gmm.fit(pos_loss_gmm)
	        gmm_assignment = gmm.predict(pos_loss_gmm)
	        scores = gmm.score_samples(pos_loss_gmm)
	        gmm_assignment = torch.from_numpy(gmm_assignment).to(device)
	        scores = torch.from_numpy(scores).to(device)

	        pos_inds_temp, ignore_inds_temp = self.gmm_separation_scheme(
	            gmm_assignment, scores, pos_inds_gmm)
	        pos_inds_after_paa.append(pos_inds_temp)
	        ignore_inds_after_paa.append(ignore_inds_temp)

	    pos_inds_after_paa = torch.cat(pos_inds_after_paa)
	    ignore_inds_after_paa = torch.cat(ignore_inds_after_paa)
	    # A former positive is demoted when it matches none of the indices
	    # kept after the GMM step.
	    reassign_mask = (pos_inds.unsqueeze(1) != pos_inds_after_paa).all(1)
	    reassign_ids = pos_inds[reassign_mask]
	    # ``self.num_classes`` is the background label.
	    label[reassign_ids] = self.num_classes
	    label_weight[ignore_inds_after_paa] = 0
	    bbox_weight[reassign_ids] = 0
	    num_pos = len(pos_inds_after_paa)
	    return label, label_weight, bbox_weight, num_pos

	def gmm_separation_scheme(self, gmm_assignment: Tensor, scores: Tensor,
	                          pos_inds_gmm: Tensor) -> Tuple[Tensor, Tensor]:
	    """Split GMM candidates into positive and ignored samples.

	    This is the default scheme; subclasses may override it to implement
	    a different separation of the fitted distribution.

	    Args:
	        gmm_assignment (Tensor): The prediction of GMM which is of shape
	            (num_samples,). The 0/1 value indicates the distribution
	            that each sample comes from.
	        scores (Tensor): The probability of sample coming from the
	            fit GMM distribution. The tensor is of shape (num_samples,).
	        pos_inds_gmm (Tensor): All the indexes of samples which are used
	            to fit GMM model. The tensor is of shape (num_samples,)

	    Returns:
	        tuple[Tensor, Tensor]: The indices of positive and ignored samples.

	        - pos_inds_temp (Tensor): Indices of positive samples.
	        - ignore_inds_temp (Tensor): Indices of ignore samples; always
	          empty in this scheme.
	    """
	    # The implementation is (c) in Fig.3 in origin paper instead of (b).
	    # You can refer to issues such as
	    # https://github.com/kkhoot/PAA/issues/8 and
	    # https://github.com/kkhoot/PAA/issues/9.
	    in_component_zero = gmm_assignment == 0
	    if not in_component_zero.nonzero().numel():
	        # No sample was assigned to component 0: nothing is positive.
	        no_pos = in_component_zero.new_tensor([], dtype=torch.long)
	        no_ignore = in_component_zero.new_tensor([], dtype=torch.long)
	        return no_pos, no_ignore

	    component_zero_inds = pos_inds_gmm[in_component_zero]
	    # Position of the highest-scoring component-0 sample; every sample up
	    # to and including it is kept as positive.
	    best_pos = scores[in_component_zero].topk(1)[1]
	    return (component_zero_inds[:best_pos + 1],
	            pos_inds_gmm.new_tensor([]))

	def get_targets(self,
	                anchor_list: List[List[Tensor]],
	                valid_flag_list: List[List[Tensor]],
	                batch_gt_instances: InstanceList,
	                batch_img_metas: List[dict],
	                batch_gt_instances_ignore: OptInstanceList = None,
	                unmap_outputs: bool = True) -> tuple:
	    """Get targets for PAA head.

	    This method is almost the same as `AnchorHead.get_targets()`. The
	    per-image results of ``_get_targets_single`` are returned directly
	    instead of being regrouped by level.

	    Args:
	        anchor_list (list[list[Tensor]]): Multi level anchors of each
	            image. The outer list indicates images, and the inner list
	            corresponds to feature levels of the image. Each element of
	            the inner list is a tensor of shape (num_anchors, 4).
	        valid_flag_list (list[list[Tensor]]): Multi level valid flags of
	            each image. The outer list indicates images, and the inner
	            list corresponds to feature levels of the image. Each element
	            of the inner list is a tensor of shape (num_anchors, )
	        batch_gt_instances (list[:obj:`InstanceData`]): Batch of
	            gt_instance. It usually includes ``bboxes`` and ``labels``
	            attributes.
	        batch_img_metas (list[dict]): Meta information of each image,
	            e.g., image size, scaling factor, etc.
	        batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
	            Batch of gt_instances_ignore. It includes ``bboxes`` attribute
	            data that is ignored during training and testing.
	            Defaults to None.
	        unmap_outputs (bool): Whether to map outputs back to the original
	            set of anchors. Defaults to True.

	    Returns:
	        tuple: Usually returns a tuple containing learning targets.

	        - labels (list[Tensor]): Labels of all anchors, each with
	          shape (num_anchors,).
	        - label_weights (list[Tensor]): Label weights of all anchor.
	          each with shape (num_anchors,).
	        - bbox_targets (list[Tensor]): BBox targets of all anchors.
	          each with shape (num_anchors, 4).
	        - bbox_weights (list[Tensor]): BBox weights of all anchors.
	          each with shape (num_anchors, 4).
	        - pos_inds (list[Tensor]): Contains all index of positive
	          sample in all anchor.
	        - gt_inds (list[Tensor]): Contains all gt_index of positive
	          sample in all anchor.
	    """
	    num_imgs = len(batch_img_metas)
	    assert len(anchor_list) == len(valid_flag_list) == num_imgs

	    # Merge the levels of each image into one tensor per image.
	    concat_anchor_list = []
	    concat_valid_flag_list = []
	    for img_anchors, img_flags in zip(anchor_list, valid_flag_list):
	        assert len(img_anchors) == len(img_flags)
	        concat_anchor_list.append(torch.cat(img_anchors))
	        concat_valid_flag_list.append(torch.cat(img_flags))

	    # compute targets for each image
	    if batch_gt_instances_ignore is None:
	        batch_gt_instances_ignore = [None] * num_imgs
	    # The valid pos/neg index lists are unpacked but not used here.
	    (labels, label_weights, bbox_targets, bbox_weights, _valid_pos_inds,
	     _valid_neg_inds, sampling_result) = multi_apply(
	         self._get_targets_single,
	         concat_anchor_list,
	         concat_valid_flag_list,
	         batch_gt_instances,
	         batch_img_metas,
	         batch_gt_instances_ignore,
	         unmap_outputs=unmap_outputs)

	    # Due to valid flag of anchors, we have to calculate the real pos_inds
	    # in origin anchor set.
	    pos_inds = [
	        ((single_labels >= 0) &
	         (single_labels < self.num_classes)).nonzero().view(-1)
	        for single_labels in labels
	    ]

	    gt_inds = [res.pos_assigned_gt_inds for res in sampling_result]
	    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
	            gt_inds)

	def _get_targets_single(self,
	                        flat_anchors: Tensor,
	                        valid_flags: Tensor,
	                        gt_instances: InstanceData,
	                        img_meta: dict,
	                        gt_instances_ignore: Optional[InstanceData] = None,
	                        unmap_outputs: bool = True) -> tuple:
	    """Compute regression and classification targets for anchors in a
	    single image.

	    This method is same as `AnchorHead._get_targets_single()`: it skips
	    ``ATSSHead`` in the MRO via ``super(ATSSHead, self)``.

	    Args:
	        flat_anchors (Tensor): Anchors of the image, all levels
	            concatenated by ``get_targets``.
	        valid_flags (Tensor): Valid flags of the image, concatenated the
	            same way.
	        gt_instances (:obj:`InstanceData`): Ground truth of the image.
	        img_meta (dict): Meta information of the image.
	        gt_instances_ignore (:obj:`InstanceData`, optional): Instances
	            to be ignored. Defaults to None.
	        unmap_outputs (bool): Must be True; the targets are always mapped
	            back to the original set of anchors. Defaults to True.

	    Returns:
	        tuple: Whatever the parent implementation returns;
	        ``get_targets`` unpacks it into seven per-image items.

	    Raises:
	        AssertionError: If ``unmap_outputs`` is False.
	    """
	    # The two literals are concatenated, so the first must end in a space.
	    assert unmap_outputs, 'We must map outputs back to the original ' \
	        'set of anchors in PAAHead'
	    return super(ATSSHead, self)._get_targets_single(
	        flat_anchors,
	        valid_flags,
	        gt_instances,
	        img_meta,
	        gt_instances_ignore,
	        unmap_outputs=True)

	def predict_by_feat(self,
	                    cls_scores: List[Tensor],
	                    bbox_preds: List[Tensor],
	                    score_factors: Optional[List[Tensor]] = None,
	                    batch_img_metas: Optional[List[dict]] = None,
	                    cfg: OptConfigType = None,
	                    rescale: bool = False,
	                    with_nms: bool = True) -> InstanceList:
	    """Transform a batch of output features extracted from the head into
	    bbox results.

	    Only checks that ``with_nms`` is set, then delegates to the parent
	    implementation with every argument passed through by keyword.

	    Args:
	        cls_scores (list[Tensor]): Classification scores per scale level.
	        bbox_preds (list[Tensor]): Box predictions per scale level.
	        score_factors (list[Tensor], optional): Score factors per scale
	            level. Defaults to None.
	        batch_img_metas (list[dict], optional): Meta information of each
	            image. Defaults to None.
	        cfg (:obj:`ConfigDict` or dict, optional): Test / postprocessing
	            configuration. Defaults to None.
	        rescale (bool): If True, return boxes in original image space.
	            Defaults to False.
	        with_nms (bool): Must be True. Defaults to True.

	    Returns:
	        list[:obj:`InstanceData`]: The parent implementation's result.

	    Raises:
	        AssertionError: If ``with_nms`` is False.
	    """
	    assert with_nms, 'PAA only supports "with_nms=True" now and it ' \
	        'means PAAHead does not support ' \
	        'test-time augmentation'
	    forwarded = dict(
	        cls_scores=cls_scores,
	        bbox_preds=bbox_preds,
	        score_factors=score_factors,
	        batch_img_metas=batch_img_metas,
	        cfg=cfg,
	        rescale=rescale,
	        with_nms=with_nms)
	    return super().predict_by_feat(**forwarded)

	def _predict_by_feat_single(self,
	                            cls_score_list: List[Tensor],
	                            bbox_pred_list: List[Tensor],
	                            score_factor_list: List[Tensor],
	                            mlvl_priors: List[Tensor],
	                            img_meta: dict,
	                            cfg: OptConfigType = None,
	                            rescale: bool = False,
	                            with_nms: bool = True) -> InstanceData:
	    """Transform a single image's features extracted from the head into
	    bbox results.

	    Args:
	        cls_score_list (list[Tensor]): Box scores from all scale
	            levels of a single image, each item has shape
	            (num_priors * num_classes, H, W).
	        bbox_pred_list (list[Tensor]): Box energies / deltas from
	            all scale levels of a single image, each item has shape
	            (num_priors * 4, H, W).
	        score_factor_list (list[Tensor]): Score factors from all scale
	            levels of a single image, each item has shape
	            (num_priors * 1, H, W).
	        mlvl_priors (list[Tensor]): Each element in the list is
	            the priors of a single level in feature pyramid, has shape
	            (num_priors, 4).
	        img_meta (dict): Image meta info.
	        cfg (:obj:`ConfigDict` or dict, optional): Test / postprocessing
	            configuration, if None, test_cfg would be used.
	        rescale (bool): If True, return boxes in original image space.
	            Default: False.
	        with_nms (bool): If True, do nms before return boxes.
	            Default: True.

	    Returns:
	        :obj:`InstanceData`: Detection results of each image
	        after the post process.
	        Each item usually contains following keys.

	        - scores (Tensor): Classification scores, has a shape
	          (num_instance, )
	        - labels (Tensor): Labels of bboxes, has a shape
	          (num_instances, ).
	        - bboxes (Tensor): Has a shape (num_instances, 4),
	          the last dimension 4 arrange as (x1, y1, x2, y2).
	    """
	    if cfg is None:
	        cfg = self.test_cfg
	    img_shape = img_meta['img_shape']
	    nms_pre = cfg.get('nms_pre', -1)

	    mlvl_bboxes, mlvl_scores, mlvl_score_factors = [], [], []
	    per_level = zip(cls_score_list, bbox_pred_list, score_factor_list,
	                    mlvl_priors)
	    for cls_score, bbox_pred, score_factor, priors in per_level:
	        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

	        # Move channels last and flatten to one row per prior.
	        scores = cls_score.permute(1, 2, 0).reshape(
	            -1, self.cls_out_channels).sigmoid()
	        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
	        score_factor = score_factor.permute(1, 2, 0).reshape(-1).sigmoid()

	        if 0 < nms_pre < scores.shape[0]:
	            # Rank priors by their best fused score, sqrt(cls * factor),
	            # and keep the top ``nms_pre`` of this level.
	            fused = (scores * score_factor[:, None]).sqrt()
	            max_scores, _ = fused.max(dim=1)
	            _, keep = max_scores.topk(nms_pre)
	            priors = priors[keep, :]
	            bbox_pred = bbox_pred[keep, :]
	            scores = scores[keep, :]
	            score_factor = score_factor[keep]

	        mlvl_bboxes.append(
	            self.bbox_coder.decode(priors, bbox_pred, max_shape=img_shape))
	        mlvl_scores.append(scores)
	        mlvl_score_factors.append(score_factor)

	    results = InstanceData()
	    results.bboxes = torch.cat(mlvl_bboxes)
	    results.scores = torch.cat(mlvl_scores)
	    results.score_factors = torch.cat(mlvl_score_factors)

	    return self._bbox_post_process(results, cfg, rescale, with_nms,
	                                   img_meta)

	def _bbox_post_process(self,
	                       results: InstanceData,
	                       cfg: ConfigType,
	                       rescale: bool = False,
	                       with_nms: bool = True,
	                       img_meta: Optional[dict] = None):
	    """bbox post-processing method.

	    Optionally rescales the boxes to the original image scale, runs
	    multi-class NMS on the fused scores and, when enabled, refines the
	    kept boxes with score voting.

	    Args:
	        results (:obj:`InstanceData`): Detection instance results,
	            each item has shape (num_bboxes, ).
	        cfg (:obj:`ConfigDict` or dict): Test / postprocessing
	            configuration; ``score_thr``, ``nms`` and ``max_per_img`` are
	            read from it.
	        rescale (bool): If True, return boxes in original image space.
	            Default: False.
	        with_nms (bool): Not read by this implementation; NMS is always
	            applied. Default: True.
	        img_meta (dict, optional): Image meta info; ``scale_factor`` is
	            read from it when ``rescale`` is True. Defaults to None.

	    Returns:
	        :obj:`InstanceData`: Detection results of each image
	        after the post process.
	        Each item usually contains following keys.

	        - scores (Tensor): Classification scores, has a shape
	          (num_instance, )
	        - labels (Tensor): Labels of bboxes, has a shape
	          (num_instances, ).
	        - bboxes (Tensor): Has a shape (num_instances, 4),
	          the last dimension 4 arrange as (x1, y1, x2, y2).
	    """
	    if rescale:
	        results.bboxes /= results.bboxes.new_tensor(
	            img_meta['scale_factor']).repeat((1, 2))

	    # Add a dummy background class to the backend when using sigmoid
	    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
	    # BG cat_id: num_class
	    background_col = results.scores.new_zeros(results.scores.shape[0], 1)
	    mlvl_scores = torch.cat([results.scores, background_col], dim=1)

	    # The score factors are fused into the scores here, which is why
	    # ``score_factors=None`` is passed to the NMS call.
	    mlvl_nms_scores = (mlvl_scores * results.score_factors[:, None]).sqrt()
	    det_bboxes, det_labels = multiclass_nms(
	        results.bboxes,
	        mlvl_nms_scores,
	        cfg.score_thr,
	        cfg.nms,
	        cfg.max_per_img,
	        score_factors=None)

	    if self.with_score_voting and len(det_bboxes) > 0:
	        det_bboxes, det_labels = self.score_voting(
	            det_bboxes, det_labels, results.bboxes, mlvl_nms_scores,
	            cfg.score_thr)

	    # The last column of ``det_bboxes`` is the score.
	    nms_results = InstanceData()
	    nms_results.bboxes = det_bboxes[:, :-1]
	    nms_results.scores = det_bboxes[:, -1]
	    nms_results.labels = det_labels
	    return nms_results

	def score_voting(self, det_bboxes: Tensor, det_labels: Tensor,
	                 mlvl_bboxes: Tensor, mlvl_nms_scores: Tensor,
	                 score_thr: float) -> Tuple[Tensor, Tensor]:
	    """Implementation of score voting method works on each remaining boxes
	    after NMS procedure.

	    Every kept detection is replaced by a weighted average of the
	    same-class candidate boxes that overlap it, while its score is left
	    unchanged.

	    Args:
	        det_bboxes (Tensor): Remaining boxes after NMS procedure,
	            with shape (k, 5), each dimension means
	            (x1, y1, x2, y2, score).
	        det_labels (Tensor): The label of remaining boxes; labels are
	            0-based. It is compared to a class id and used as a mask
	            over the rows of ``det_bboxes``.
	        mlvl_bboxes (Tensor): All boxes before the NMS procedure,
	            with shape (num_anchors,4).
	        mlvl_nms_scores (Tensor): The scores of all boxes which is used
	            in the NMS procedure, with shape (num_anchors, num_class)
	        score_thr (float): The score threshold of bboxes.

	    Returns:
	        tuple: Usually returns a tuple containing voting results.

	        - det_bboxes_voted (Tensor): Remaining boxes after
	          score voting procedure, with shape (k, 5), each
	          dimension means (x1, y1, x2, y2, score).
	        - det_labels_voted (Tensor): Label of remaining bboxes
	          after voting, one entry per voted box.
	    """
	    # Candidates are all (box, class) pairs scoring above the threshold.
	    candidate_mask = mlvl_nms_scores > score_thr
	    candidate_mask_nonzeros = candidate_mask.nonzero(as_tuple=False)
	    candidate_inds = candidate_mask_nonzeros[:, 0]
	    candidate_labels = candidate_mask_nonzeros[:, 1]
	    candidate_bboxes = mlvl_bboxes[candidate_inds]
	    candidate_scores = mlvl_nms_scores[candidate_mask]
	    det_bboxes_voted = []
	    det_labels_voted = []
	    for cls in range(self.cls_out_channels):
	        candidate_cls_mask = candidate_labels == cls
	        if not candidate_cls_mask.any():
	            continue
	        candidate_cls_scores = candidate_scores[candidate_cls_mask]
	        candidate_cls_bboxes = candidate_bboxes[candidate_cls_mask]
	        det_cls_mask = det_labels == cls
	        det_cls_bboxes = det_bboxes[det_cls_mask].view(
	            -1, det_bboxes.size(-1))
	        det_candidate_ious = bbox_overlaps(det_cls_bboxes[:, :4],
	                                           candidate_cls_bboxes)
	        for det_ind in range(len(det_cls_bboxes)):
	            single_det_ious = det_candidate_ious[det_ind]
	            # Only candidates that overlap the detection take part.
	            pos_ious_mask = single_det_ious > 0.01
	            pos_ious = single_det_ious[pos_ious_mask]
	            pos_bboxes = candidate_cls_bboxes[pos_ious_mask]
	            pos_scores = candidate_cls_scores[pos_ious_mask]
	            # Vote weight: exp(-(1 - IoU)^2 / 0.025) times the candidate
	            # score, with a trailing axis so it broadcasts over the
	            # coordinates.
	            pis = (torch.exp(-(1 - pos_ious)**2 / 0.025) *
	                   pos_scores)[:, None]
	            voted_box = torch.sum(
	                pis * pos_bboxes, dim=0) / torch.sum(
	                    pis, dim=0)
	            # Keep the detection's own score as the last column.
	            voted_score = det_cls_bboxes[det_ind][-1:][None, :]
	            det_bboxes_voted.append(
	                torch.cat((voted_box[None, :], voted_score), dim=1))
	            det_labels_voted.append(cls)

	    det_bboxes_voted = torch.cat(det_bboxes_voted, dim=0)
	    det_labels_voted = det_labels.new_tensor(det_labels_voted)
	    return det_bboxes_voted, det_labels_voted