|
|
|
from typing import Optional, Tuple, Union |
|
|
|
import mmcv |
|
import numpy as np |
|
import pycocotools.mask as maskUtils |
|
import torch |
|
from mmcv.transforms import BaseTransform |
|
from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations |
|
from mmcv.transforms import LoadImageFromFile |
|
from mmengine.fileio import get |
|
from mmengine.structures import BaseDataElement |
|
|
|
from mmdet.registry import TRANSFORMS |
|
from mmdet.structures.bbox import get_box_type |
|
from mmdet.structures.bbox.box_type import autocast_box_type |
|
from mmdet.structures.mask import BitmapMasks, PolygonMasks |
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadImageFromNDArray(LoadImageFromFile):
    """Load an image that is already present as ``results['img']``.

    Similar with :obj:`LoadImageFromFile`, but the image has been loaded as
    :obj:`np.ndarray` in ``results['img']``. Can be used when loading image
    from webcam.

    Required Keys:

    - img

    Modified Keys:

    - img
    - img_path
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
    """

    def transform(self, results: dict) -> dict:
        """Transform function to add image meta information.

        Args:
            results (dict): Result dict with Webcam read image in
                ``results['img']``.

        Returns:
            dict: The dict contains loaded image and meta information.
        """
        image = results['img']
        if self.to_float32:
            image = image.astype(np.float32)

        # The array never came from disk, so there is no path to record.
        results['img_path'] = None
        results['img'] = image
        spatial_shape = image.shape[:2]
        results['img_shape'] = spatial_shape
        results['ori_shape'] = spatial_shape
        return results
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadMultiChannelImageFromFiles(BaseTransform):
    """Load multi-channel images from a list of separate channel files.

    Each path in ``results['img_path']`` is decoded independently and the
    decoded channels are stacked along the last axis.

    Required Keys:

    - img_path

    Modified Keys:

    - img
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
        color_type (str): The flag argument for :func:``mmcv.imfrombytes``.
            Defaults to 'unchanged'.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :func:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        file_client_args (dict): Arguments to instantiate the
            corresponding backend in mmdet <= 3.0.0rc6. Defaults to None.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend in mmdet >= 3.0.0rc7. Defaults to None.
    """

    def __init__(
        self,
        to_float32: bool = False,
        color_type: str = 'unchanged',
        imdecode_backend: str = 'cv2',
        file_client_args: dict = None,
        backend_args: dict = None,
    ) -> None:
        self.to_float32 = to_float32
        self.color_type = color_type
        self.imdecode_backend = imdecode_backend
        self.backend_args = backend_args
        # ``file_client_args`` was removed in mmdet 3.0.0rc7; reject it
        # explicitly so stale configs fail loudly instead of silently.
        if file_client_args is not None:
            raise RuntimeError(
                'The `file_client_args` is deprecated, '
                'please use `backend_args` instead, please refer to'
                'https://github.com/open-mmlab/mmdetection/blob/main/configs/_base_/datasets/coco_detection.py'
            )

    def transform(self, results: dict) -> dict:
        """Transform functions to load multiple images and get images meta
        information.

        Args:
            results (dict): Result dict from :obj:`mmdet.CustomDataset`.

        Returns:
            dict: The dict contains loaded images and meta information.
        """
        paths = results['img_path']
        assert isinstance(paths, list)
        # Decode every channel file, then stack them channel-last.
        channels = [
            mmcv.imfrombytes(
                get(name, backend_args=self.backend_args),
                flag=self.color_type,
                backend=self.imdecode_backend) for name in paths
        ]
        stacked = np.stack(channels, axis=-1)
        if self.to_float32:
            stacked = stacked.astype(np.float32)

        results['img'] = stacked
        results['img_shape'] = stacked.shape[:2]
        results['ori_shape'] = stacked.shape[:2]
        return results

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'to_float32={self.to_float32}, '
                f"color_type='{self.color_type}', "
                f"imdecode_backend='{self.imdecode_backend}', "
                f'backend_args={self.backend_args})')
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadAnnotations(MMCV_LoadAnnotations):
    """Load and process the ``instances`` and ``seg_map`` annotation provided
    by dataset.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,

                # Used in instance/panoptic segmentation. The segmentation
                # mask of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                # one for each connected component of the object. Each
                # list[float] is one simple polygon in the format of
                # [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are absolute
                # coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask in
                # COCO's compressed RLE format. The dict should have keys
                # "size" and "counts". Can be loaded by pycocotools.
                'mask': list[list[float]] or dict,

                }
            ]
            # Filename of semantic or panoptic segmentation ground truth file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of bboxes
            # in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
            # in (x, y, v) order, float type.
        }

    Required Keys:

    - height
    - width
    - instances

      - bbox (optional)
      - bbox_label
      - mask (optional)
      - ignore_flag

    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32])
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Default: False.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Defaults to False.
        poly2mask (bool): Whether to convert mask to bitmap. Default: True.
        box_type (str): The box type used to wrap the bboxes. If ``box_type``
            is None, gt_bboxes will keep being np.ndarray. Defaults to 'hbox'.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :func:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.
    """

    def __init__(self,
                 with_mask: bool = False,
                 poly2mask: bool = True,
                 box_type: str = 'hbox',
                 **kwargs) -> None:
        # ``with_bbox``, ``with_label``, ``with_seg``, ``imdecode_backend``
        # and ``backend_args`` are handled by the mmcv base class.
        super(LoadAnnotations, self).__init__(**kwargs)
        self.with_mask = with_mask
        self.poly2mask = poly2mask
        self.box_type = box_type

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        Returns:
            dict: The dict contains loaded bounding box annotations.
        """
        gt_bboxes = []
        gt_ignore_flags = []
        for instance in results.get('instances', []):
            gt_bboxes.append(instance['bbox'])
            gt_ignore_flags.append(instance['ignore_flag'])
        if self.box_type is None:
            # Keep plain ndarray boxes; reshape so an empty list still
            # yields a (0, 4) array.
            results['gt_bboxes'] = np.array(
                gt_bboxes, dtype=np.float32).reshape((-1, 4))
        else:
            # Wrap the boxes into the configured box class (e.g. HorizontalBoxes).
            _, box_type_cls = get_box_type(self.box_type)
            results['gt_bboxes'] = box_type_cls(gt_bboxes, dtype=torch.float32)
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)

    def _load_labels(self, results: dict) -> None:
        """Private function to load label annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded label annotations.
        """
        gt_bboxes_labels = []
        for instance in results.get('instances', []):
            gt_bboxes_labels.append(instance['bbox_label'])

        results['gt_bboxes_labels'] = np.array(
            gt_bboxes_labels, dtype=np.int64)

    def _poly2mask(self, mask_ann: Union[list, dict], img_h: int,
                   img_w: int) -> np.ndarray:
        """Private function to convert masks represented with polygon to
        bitmaps.

        Args:
            mask_ann (list | dict): Polygon mask annotation input.
            img_h (int): The height of output mask.
            img_w (int): The width of output mask.

        Returns:
            np.ndarray: The decode bitmap mask of shape (img_h, img_w).
        """

        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts;
            # we merge all parts into one mask rle code.
            rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
            rle = maskUtils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE -- convert it to compressed RLE first.
            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # already compressed RLE, decode directly.
            rle = mask_ann
        mask = maskUtils.decode(rle)
        return mask

    def _process_masks(self, results: dict) -> list:
        """Process gt_masks and filter invalid polygons.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            list: Processed gt_masks.
        """
        gt_masks = []
        gt_ignore_flags = []
        for instance in results.get('instances', []):
            gt_mask = instance['mask']
            # If the annotation of segmentation mask is invalid,
            # ignore the whole instance.
            if isinstance(gt_mask, list):
                # Keep only valid polygons: an even number of coordinates
                # and at least 3 points (6 values).
                gt_mask = [
                    np.array(polygon) for polygon in gt_mask
                    if len(polygon) % 2 == 0 and len(polygon) >= 6
                ]
                if len(gt_mask) == 0:
                    # ignore this instance and set gt_mask to a fake mask
                    instance['ignore_flag'] = 1
                    gt_mask = [np.zeros(6)]
            elif not self.poly2mask:
                # `PolygonMasks` requires a polygon of format List[np.array],
                # other formats are invalid.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            elif isinstance(gt_mask, dict) and \
                    not (gt_mask.get('counts') is not None and
                         gt_mask.get('size') is not None and
                         isinstance(gt_mask['counts'], (list, str))):
                # if gt_mask is a dict, it should include `counts` and `size`,
                # so that `BitmapMasks` can uncompress the RLE.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            gt_masks.append(gt_mask)
            # re-process gt_ignore_flags because the loop above may have
            # flipped some flags to 1 for invalid masks.
            gt_ignore_flags.append(instance['ignore_flag'])
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)
        return gt_masks

    def _load_masks(self, results: dict) -> None:
        """Private function to load mask annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        """
        h, w = results['ori_shape']
        gt_masks = self._process_masks(results)
        if self.poly2mask:
            # Rasterize every polygon/RLE annotation into a bitmap mask.
            gt_masks = BitmapMasks(
                [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
        else:
            # fake polygon masks will be ignored in `PackDetInputs`
            gt_masks = PolygonMasks([mask for mask in gt_masks], h, w)
        results['gt_masks'] = gt_masks

    def transform(self, results: dict) -> dict:
        """Function to load multiple types annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label and
            semantic segmentation.
        """
        # NOTE: `_load_masks` may overwrite the `gt_ignore_flags` written by
        # `_load_bboxes` (invalid masks flip the flag to True).
        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)
        if self.with_mask:
            self._load_masks(results)
        if self.with_seg:
            # `_load_seg_map` is inherited from the mmcv base class.
            self._load_seg_map(results)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(with_bbox={self.with_bbox}, '
        repr_str += f'with_label={self.with_label}, '
        repr_str += f'with_mask={self.with_mask}, '
        repr_str += f'with_seg={self.with_seg}, '
        repr_str += f'poly2mask={self.poly2mask}, '
        repr_str += f"imdecode_backend='{self.imdecode_backend}', "
        repr_str += f'backend_args={self.backend_args})'
        return repr_str
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadPanopticAnnotations(LoadAnnotations):
    """Load multiple types of panoptic annotations.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,
                },
                ...
            ]
            'segments_info':
            [
                {
                # id = cls_id + instance_id * INSTANCE_OFFSET
                'id': int,

                # Contiguous category id defined in dataset.
                'category': int

                # Thing flag.
                'is_thing': bool
                },
                ...
            ]

            # Filename of semantic or panoptic segmentation ground truth file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of bboxes
            # in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
            # in (x, y, v) order, float type.
        }

    Required Keys:

    - height
    - width
    - instances
      - bbox
      - bbox_label
      - ignore_flag
    - segments_info
      - id
      - category
      - is_thing
    - seg_map_path

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32])
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Defaults to True.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Defaults to True.
        box_type (str): The box mode used to wrap the bboxes.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :func:``mmcv.imfrombytes`` for details.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend in mmdet >= 3.0.0rc7. Defaults to None.
    """

    def __init__(self,
                 with_bbox: bool = True,
                 with_label: bool = True,
                 with_mask: bool = True,
                 with_seg: bool = True,
                 box_type: str = 'hbox',
                 imdecode_backend: str = 'cv2',
                 backend_args: dict = None) -> None:
        # panopticapi is an optional dependency; fail early with an
        # actionable message instead of at decode time.
        try:
            from panopticapi import utils
        except ImportError:
            raise ImportError(
                'panopticapi is not installed, please install it by: '
                'pip install git+https://github.com/cocodataset/'
                'panopticapi.git.')
        # rgb2id decodes the panoptic PNG color encoding into segment ids.
        self.rgb2id = utils.rgb2id

        super(LoadPanopticAnnotations, self).__init__(
            with_bbox=with_bbox,
            with_label=with_label,
            with_mask=with_mask,
            with_seg=with_seg,
            with_keypoints=False,
            box_type=box_type,
            imdecode_backend=imdecode_backend,
            backend_args=backend_args)

    def _load_masks_and_semantic_segs(self, results: dict) -> None:
        """Private function to load mask and semantic segmentation annotations.

        In gt_semantic_seg, the foreground label is from ``0`` to
        ``num_things - 1``, the background label is from ``num_things`` to
        ``num_things + num_stuff - 1``, 255 means the ignored label (``VOID``).

        Args:
            results (dict): Result dict from :obj:``mmdet.CustomDataset``.
        """
        # Seg map is not needed in some instance segmentation tasks, so
        # silently skip when no ground-truth file is provided.
        if results.get('seg_map_path', None) is None:
            return

        img_bytes = get(
            results['seg_map_path'], backend_args=self.backend_args)
        pan_png = mmcv.imfrombytes(
            img_bytes, flag='color', channel_order='rgb').squeeze()
        pan_png = self.rgb2id(pan_png)

        gt_masks = []
        # Start from all-VOID (255) and paint in each segment's category.
        gt_seg = np.zeros_like(pan_png) + 255  # 255 as ignore

        for segment_info in results['segments_info']:
            mask = (pan_png == segment_info['id'])
            gt_seg = np.where(mask, segment_info['category'], gt_seg)

            # Only "thing" segments become instance masks; "stuff"
            # segments live only in the semantic seg map.
            if segment_info.get('is_thing'):
                gt_masks.append(mask.astype(np.uint8))

        if self.with_mask:
            h, w = results['ori_shape']
            gt_masks = BitmapMasks(gt_masks, h, w)
            results['gt_masks'] = gt_masks

        if self.with_seg:
            results['gt_seg_map'] = gt_seg

    def transform(self, results: dict) -> dict:
        """Function to load multiple types panoptic annotations.

        Args:
            results (dict): Result dict from :obj:``mmdet.CustomDataset``.

        Returns:
            dict: The dict contains loaded bounding box, label, mask and
            semantic segmentation annotations.
        """

        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)
        if self.with_mask or self.with_seg:
            # The tasks completed by '_load_masks' and '_load_semantic_segs'
            # in LoadAnnotations are merged to one function, because both
            # are decoded from the single panoptic PNG.
            self._load_masks_and_semantic_segs(results)

        return results
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadProposals(BaseTransform):
    """Load proposal pipeline.

    Required Keys:

    - proposals

    Modified Keys:

    - proposals

    Args:
        num_max_proposals (int, optional): Maximum number of proposals to load.
            If not specified, all proposals will be loaded.
    """

    def __init__(self, num_max_proposals: Optional[int] = None) -> None:
        self.num_max_proposals = num_max_proposals

    def transform(self, results: dict) -> dict:
        """Transform function to load proposals from file.

        Args:
            results (dict): Result dict from :obj:`mmdet.CustomDataset`.

        Returns:
            dict: The dict contains loaded proposal annotations.
        """
        proposals = results['proposals']
        # Proposals may arrive as a plain dict or an mmengine data element;
        # both support key-style access below.
        assert isinstance(proposals, (dict, BaseDataElement))

        bboxes = proposals['bboxes'].astype(np.float32)
        assert bboxes.shape[1] == 4, \
            f'Proposals should have shapes (n, 4), but found {bboxes.shape}'

        if 'scores' in proposals:
            scores = proposals['scores'].astype(np.float32)
            assert scores.shape[0] == bboxes.shape[0]
        else:
            # No score provided: fall back to all-zero confidences.
            scores = np.zeros(bboxes.shape[0], dtype=np.float32)

        if self.num_max_proposals is not None:
            # Proposals are assumed to be pre-sorted; keep the top-k.
            bboxes = bboxes[:self.num_max_proposals]
            scores = scores[:self.num_max_proposals]

        if bboxes.shape[0] == 0:
            bboxes = np.zeros((0, 4), dtype=np.float32)
            scores = np.zeros(0, dtype=np.float32)

        results['proposals'] = bboxes
        results['proposals_scores'] = scores
        return results

    def __repr__(self):
        return (f'{self.__class__.__name__}'
                f'(num_max_proposals={self.num_max_proposals})')
|
|
|
|
|
@TRANSFORMS.register_module()
class FilterAnnotations(BaseTransform):
    """Filter invalid annotations.

    Required Keys:

    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_bboxes_labels (np.int64) (optional)
    - gt_masks (BitmapMasks | PolygonMasks) (optional)
    - gt_ignore_flags (bool) (optional)

    Modified Keys:

    - gt_bboxes (optional)
    - gt_bboxes_labels (optional)
    - gt_masks (optional)
    - gt_ignore_flags (optional)

    Args:
        min_gt_bbox_wh (tuple[float]): Minimum width and height of ground truth
            boxes. Default: (1., 1.)
        min_gt_mask_area (int): Minimum foreground area of ground truth masks.
            Default: 1
        by_box (bool): Filter instances with bounding boxes not meeting the
            min_gt_bbox_wh threshold. Default: True
        by_mask (bool): Filter instances with masks not meeting
            min_gt_mask_area threshold. Default: False
        keep_empty (bool): Whether to return None when it
            becomes an empty bbox after filtering. Defaults to True.
    """

    def __init__(self,
                 min_gt_bbox_wh: Tuple[int, int] = (1, 1),
                 min_gt_mask_area: int = 1,
                 by_box: bool = True,
                 by_mask: bool = False,
                 keep_empty: bool = True) -> None:
        # At least one criterion must be enabled, otherwise the transform
        # would silently do nothing.
        assert by_box or by_mask
        self.min_gt_bbox_wh = min_gt_bbox_wh
        self.min_gt_mask_area = min_gt_mask_area
        self.by_box = by_box
        self.by_mask = by_mask
        self.keep_empty = keep_empty

    @autocast_box_type()
    def transform(self, results: dict) -> Union[dict, None]:
        """Transform function to filter annotations.

        Args:
            results (dict): Result dict.

        Returns:
            dict or None: Updated result dict, or None if every instance was
            filtered out and ``keep_empty`` is True.
        """
        assert 'gt_bboxes' in results
        gt_bboxes = results['gt_bboxes']
        # Nothing to filter for an image without annotations.
        if gt_bboxes.shape[0] == 0:
            return results

        tests = []
        if self.by_box:
            tests.append(
                ((gt_bboxes.widths > self.min_gt_bbox_wh[0]) &
                 (gt_bboxes.heights > self.min_gt_bbox_wh[1])).numpy())
        if self.by_mask:
            assert 'gt_masks' in results
            gt_masks = results['gt_masks']
            tests.append(gt_masks.areas >= self.min_gt_mask_area)

        # An instance is kept only if it passes every enabled test.
        keep = tests[0]
        for t in tests[1:]:
            keep = keep & t

        if not keep.any():
            if self.keep_empty:
                return None

        keys = ('gt_bboxes', 'gt_bboxes_labels', 'gt_masks', 'gt_ignore_flags')
        for key in keys:
            if key in results:
                results[key] = results[key][keep]

        return results

    def __repr__(self):
        # Report every configurable option for debuggability, consistent
        # with the other transforms in this module (the previous version
        # omitted min_gt_mask_area, by_box and by_mask).
        return self.__class__.__name__ + \
            f'(min_gt_bbox_wh={self.min_gt_bbox_wh}, ' \
            f'min_gt_mask_area={self.min_gt_mask_area}, ' \
            f'by_box={self.by_box}, ' \
            f'by_mask={self.by_mask}, ' \
            f'keep_empty={self.keep_empty})'
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadEmptyAnnotations(BaseTransform):
    """Load Empty Annotations for unlabeled images.

    Added Keys:
    - gt_bboxes (np.float32)
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to load the pseudo bbox annotation.
            Defaults to True.
        with_label (bool): Whether to load the pseudo label annotation.
            Defaults to True.
        with_mask (bool): Whether to load the pseudo mask annotation.
            Default: False.
        with_seg (bool): Whether to load the pseudo semantic segmentation
            annotation. Defaults to False.
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equals ``ignore_label`` in ``semantic_head``
            of the corresponding config. Defaults to 255.
    """

    def __init__(self,
                 with_bbox: bool = True,
                 with_label: bool = True,
                 with_mask: bool = False,
                 with_seg: bool = False,
                 seg_ignore_label: int = 255) -> None:
        self.with_bbox = with_bbox
        self.with_label = with_label
        self.with_mask = with_mask
        self.with_seg = with_seg
        self.seg_ignore_label = seg_ignore_label

    def transform(self, results: dict) -> dict:
        """Transform function to load empty annotations.

        Args:
            results (dict): Result dict.
        Returns:
            dict: Updated result dict.
        """
        if self.with_bbox:
            results['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
            results['gt_ignore_flags'] = np.zeros((0, ), dtype=bool)
        if self.with_label:
            results['gt_bboxes_labels'] = np.zeros((0, ), dtype=np.int64)
        # The image shape is only needed for mask/seg placeholders, so it
        # is read from results only when one of them is requested.
        if self.with_mask or self.with_seg:
            h, w = results['img_shape']
            if self.with_mask:
                results['gt_masks'] = BitmapMasks(
                    np.zeros((0, h, w), dtype=np.uint8), h, w)
            if self.with_seg:
                # Fill the whole map with the ignore label.
                results['gt_seg_map'] = np.ones(
                    (h, w), dtype=np.uint8) * self.seg_ignore_label
        return results

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}'
                f'(with_bbox={self.with_bbox}, '
                f'with_label={self.with_label}, '
                f'with_mask={self.with_mask}, '
                f'with_seg={self.with_seg}, '
                f'seg_ignore_label={self.seg_ignore_label})')
|
|
|
|
|
@TRANSFORMS.register_module()
class InferencerLoader(BaseTransform):
    """Load an image for the inferencer from a path, an array or a dict.

    Similar with :obj:`LoadImageFromFile`, but the image has been loaded as
    :obj:`np.ndarray` in ``results['img']``. Can be used when loading image
    from webcam.

    Required Keys:

    - img

    Modified Keys:

    - img
    - img_path
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to convert the loaded image to a float32
            numpy array. If set to False, the loaded image is an uint8 array.
            Defaults to False.
    """

    def __init__(self, **kwargs) -> None:
        super().__init__()
        # One delegate per supported input form; both share the same kwargs.
        self.from_file = TRANSFORMS.build(
            dict(type='LoadImageFromFile', **kwargs))
        self.from_ndarray = TRANSFORMS.build(
            dict(type='mmdet.LoadImageFromNDArray', **kwargs))

    def transform(self, results: Union[str, np.ndarray, dict]) -> dict:
        """Transform function to add image meta information.

        Args:
            results (str, np.ndarray or dict): The result.

        Returns:
            dict: The dict contains loaded image and meta information.
        """
        # Normalize the three accepted input forms into a result dict.
        if isinstance(results, dict):
            inputs = results
        elif isinstance(results, str):
            inputs = dict(img_path=results)
        elif isinstance(results, np.ndarray):
            inputs = dict(img=results)
        else:
            raise NotImplementedError

        # An in-memory image takes the ndarray path; otherwise load from file.
        loader = self.from_ndarray if 'img' in inputs else self.from_file
        return loader(inputs)
|
|