Source code for mmpose.codecs.annotation_processors
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
INF = 1e6
NEG_INF = -1e6
class BaseAnnotationProcessor(BaseKeypointCodec):
"""Base class for annotation processors."""
def decode(self, *args, **kwargs):
pass
[docs]@KEYPOINT_CODECS.register_module()
class YOLOXPoseAnnotationProcessor(BaseAnnotationProcessor):
"""Convert dataset annotations to the input format of YOLOX-Pose.
This processor expands bounding boxes and converts category IDs to labels.
Args:
expand_bbox (bool, optional): Whether to expand the bounding box
to include all keypoints. Defaults to False.
input_size (tuple, optional): The size of the input image for the
model, formatted as (h, w). This argument is necessary for the
codec in deployment but is not used indeed.
"""
auxiliary_encode_keys = {'category_id', 'bbox'}
label_mapping_table = dict(
bbox='bboxes',
bbox_labels='labels',
keypoints='keypoints',
keypoints_visible='keypoints_visible',
area='areas',
)
instance_mapping_table = dict(
bbox='bboxes',
bbox_score='bbox_scores',
keypoints='keypoints',
keypoints_visible='keypoints_visible',
# remove 'bbox_scales' in default instance_mapping_table to avoid
# length mismatch during training with multiple datasets
)
def __init__(self,
expand_bbox: bool = False,
input_size: Optional[Tuple] = None):
super().__init__()
self.expand_bbox = expand_bbox
[docs] def encode(self,
keypoints: Optional[np.ndarray] = None,
keypoints_visible: Optional[np.ndarray] = None,
bbox: Optional[np.ndarray] = None,
category_id: Optional[List[int]] = None
) -> Dict[str, np.ndarray]:
"""Encode keypoints, bounding boxes, and category IDs.
Args:
keypoints (np.ndarray, optional): Keypoints array. Defaults
to None.
keypoints_visible (np.ndarray, optional): Visibility array for
keypoints. Defaults to None.
bbox (np.ndarray, optional): Bounding box array. Defaults to None.
category_id (List[int], optional): List of category IDs. Defaults
to None.
Returns:
Dict[str, np.ndarray]: Encoded annotations.
"""
results = {}
if self.expand_bbox and bbox is not None:
# Handle keypoints visibility
if keypoints_visible.ndim == 3:
keypoints_visible = keypoints_visible[..., 0]
# Expand bounding box to include keypoints
kpts_min = keypoints.copy()
kpts_min[keypoints_visible == 0] = INF
bbox[..., :2] = np.minimum(bbox[..., :2], kpts_min.min(axis=1))
kpts_max = keypoints.copy()
kpts_max[keypoints_visible == 0] = NEG_INF
bbox[..., 2:] = np.maximum(bbox[..., 2:], kpts_max.max(axis=1))
results['bbox'] = bbox
if category_id is not None:
# Convert category IDs to labels
bbox_labels = np.array(category_id).astype(np.int8) - 1
results['bbox_labels'] = bbox_labels
return results