mmpose.codecs.simcc_label 源代码
# Copyright (c) OpenMMLab. All rights reserved.
from itertools import product
from typing import Optional, Tuple, Union
import numpy as np
from mmpose.codecs.utils import get_simcc_maximum
from mmpose.codecs.utils.refinement import refine_simcc_dark
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
@KEYPOINT_CODECS.register_module()
class SimCCLabel(BaseKeypointCodec):
    r"""Generate keypoint representation via "SimCC" approach.
    See the paper: `SimCC: a Simple Coordinate Classification Perspective for
    Human Pose Estimation`_ by Li et al (2022) for more details.
    Old name: SimDR

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]

    Encoded:

        - keypoint_x_labels (np.ndarray): The generated SimCC label for
            x-axis in shape (N, K, Wx), where
            :math:`Wx=w*simcc_split_ratio`. With
            ``smoothing_type=='gaussian'`` each row is a 1-D Gaussian; with
            ``smoothing_type=='standard'`` each row is a (label-smoothed)
            one-hot vector.
        - keypoint_y_labels (np.ndarray): The generated SimCC label for
            y-axis in shape (N, K, Wy), where
            :math:`Wy=h*simcc_split_ratio`. Same layout as the x labels.
        - keypoint_weights (np.ndarray): The target weights in shape (N, K)

    Args:
        input_size (tuple): Input image size in [w, h]
        smoothing_type (str): The SimCC label smoothing strategy. Options are
            ``'gaussian'`` and ``'standard'``. Defaults to ``'gaussian'``
        sigma (float | int | tuple): The sigma value in the Gaussian SimCC
            label. A scalar is used for both axes; a pair is interpreted as
            (sigma_x, sigma_y). Defaults to 6.0
        simcc_split_ratio (float): The ratio of the label size to the input
            size. For example, if the input width is ``w``, the x label size
            will be :math:`w*simcc_split_ratio`. Defaults to 2.0
        label_smooth_weight (float): Label Smoothing weight. Only allowed
            to be non-zero in ``'standard'`` mode. Defaults to 0.0
        normalize (bool): Whether to normalize the heatmaps. Defaults to True.
        use_dark (bool): Whether to use the DARK post processing. Defaults to
            False.
        decode_visibility (bool): Whether to decode the visibility. Defaults
            to False.
        decode_beta (float): The beta value for decoding visibility. Defaults
            to 150.0.

    Raises:
        ValueError: If ``smoothing_type`` is not one of the supported
            options, if ``label_smooth_weight`` is positive in
            ``'gaussian'`` mode, or if ``label_smooth_weight`` is outside
            [0, 1].

    .. _`SimCC: a Simple Coordinate Classification Perspective for Human Pose
    Estimation`: https://arxiv.org/abs/2107.03332
    """

    label_mapping_table = dict(
        keypoint_x_labels='keypoint_x_labels',
        keypoint_y_labels='keypoint_y_labels',
        keypoint_weights='keypoint_weights',
    )

    def __init__(
        self,
        input_size: Tuple[int, int],
        smoothing_type: str = 'gaussian',
        sigma: Union[float, int, Tuple[float]] = 6.0,
        simcc_split_ratio: float = 2.0,
        label_smooth_weight: float = 0.0,
        normalize: bool = True,
        use_dark: bool = False,
        decode_visibility: bool = False,
        decode_beta: float = 150.0,
    ) -> None:
        super().__init__()

        self.input_size = input_size
        self.smoothing_type = smoothing_type
        self.simcc_split_ratio = simcc_split_ratio
        self.label_smooth_weight = label_smooth_weight
        self.normalize = normalize
        self.use_dark = use_dark
        self.decode_visibility = decode_visibility
        self.decode_beta = decode_beta

        # Any scalar (Python or NumPy) is shared by both axes. Checking the
        # dimensionality instead of the Python type keeps NumPy scalars from
        # turning into a 0-d array that cannot be indexed per axis later.
        if np.ndim(sigma) == 0:
            self.sigma = np.array([sigma, sigma])
        else:
            self.sigma = np.array(sigma)

        if self.smoothing_type not in {'gaussian', 'standard'}:
            raise ValueError(
                f'{self.__class__.__name__} got invalid `smoothing_type` '
                f'value {self.smoothing_type}. Should be one of '
                '{"gaussian", "standard"}')

        if self.smoothing_type == 'gaussian' and self.label_smooth_weight > 0:
            raise ValueError('Attribute `label_smooth_weight` is only '
                             'used for `standard` mode.')

        if self.label_smooth_weight < 0.0 or self.label_smooth_weight > 1.0:
            raise ValueError('`label_smooth_weight` should be in range [0, 1]')

    @staticmethod
    def _resolve_visibility(
            keypoints: np.ndarray,
            keypoints_visible: Optional[np.ndarray]) -> np.ndarray:
        """Return ``keypoints_visible``, or an all-ones (N, K) float32 mask
        when it is ``None``."""
        if keypoints_visible is None:
            return np.ones(keypoints.shape[:2], dtype=np.float32)
        return keypoints_visible

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Encoding keypoints into SimCC labels. Note that the original
        keypoint coordinates should be in the input image space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, every keypoint is treated as visible.

        Returns:
            dict:
            - keypoint_x_labels (np.ndarray): The generated SimCC label for
                x-axis in shape (N, K, Wx), where
                :math:`Wx=w*simcc_split_ratio`
            - keypoint_y_labels (np.ndarray): The generated SimCC label for
                y-axis in shape (N, K, Wy), where
                :math:`Wy=h*simcc_split_ratio`
            - keypoint_weights (np.ndarray): The target weights in shape
                (N, K)

        Raises:
            ValueError: If ``self.smoothing_type`` is not ``'gaussian'`` or
                ``'standard'`` (e.g. the attribute was changed after
                construction).
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        if self.smoothing_type == 'gaussian':
            x_labels, y_labels, keypoint_weights = self._generate_gaussian(
                keypoints, keypoints_visible)
        elif self.smoothing_type == 'standard':
            x_labels, y_labels, keypoint_weights = self._generate_standard(
                keypoints, keypoints_visible)
        else:
            raise ValueError(
                f'{self.__class__.__name__} got invalid `smoothing_type` '
                f'value {self.smoothing_type}. Should be one of '
                '{"gaussian", "standard"}')

        encoded = dict(
            keypoint_x_labels=x_labels,
            keypoint_y_labels=y_labels,
            keypoint_weights=keypoint_weights)

        return encoded

    def decode(
        self, simcc_x: np.ndarray, simcc_y: np.ndarray
    ) -> Tuple[np.ndarray, Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]]:
        """Decode keypoint coordinates from SimCC representations. The decoded
        coordinates are in the input image space.

        Args:
            simcc_x (np.ndarray): SimCC label for x-axis
            simcc_y (np.ndarray): SimCC label for y-axis

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
            - scores (np.ndarray | tuple): The keypoint scores in shape
                (N, K). It usually represents the confidence of the keypoint
                prediction. When ``decode_visibility`` is True this element
                is instead a ``(scores, visibility)`` pair.
        """
        keypoints, scores = get_simcc_maximum(simcc_x, simcc_y)

        # Unsqueeze the instance dimension for single-instance results
        if keypoints.ndim == 2:
            keypoints = keypoints[None, :]
            scores = scores[None, :]

        if self.use_dark:
            # Blur kernel sizes are derived from sigma and then forced to be
            # odd (an even size is decremented by one).
            x_blur = int((self.sigma[0] * 20 - 7) // 3)
            y_blur = int((self.sigma[1] * 20 - 7) // 3)
            x_blur -= int((x_blur % 2) == 0)
            y_blur -= int((y_blur % 2) == 0)
            keypoints[:, :, 0] = refine_simcc_dark(keypoints[:, :, 0], simcc_x,
                                                   x_blur)
            keypoints[:, :, 1] = refine_simcc_dark(keypoints[:, :, 1], simcc_y,
                                                   y_blur)

        # Map from SimCC bin indices back to input-image coordinates.
        keypoints /= self.simcc_split_ratio

        if self.decode_visibility:
            _, visibility = get_simcc_maximum(
                simcc_x * self.decode_beta * self.sigma[0],
                simcc_y * self.decode_beta * self.sigma[1],
                apply_softmax=True)
            return keypoints, (scores, visibility)
        else:
            return keypoints, scores

    def _map_coordinates(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Mapping keypoint coordinates into SimCC space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, every keypoint is treated as visible.

        Returns:
            tuple:
            - keypoints_split (np.ndarray): Coordinates scaled by
                ``simcc_split_ratio`` and rounded to ``int64`` bin indices.
            - keypoint_weights (np.ndarray): A copy of the visibilities, so
                callers can modify the weights without touching the input.
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        # The multiplication already allocates a new array, so the input
        # keypoints are never modified.
        keypoints_split = np.around(keypoints * self.simcc_split_ratio)
        keypoints_split = keypoints_split.astype(np.int64)
        keypoint_weights = keypoints_visible.copy()

        return keypoints_split, keypoint_weights

    def _generate_standard(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Encoding keypoints into SimCC labels with Standard Label Smoothing
        strategy.

        Labels will be one-hot vectors if self.label_smooth_weight==0.0

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, every keypoint is treated as visible.

        Returns:
            tuple: ``(target_x, target_y, keypoint_weights)`` with shapes
            (N, K, W), (N, K, H) and (N, K).
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        N, K, _ = keypoints.shape
        w, h = self.input_size
        W = np.around(w * self.simcc_split_ratio).astype(int)
        H = np.around(h * self.simcc_split_ratio).astype(int)

        keypoints_split, keypoint_weights = self._map_coordinates(
            keypoints, keypoints_visible)

        target_x = np.zeros((N, K, W), dtype=np.float32)
        target_y = np.zeros((N, K, H), dtype=np.float32)

        for n, k in product(range(N), range(K)):
            # skip unlabeled keypoints
            if keypoints_visible[n, k] < 0.5:
                continue

            # get center coordinates (already int64 bin indices)
            mu_x, mu_y = keypoints_split[n, k]

            # detect abnormal coords and assign the weight 0
            if mu_x >= W or mu_y >= H or mu_x < 0 or mu_y < 0:
                keypoint_weights[n, k] = 0
                continue

            # Spread the smoothing mass over the row first; the target bin
            # is overwritten just below, so the row sums to 1.
            if self.label_smooth_weight > 0:
                target_x[n, k] = self.label_smooth_weight / (W - 1)
                target_y[n, k] = self.label_smooth_weight / (H - 1)

            target_x[n, k, mu_x] = 1.0 - self.label_smooth_weight
            target_y[n, k, mu_y] = 1.0 - self.label_smooth_weight

        return target_x, target_y, keypoint_weights

    def _generate_gaussian(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Encoding keypoints into SimCC labels with Gaussian Label Smoothing
        strategy.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, every keypoint is treated as visible.

        Returns:
            tuple: ``(target_x, target_y, keypoint_weights)`` with shapes
            (N, K, W), (N, K, H) and (N, K).
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        N, K, _ = keypoints.shape
        w, h = self.input_size
        W = np.around(w * self.simcc_split_ratio).astype(int)
        H = np.around(h * self.simcc_split_ratio).astype(int)

        keypoints_split, keypoint_weights = self._map_coordinates(
            keypoints, keypoints_visible)

        target_x = np.zeros((N, K, W), dtype=np.float32)
        target_y = np.zeros((N, K, H), dtype=np.float32)

        # 3-sigma rule
        radius = self.sigma * 3

        # xy grid
        x = np.arange(0, W, 1, dtype=np.float32)
        y = np.arange(0, H, 1, dtype=np.float32)

        for n, k in product(range(N), range(K)):
            # skip unlabeled keypoints
            if keypoints_visible[n, k] < 0.5:
                continue

            mu = keypoints_split[n, k]

            # check that the gaussian has in-bounds part
            left, top = mu - radius
            right, bottom = mu + radius + 1

            if left >= W or top >= H or right < 0 or bottom < 0:
                keypoint_weights[n, k] = 0
                continue

            mu_x, mu_y = mu

            target_x[n, k] = np.exp(-((x - mu_x)**2) / (2 * self.sigma[0]**2))
            target_y[n, k] = np.exp(-((y - mu_y)**2) / (2 * self.sigma[1]**2))

        # Scale the whole label arrays exactly once, after every keypoint has
        # been written; doing this per keypoint would rescale earlier rows
        # repeatedly.
        if self.normalize:
            norm_value = self.sigma * np.sqrt(np.pi * 2)
            target_x /= norm_value[0]
            target_y /= norm_value[1]

        return target_x, target_y, keypoint_weights