Shortcuts

mmpose.codecs.simcc_label 源代码

# Copyright (c) OpenMMLab. All rights reserved.
from itertools import product
from typing import Optional, Tuple, Union

import numpy as np

from mmpose.codecs.utils import get_simcc_maximum
from mmpose.codecs.utils.refinement import refine_simcc_dark
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec


@KEYPOINT_CODECS.register_module()
class SimCCLabel(BaseKeypointCodec):
    r"""Generate keypoint representation via "SimCC" approach.

    See the paper: `SimCC: a Simple Coordinate Classification Perspective for
    Human Pose Estimation`_ by Li et al (2022) for more details.
    Old name: SimDR

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]

    Encoded:

        - keypoint_x_labels (np.ndarray): The generated SimCC label for
          x-axis in shape (N, K, Wx), where
          :math:`Wx=w*simcc_split_ratio`. With
          ``smoothing_type=='gaussian'`` each label is a 1-D Gaussian
          centred on the keypoint; with ``smoothing_type=='standard'`` it
          is a one-hot vector (label-smoothed when
          ``label_smooth_weight > 0``)
        - keypoint_y_labels (np.ndarray): The generated SimCC label for
          y-axis in shape (N, K, Wy), where
          :math:`Wy=h*simcc_split_ratio`. Same encoding as the x-axis
          label
        - keypoint_weights (np.ndarray): The target weights in shape (N, K)

    Args:
        input_size (tuple): Input image size in [w, h]
        smoothing_type (str): The SimCC label smoothing strategy. Options
            are ``'gaussian'`` and ``'standard'``. Defaults to
            ``'gaussian'``
        sigma (float | int | tuple): The sigma value in the Gaussian SimCC
            label. A scalar is used for both axes; a pair is interpreted
            as ``(sigma_x, sigma_y)``. Defaults to 6.0
        simcc_split_ratio (float): The ratio of the label size to the input
            size. For example, if the input width is ``w``, the x label
            size will be :math:`w*simcc_split_ratio`. Defaults to 2.0
        label_smooth_weight (float): Label Smoothing weight, only allowed
            to be non-zero in ``'standard'`` mode. Must lie in [0, 1].
            Defaults to 0.0
        normalize (bool): Whether to divide the Gaussian labels by
            :math:`\sigma\sqrt{2\pi}`. Defaults to True.
        use_dark (bool): Whether to use the DARK post processing.
            Defaults to False.
        decode_visibility (bool): Whether to decode the visibility.
            Defaults to False.
        decode_beta (float): The beta value for decoding visibility.
            Defaults to 150.0.

    Raises:
        ValueError: If ``smoothing_type`` is not ``'gaussian'`` or
            ``'standard'``, if ``label_smooth_weight`` is positive in
            ``'gaussian'`` mode, or if ``label_smooth_weight`` is outside
            [0, 1].

    .. _`SimCC: a Simple Coordinate Classification Perspective for Human Pose
        Estimation`: https://arxiv.org/abs/2107.03332
    """

    label_mapping_table = dict(
        keypoint_x_labels='keypoint_x_labels',
        keypoint_y_labels='keypoint_y_labels',
        keypoint_weights='keypoint_weights',
    )

    def __init__(
        self,
        input_size: Tuple[int, int],
        smoothing_type: str = 'gaussian',
        sigma: Union[float, int, Tuple[float]] = 6.0,
        simcc_split_ratio: float = 2.0,
        label_smooth_weight: float = 0.0,
        normalize: bool = True,
        use_dark: bool = False,
        decode_visibility: bool = False,
        decode_beta: float = 150.0,
    ) -> None:
        super().__init__()

        self.input_size = input_size
        self.smoothing_type = smoothing_type
        self.simcc_split_ratio = simcc_split_ratio
        self.label_smooth_weight = label_smooth_weight
        self.normalize = normalize
        self.use_dark = use_dark
        self.decode_visibility = decode_visibility
        self.decode_beta = decode_beta

        # Always store sigma as a 2-element array: index 0 is used for the
        # x-axis and index 1 for the y-axis throughout the class.
        if isinstance(sigma, (float, int)):
            self.sigma = np.array([sigma, sigma])
        else:
            self.sigma = np.array(sigma)

        if self.smoothing_type not in {'gaussian', 'standard'}:
            raise ValueError(
                f'{self.__class__.__name__} got invalid `smoothing_type` '
                f'value {self.smoothing_type}. Should be one of '
                '{"gaussian", "standard"}')

        if self.smoothing_type == 'gaussian' and self.label_smooth_weight > 0:
            raise ValueError('Attribute `label_smooth_weight` is only '
                             'used for `standard` mode.')

        if self.label_smooth_weight < 0.0 or self.label_smooth_weight > 1.0:
            raise ValueError('`label_smooth_weight` should be in range [0, 1]')

    @staticmethod
    def _resolve_visibility(
            keypoints: np.ndarray,
            keypoints_visible: Optional[np.ndarray]) -> np.ndarray:
        """Return ``keypoints_visible``, or all-ones in shape
        ``keypoints.shape[:2]`` when it is ``None``."""
        if keypoints_visible is None:
            return np.ones(keypoints.shape[:2], dtype=np.float32)
        return keypoints_visible

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Encoding keypoints into SimCC labels. Note that the original
        keypoint coordinates should be in the input image space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, every keypoint is treated as visible

        Returns:
            dict:
            - keypoint_x_labels (np.ndarray): The generated SimCC label for
              x-axis in shape (N, K, Wx), where
              :math:`Wx=w*simcc_split_ratio`
            - keypoint_y_labels (np.ndarray): The generated SimCC label for
              y-axis in shape (N, K, Wy), where
              :math:`Wy=h*simcc_split_ratio`
            - keypoint_weights (np.ndarray): The target weights in shape
              (N, K)

        Raises:
            ValueError: If ``self.smoothing_type`` is neither ``'gaussian'``
                nor ``'standard'``.
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        if self.smoothing_type == 'gaussian':
            x_labels, y_labels, keypoint_weights = self._generate_gaussian(
                keypoints, keypoints_visible)
        elif self.smoothing_type == 'standard':
            x_labels, y_labels, keypoint_weights = self._generate_standard(
                keypoints, keypoints_visible)
        else:
            # Only reachable if the attribute was changed after __init__.
            raise ValueError(
                f'{self.__class__.__name__} got invalid `smoothing_type` '
                f'value {self.smoothing_type}. Should be one of '
                '{"gaussian", "standard"}')

        encoded = dict(
            keypoint_x_labels=x_labels,
            keypoint_y_labels=y_labels,
            keypoint_weights=keypoint_weights)

        return encoded

    def decode(
        self, simcc_x: np.ndarray, simcc_y: np.ndarray
    ) -> Tuple[np.ndarray, Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]]:
        """Decode keypoint coordinates from SimCC representations. The decoded
        coordinates are in the input image space.

        Args:
            simcc_x (np.ndarray): SimCC label for x-axis
            simcc_y (np.ndarray): SimCC label for y-axis

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
            - scores (np.ndarray | tuple): The keypoint scores in shape
              (N, K). It usually represents the confidence of the keypoint
              prediction. When ``decode_visibility`` is True this element
              is instead the pair ``(scores, visibility)``
        """
        keypoints, scores = get_simcc_maximum(simcc_x, simcc_y)

        # Unsqueeze the instance dimension for single-instance results
        if keypoints.ndim == 2:
            keypoints = keypoints[None, :]
            scores = scores[None, :]

        if self.use_dark:
            # Blur kernel sizes are derived from sigma and then forced to
            # be odd (an even size is decremented by one).
            x_blur = int((self.sigma[0] * 20 - 7) // 3)
            y_blur = int((self.sigma[1] * 20 - 7) // 3)
            x_blur -= int((x_blur % 2) == 0)
            y_blur -= int((y_blur % 2) == 0)
            keypoints[:, :, 0] = refine_simcc_dark(keypoints[:, :, 0], simcc_x,
                                                   x_blur)
            keypoints[:, :, 1] = refine_simcc_dark(keypoints[:, :, 1], simcc_y,
                                                   y_blur)

        # Map from SimCC bin indices back to input image coordinates.
        keypoints /= self.simcc_split_ratio

        if self.decode_visibility:
            _, visibility = get_simcc_maximum(
                simcc_x * self.decode_beta * self.sigma[0],
                simcc_y * self.decode_beta * self.sigma[1],
                apply_softmax=True)
            return keypoints, (scores, visibility)
        else:
            return keypoints, scores

    def _map_coordinates(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Mapping keypoint coordinates into SimCC space.

        Coordinates are scaled by ``simcc_split_ratio``, rounded and cast to
        ``int64``. The returned weights are a copy of the visibilities
        (all-ones if ``keypoints_visible`` is ``None``).
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        keypoints_split = keypoints.copy()
        keypoints_split = np.around(keypoints_split * self.simcc_split_ratio)
        keypoints_split = keypoints_split.astype(np.int64)
        keypoint_weights = keypoints_visible.copy()

        return keypoints_split, keypoint_weights

    def _generate_standard(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Encoding keypoints into SimCC labels with Standard Label Smoothing
        strategy.

        Labels will be one-hot vectors if self.label_smooth_weight==0.0

        Returns:
            tuple: ``(target_x, target_y, keypoint_weights)`` in shapes
            (N, K, W), (N, K, H) and (N, K).
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        N, K, _ = keypoints.shape
        w, h = self.input_size
        W = np.around(w * self.simcc_split_ratio).astype(int)
        H = np.around(h * self.simcc_split_ratio).astype(int)

        keypoints_split, keypoint_weights = self._map_coordinates(
            keypoints, keypoints_visible)

        target_x = np.zeros((N, K, W), dtype=np.float32)
        target_y = np.zeros((N, K, H), dtype=np.float32)

        for n, k in product(range(N), range(K)):
            # skip unlabeled keypoints
            if keypoints_visible[n, k] < 0.5:
                continue

            # get center coordinates (already int64 from _map_coordinates)
            mu_x, mu_y = keypoints_split[n, k]

            # detect abnormal coords and assign the weight 0
            if mu_x >= W or mu_y >= H or mu_x < 0 or mu_y < 0:
                keypoint_weights[n, k] = 0
                continue

            if self.label_smooth_weight > 0:
                # Spread the smoothing mass evenly over the other bins.
                target_x[n, k] = self.label_smooth_weight / (W - 1)
                target_y[n, k] = self.label_smooth_weight / (H - 1)

            target_x[n, k, mu_x] = 1.0 - self.label_smooth_weight
            target_y[n, k, mu_y] = 1.0 - self.label_smooth_weight

        return target_x, target_y, keypoint_weights

    def _generate_gaussian(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Encoding keypoints into SimCC labels with Gaussian Label Smoothing
        strategy.

        Returns:
            tuple: ``(target_x, target_y, keypoint_weights)`` in shapes
            (N, K, W), (N, K, H) and (N, K).
        """
        keypoints_visible = self._resolve_visibility(keypoints,
                                                     keypoints_visible)

        N, K, _ = keypoints.shape
        w, h = self.input_size
        W = np.around(w * self.simcc_split_ratio).astype(int)
        H = np.around(h * self.simcc_split_ratio).astype(int)

        keypoints_split, keypoint_weights = self._map_coordinates(
            keypoints, keypoints_visible)

        target_x = np.zeros((N, K, W), dtype=np.float32)
        target_y = np.zeros((N, K, H), dtype=np.float32)

        # 3-sigma rule
        radius = self.sigma * 3

        # xy grid
        x = np.arange(0, W, 1, dtype=np.float32)
        y = np.arange(0, H, 1, dtype=np.float32)

        for n, k in product(range(N), range(K)):
            # skip unlabeled keypoints
            if keypoints_visible[n, k] < 0.5:
                continue

            mu = keypoints_split[n, k]

            # check that the gaussian has in-bounds part
            left, top = mu - radius
            right, bottom = mu + radius + 1

            if left >= W or top >= H or right < 0 or bottom < 0:
                keypoint_weights[n, k] = 0
                continue

            mu_x, mu_y = mu

            target_x[n, k] = np.exp(-((x - mu_x)**2) / (2 * self.sigma[0]**2))
            target_y[n, k] = np.exp(-((y - mu_y)**2) / (2 * self.sigma[1]**2))

        # Normalise once, after all labels are filled in, so that each label
        # is scaled exactly one time.
        if self.normalize:
            norm_value = self.sigma * np.sqrt(np.pi * 2)
            target_x /= norm_value[0]
            target_y /= norm_value[1]

        return target_x, target_y, keypoint_weights