Shortcuts

Source code for mmpose.codecs.regression_label

# Copyright (c) OpenMMLab. All rights reserved.

from typing import Optional, Tuple

import numpy as np

from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec


@KEYPOINT_CODECS.register_module()
class RegressionLabel(BaseKeypointCodec):
    """Codec between image-space keypoints and normalized regression labels.

    Encoding divides keypoint coordinates by the input size ``[w, h]`` and
    produces a per-keypoint weight; decoding multiplies normalized
    coordinates back by ``[w, h]``.

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]

    Encoded:

        - keypoint_labels (np.ndarray): The normalized regression labels in
            shape (N, K, D) where D is 2 for 2d coordinates
        - keypoint_weights (np.ndarray): The target weights in shape (N, K)

    Args:
        input_size (tuple): Input image size in [w, h]
    """

    # Maps each encoded field name to the label name it is exposed under;
    # both entries are identity mappings.
    label_mapping_table = {
        'keypoint_labels': 'keypoint_labels',
        'keypoint_weights': 'keypoint_weights',
    }

    def __init__(self, input_size: Tuple[int, int]) -> None:
        super().__init__()
        self.input_size = input_size

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Normalize keypoints by the input size and build target weights.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). When omitted, every keypoint is treated as visible.

        Returns:
            dict:
            - keypoint_labels (np.ndarray): The float32 coordinates divided
                by ``[w, h]``, in shape (N, K, D) where D is 2 for 2d
                coordinates
            - keypoint_weights (np.ndarray): The float32 target weights in
                shape (N, K); 1 for keypoints that are visible and lie
                inside the image, 0 otherwise
        """
        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        width, height = self.input_size

        # A keypoint counts as in-bounds only if every coordinate lies in
        # [0, size - 1] along its axis.
        upper_bound = [width - 1, height - 1]
        in_bounds = ((keypoints >= 0) &
                     (keypoints <= upper_bound)).all(axis=-1)
        is_visible = keypoints_visible > 0.5
        valid = in_bounds & is_visible

        image_size = np.array([width, height])
        labels = (keypoints / image_size).astype(np.float32)
        weights = np.where(valid, 1., 0.).astype(np.float32)

        return {
            'keypoint_labels': labels,
            'keypoint_weights': weights,
        }

    def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Map normalized coordinates back to input image space.

        Args:
            encoded (np.ndarray): Coordinates in shape (N, K, D). D is either
                2 (coordinates only) or 4 (coordinates followed by two sigma
                values).

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded coordinates, i.e. the first two
                channels multiplied by ``[w, h]``
            - scores (np.ndarray): The keypoint scores in shape (N, K). All
                ones when no sigma is given; otherwise the mean of
                ``1 - sigma`` over the two sigma channels

        Raises:
            ValueError: If the last dimension of ``encoded`` is neither 2
                nor 4.
        """
        num_channels = encoded.shape[-1]
        if num_channels not in (2, 4):
            raise ValueError(
                'Keypoint dimension should be 2 or 4 (with sigma), '
                f'but got {encoded.shape[-1]}')

        if num_channels == 2:
            num_instances, num_keypoints, _ = encoded.shape
            normalized_coords = encoded.copy()
            scores = np.ones((num_instances, num_keypoints),
                             dtype=np.float32)
        else:
            # split coords and sigma if outputs contain output_sigma
            normalized_coords = encoded[..., :2].copy()
            output_sigma = encoded[..., 2:4].copy()
            scores = (1 - output_sigma).mean(axis=-1)

        width, height = self.input_size
        keypoints = normalized_coords * np.array([width, height])

        return keypoints, scores
Read the Docs v: latest
Versions
latest
0.x
dev-1.x
Downloads
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.