Shortcuts

Source code for mmpose.codecs.msra_heatmap

# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple

import numpy as np

from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils.gaussian_heatmap import (generate_gaussian_heatmaps,
                                     generate_unbiased_gaussian_heatmaps)
from .utils.post_processing import get_heatmap_maximum
from .utils.refinement import refine_keypoints, refine_keypoints_dark


@KEYPOINT_CODECS.register_module()
class MSRAHeatmap(BaseKeypointCodec):
    """Keypoint codec that represents keypoints as "MSRA"-style heatmaps.

    The approach is described in `Simple Baselines for Human Pose Estimation
    and Tracking`_ by Xiao et al (2018).

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]
        - heatmap size: [W, H]

    Encoded:

        - heatmaps (np.ndarray): The generated heatmap in shape (K, H, W)
            where [W, H] is the `heatmap_size`
        - keypoint_weights (np.ndarray): The target weights in shape (N, K)

    Args:
        input_size (tuple): Image size in [w, h]
        heatmap_size (tuple): Heatmap size in [W, H]
        sigma (float): The sigma value of the Gaussian heatmap
        unbiased (bool): Whether to use the unbiased method (DarkPose) in
            ``'msra'`` encoding. See `Dark Pose`_ for details. Defaults to
            ``False``
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation in DarkPose. The kernel size and sigma should follow
            the empirical formula :math:`sigma = 0.3*((ks-1)*0.5-1)+0.8`.
            Defaults to 11

    .. _`Simple Baselines for Human Pose Estimation and Tracking`:
        https://arxiv.org/abs/1804.06208
    .. _`Dark Pose`: https://arxiv.org/abs/1910.06278
    """

    label_mapping_table = {'keypoint_weights': 'keypoint_weights'}
    field_mapping_table = {'heatmaps': 'heatmaps'}

    def __init__(self,
                 input_size: Tuple[int, int],
                 heatmap_size: Tuple[int, int],
                 sigma: float,
                 unbiased: bool = False,
                 blur_kernel_size: int = 11) -> None:
        super().__init__()
        self.input_size = input_size
        self.heatmap_size = heatmap_size
        self.sigma = sigma
        self.unbiased = unbiased

        # Kernel size of the Gaussian blur used for heatmap modulation in
        # DarkPose. Kernel size and sigma are tied by the empirical formula
        # :math:`sigma = 0.3*((ks-1)*0.5-1)+0.8`, which evaluates to:
        #   ks=17 -> sigma=2.9
        #   ks=11 -> sigma=2.0
        #   ks=7  -> sigma=1.4
        #   ks=3  -> sigma=0.8
        self.blur_kernel_size = blur_kernel_size

        # Per-axis ratio between the input image size and the heatmap size;
        # encode() divides by it and decode() multiplies by it.
        size_ratio = np.array(input_size) / heatmap_size
        self.scale_factor = size_ratio.astype(np.float32)

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Encode keypoints into heatmaps.

        The keypoint coordinates are expected in the input image space; they
        are divided by ``self.scale_factor`` before the heatmaps are
        generated.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, an all-ones float32 array of shape
                ``keypoints.shape[:2]`` is used

        Returns:
            dict:
            - heatmaps (np.ndarray): The generated heatmap in shape
                (K, H, W) where [W, H] is the `heatmap_size`
            - keypoint_weights (np.ndarray): The target weights in shape
                (N, K)
        """
        assert keypoints.shape[0] == 1, (
            f'{self.__class__.__name__} only support single-instance '
            'keypoint encoding')

        if keypoints_visible is None:
            visibility = np.ones(keypoints.shape[:2], dtype=np.float32)
        else:
            visibility = keypoints_visible

        # Both generators are called with the same keyword arguments, so
        # only the callable differs between the two modes.
        make_heatmaps = (
            generate_unbiased_gaussian_heatmaps
            if self.unbiased else generate_gaussian_heatmaps)

        heatmaps, keypoint_weights = make_heatmaps(
            heatmap_size=self.heatmap_size,
            keypoints=keypoints / self.scale_factor,
            keypoints_visible=visibility,
            sigma=self.sigma)

        return {'heatmaps': heatmaps, 'keypoint_weights': keypoint_weights}

    def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Decode keypoint coordinates from heatmaps.

        The decoded coordinates are multiplied by ``self.scale_factor`` so
        that they are returned in the input image space.

        Args:
            encoded (np.ndarray): Heatmaps in shape (K, H, W)

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
                (N, K, D)
            - scores (np.ndarray): The keypoint scores in shape (N, K). It
                usually represents the confidence of the keypoint prediction
        """
        # Work on a copy so the caller's array is not touched by the steps
        # below.
        work_maps = encoded.copy()

        # The three-way unpack fails unless the input has exactly three
        # dimensions; the values themselves are not needed afterwards.
        _num_keypoints, _height, _width = work_maps.shape

        peak_coords, peak_scores = get_heatmap_maximum(work_maps)

        # Add a leading instance axis for the single-instance result.
        peak_coords = peak_coords[None]
        peak_scores = peak_scores[None]

        if self.unbiased:
            # DarkPose refinement to alleviate the biased coordinates.
            refined = refine_keypoints_dark(
                peak_coords,
                work_maps,
                blur_kernel_size=self.blur_kernel_size)
        else:
            refined = refine_keypoints(peak_coords, work_maps)

        # Restore the keypoint scale.
        return refined * self.scale_factor, peak_scores
Read the Docs v: latest
Versions
latest
0.x
dev-1.x
Downloads
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.