mmpose.codecs.integral_regression_label 源代码
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .msra_heatmap import MSRAHeatmap
from .regression_label import RegressionLabel
[文档]@KEYPOINT_CODECS.register_module()
class IntegralRegressionLabel(BaseKeypointCodec):
"""Generate keypoint coordinates and normalized heatmaps. See the paper:
`DSNT`_ by Nibali et al(2018).
Note:
- instance number: N
- keypoint number: K
- keypoint dimension: D
- image size: [w, h]
Encoded:
- keypoint_labels (np.ndarray): The normalized regression labels in
shape (N, K, D) where D is 2 for 2d coordinates
- heatmaps (np.ndarray): The generated heatmap in shape (K, H, W) where
[W, H] is the `heatmap_size`
- keypoint_weights (np.ndarray): The target weights in shape (N, K)
Args:
input_size (tuple): Input image size in [w, h]
heatmap_size (tuple): Heatmap size in [W, H]
sigma (float): The sigma value of the Gaussian heatmap
unbiased (bool): Whether use unbiased method (DarkPose) in ``'msra'``
encoding. See `Dark Pose`_ for details. Defaults to ``False``
blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
modulation in DarkPose. The kernel size and sigma should follow
the expirical formula :math:`sigma = 0.3*((ks-1)*0.5-1)+0.8`.
Defaults to 11
normalize (bool): Whether to normalize the heatmaps. Defaults to True.
.. _`DSNT`: https://arxiv.org/abs/1801.07372
"""
label_mapping_table = dict(
keypoint_labels='keypoint_labels',
keypoint_weights='keypoint_weights',
)
field_mapping_table = dict(heatmaps='heatmaps', )
def __init__(self,
input_size: Tuple[int, int],
heatmap_size: Tuple[int, int],
sigma: float,
unbiased: bool = False,
blur_kernel_size: int = 11,
normalize: bool = True) -> None:
super().__init__()
self.heatmap_codec = MSRAHeatmap(input_size, heatmap_size, sigma,
unbiased, blur_kernel_size)
self.keypoint_codec = RegressionLabel(input_size)
self.normalize = normalize
[文档] def encode(self,
keypoints: np.ndarray,
keypoints_visible: Optional[np.ndarray] = None) -> dict:
"""Encoding keypoints to regression labels and heatmaps.
Args:
keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
keypoints_visible (np.ndarray): Keypoint visibilities in shape
(N, K)
Returns:
dict:
- keypoint_labels (np.ndarray): The normalized regression labels in
shape (N, K, D) where D is 2 for 2d coordinates
- heatmaps (np.ndarray): The generated heatmap in shape
(K, H, W) where [W, H] is the `heatmap_size`
- keypoint_weights (np.ndarray): The target weights in shape
(N, K)
"""
encoded_hm = self.heatmap_codec.encode(keypoints, keypoints_visible)
encoded_kp = self.keypoint_codec.encode(keypoints, keypoints_visible)
heatmaps = encoded_hm['heatmaps']
keypoint_labels = encoded_kp['keypoint_labels']
keypoint_weights = encoded_kp['keypoint_weights']
if self.normalize:
val_sum = heatmaps.sum(axis=(-1, -2)).reshape(-1, 1, 1) + 1e-24
heatmaps = heatmaps / val_sum
encoded = dict(
keypoint_labels=keypoint_labels,
heatmaps=heatmaps,
keypoint_weights=keypoint_weights)
return encoded
[文档] def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Decode keypoint coordinates from normalized space to input image
space.
Args:
encoded (np.ndarray): Coordinates in shape (N, K, D)
Returns:
tuple:
- keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
- socres (np.ndarray): The keypoint scores in shape (N, K).
It usually represents the confidence of the keypoint prediction
"""
keypoints, scores = self.keypoint_codec.decode(encoded)
return keypoints, scores