Shortcuts

Source code for mmpose.codecs.hand_3d_heatmap

# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple

import numpy as np

from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils.gaussian_heatmap import generate_3d_gaussian_heatmaps
from .utils.post_processing import get_heatmap_3d_maximum


[docs]@KEYPOINT_CODECS.register_module() class Hand3DHeatmap(BaseKeypointCodec): r"""Generate target 3d heatmap and relative root depth for hand datasets. Note: - instance number: N - keypoint number: K - keypoint dimension: D Args: image_size (tuple): Size of image. Default: ``[256, 256]``. root_heatmap_size (int): Size of heatmap of root head. Default: 64. heatmap_size (tuple): Size of heatmap. Default: ``[64, 64, 64]``. heatmap3d_depth_bound (float): Boundary for 3d heatmap depth. Default: 400.0. heatmap_size_root (int): Size of 3d heatmap root. Default: 64. depth_size (int): Number of depth discretization size, used for decoding. Defaults to 64. root_depth_bound (float): Boundary for 3d heatmap root depth. Default: 400.0. use_different_joint_weights (bool): Whether to use different joint weights. Default: ``False``. sigma (int): Sigma of heatmap gaussian. Default: 2. joint_indices (list, optional): Indices of joints used for heatmap generation. If None (default) is given, all joints will be used. Default: ``None``. max_bound (float): The maximal value of heatmap. Default: 1.0. """ auxiliary_encode_keys = { 'dataset_keypoint_weights', 'rel_root_depth', 'rel_root_valid', 'hand_type', 'hand_type_valid', 'focal', 'principal_pt' } instance_mapping_table = { 'keypoints': 'keypoints', 'keypoints_visible': 'keypoints_visible', 'keypoints_cam': 'keypoints_cam', } label_mapping_table = { 'keypoint_weights': 'keypoint_weights', 'root_depth_weight': 'root_depth_weight', 'type_weight': 'type_weight', 'root_depth': 'root_depth', 'type': 'type' } def __init__(self, image_size: Tuple[int, int] = [256, 256], root_heatmap_size: int = 64, heatmap_size: Tuple[int, int, int] = [64, 64, 64], heatmap3d_depth_bound: float = 400.0, heatmap_size_root: int = 64, root_depth_bound: float = 400.0, depth_size: int = 64, use_different_joint_weights: bool = False, sigma: int = 2, joint_indices: Optional[list] = None, max_bound: float = 1.0): super().__init__() self.image_size = np.array(image_size) self.root_heatmap_size = root_heatmap_size self.heatmap_size = np.array(heatmap_size) self.heatmap3d_depth_bound = heatmap3d_depth_bound self.heatmap_size_root = heatmap_size_root self.root_depth_bound = root_depth_bound self.depth_size = depth_size self.use_different_joint_weights = use_different_joint_weights self.sigma = sigma self.joint_indices = joint_indices self.max_bound = max_bound self.scale_factor = (np.array(image_size) / heatmap_size[:-1]).astype(np.float32)
[docs] def encode( self, keypoints: np.ndarray, keypoints_visible: Optional[np.ndarray], dataset_keypoint_weights: Optional[np.ndarray], rel_root_depth: np.float32, rel_root_valid: np.float32, hand_type: np.ndarray, hand_type_valid: np.ndarray, focal: np.ndarray, principal_pt: np.ndarray, ) -> dict: """Encoding keypoints from input image space to input image space. Args: keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D). keypoints_visible (np.ndarray, optional): Keypoint visibilities in shape (N, K). dataset_keypoint_weights (np.ndarray, optional): Keypoints weight in shape (K, ). rel_root_depth (np.float32): Relative root depth. rel_root_valid (float): Validity of relative root depth. hand_type (np.ndarray): Type of hand encoded as a array. hand_type_valid (np.ndarray): Validity of hand type. focal (np.ndarray): Focal length of camera. principal_pt (np.ndarray): Principal point of camera. Returns: encoded (dict): Contains the following items: - heatmaps (np.ndarray): The generated heatmap in shape (K * D, H, W) where [W, H, D] is the `heatmap_size` - keypoint_weights (np.ndarray): The target weights in shape (N, K) - root_depth (np.ndarray): Encoded relative root depth - root_depth_weight (np.ndarray): The weights of relative root depth - type (np.ndarray): Encoded hand type - type_weight (np.ndarray): The weights of hand type """ if keypoints_visible is None: keypoints_visible = np.ones(keypoints.shape[:-1], dtype=np.float32) if self.use_different_joint_weights: assert dataset_keypoint_weights is not None, 'To use different ' \ 'joint weights,`dataset_keypoint_weights` cannot be None.' heatmaps, keypoint_weights = generate_3d_gaussian_heatmaps( heatmap_size=self.heatmap_size, keypoints=keypoints, keypoints_visible=keypoints_visible, sigma=self.sigma, image_size=self.image_size, heatmap3d_depth_bound=self.heatmap3d_depth_bound, joint_indices=self.joint_indices, max_bound=self.max_bound, use_different_joint_weights=self.use_different_joint_weights, dataset_keypoint_weights=dataset_keypoint_weights) rel_root_depth = (rel_root_depth / self.root_depth_bound + 0.5) * self.heatmap_size_root rel_root_valid = rel_root_valid * (rel_root_depth >= 0) * ( rel_root_depth <= self.heatmap_size_root) encoded = dict( heatmaps=heatmaps, keypoint_weights=keypoint_weights, root_depth=rel_root_depth * np.ones(1, dtype=np.float32), type=hand_type, type_weight=hand_type_valid, root_depth_weight=rel_root_valid * np.ones(1, dtype=np.float32)) return encoded
[docs] def decode(self, heatmaps: np.ndarray, root_depth: np.ndarray, hand_type: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """Decode keypoint coordinates from heatmaps. The decoded keypoint coordinates are in the input image space. Args: heatmaps (np.ndarray): Heatmaps in shape (K, D, H, W) root_depth (np.ndarray): Root depth prediction. hand_type (np.ndarray): Hand type prediction. Returns: tuple: - keypoints (np.ndarray): Decoded keypoint coordinates in shape (N, K, D) - scores (np.ndarray): The keypoint scores in shape (N, K). It usually represents the confidence of the keypoint prediction """ heatmap3d = heatmaps.copy() keypoints, scores = get_heatmap_3d_maximum(heatmap3d) # transform keypoint depth to camera space keypoints[..., 2] = (keypoints[..., 2] / self.depth_size - 0.5) * self.heatmap3d_depth_bound # Unsqueeze the instance dimension for single-instance results keypoints, scores = keypoints[None], scores[None] # Restore the keypoint scale keypoints[..., :2] = keypoints[..., :2] * self.scale_factor # decode relative hand root depth # transform relative root depth to camera space rel_root_depth = ((root_depth / self.root_heatmap_size - 0.5) * self.root_depth_bound) hand_type = (hand_type > 0).reshape(1, -1).astype(int) return keypoints, scores, rel_root_depth, hand_type
Read the Docs v: latest
Versions
latest
0.x
dev-1.x
Downloads
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.