
# Source code for mmpose.codecs.decoupled_heatmap

# Copyright (c) OpenMMLab. All rights reserved.
import random
from typing import Optional, Tuple

import numpy as np

from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils import (generate_gaussian_heatmaps, get_diagonal_lengths,
                    get_instance_bbox, get_instance_root)
from .utils.post_processing import get_heatmap_maximum
from .utils.refinement import refine_keypoints

@KEYPOINT_CODECS.register_module()
class DecoupledHeatmap(BaseKeypointCodec):
    """Encode/decode keypoints with the method introduced in the paper CID.

    See the paper "Contextual Instance Decoupling for Robust Multi-Person
    Pose Estimation" by Wang et al. (CVPR 2022) for details.

    Note:

        - instance number: N
        - selected (encoded) instance number: M
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]
        - heatmap size: [W, H]

    Encoded:

        - heatmaps (np.ndarray): The coupled heatmap in shape
            (1+K, H, W) where [W, H] is the `heatmap_size`.
        - instance_heatmaps (np.ndarray): The decoupled heatmap in shape
            (M*K, H, W) where M is the number of selected instances.
        - keypoint_weights (np.ndarray): The weight for heatmaps in shape
            (M*K).
        - instance_coords (np.ndarray): The coordinates of instance roots
            in shape (M, 2)

    Args:
        input_size (tuple): Image size in [w, h]
        heatmap_size (tuple): Heatmap size in [W, H]
        root_type (str): The method to generate the instance root. Options
            are:

            - ``'kpt_center'``: Average coordinate of all visible keypoints.
            - ``'bbox_center'``: Center point of bounding boxes outlined by
                all visible keypoints.

            Defaults to ``'kpt_center'``

        heatmap_min_overlap (float): Minimum overlap rate among instances.
            Used when calculating sigmas for instances. Defaults to 0.7
        encode_max_instances (int): The maximum number of instances
            to encode for each sample. Defaults to 30
    """

    # DecoupledHeatmap requires bounding boxes to determine the size of each
    # instance, so that it can assign varying sigmas based on their size
    auxiliary_encode_keys = {'bbox'}

    label_mapping_table = dict(
        keypoint_weights='keypoint_weights',
        instance_coords='instance_coords',
    )
    field_mapping_table = dict(
        heatmaps='heatmaps',
        instance_heatmaps='instance_heatmaps',
    )

    def __init__(
        self,
        input_size: Tuple[int, int],
        heatmap_size: Tuple[int, int],
        root_type: str = 'kpt_center',
        heatmap_min_overlap: float = 0.7,
        encode_max_instances: int = 30,
    ):
        super().__init__()

        self.input_size = input_size
        self.heatmap_size = heatmap_size
        self.root_type = root_type
        self.encode_max_instances = encode_max_instances
        self.heatmap_min_overlap = heatmap_min_overlap

        # Per-axis ratio between image space and heatmap space; dividing by
        # it maps image coordinates to heatmap coordinates and multiplying
        # maps them back.
        self.scale_factor = (np.array(input_size) /
                             heatmap_size).astype(np.float32)

    def _get_instance_wise_sigmas(
        self,
        bbox: np.ndarray,
    ) -> np.ndarray:
        """Get sigma values for each instance according to their size.

        Args:
            bbox (np.ndarray): Bounding box in shape (N, 4, 2)

        Returns:
            np.ndarray: Array containing the sigma values for each instance.
        """
        sigmas = np.zeros((bbox.shape[0], ), dtype=np.float32)

        # Side lengths are measured between corners so that the result does
        # not depend on the box being axis-aligned.
        heights = np.sqrt(np.power(bbox[:, 0] - bbox[:, 1], 2).sum(axis=-1))
        widths = np.sqrt(np.power(bbox[:, 0] - bbox[:, 2], 2).sum(axis=-1))

        # NOTE(review): the three expressions below resemble the quadratic
        # formula but use ``(b + sqrt(...)) / 2`` without dividing by ``a``.
        # They are kept exactly as-is to preserve existing encoding results;
        # confirm against the reference implementation before changing them.
        for i in range(bbox.shape[0]):
            h, w = heights[i], widths[i]

            # compute sigma for each instance
            # condition 1
            a1, b1 = 1, h + w
            c1 = w * h * (1 - self.heatmap_min_overlap) / (
                1 + self.heatmap_min_overlap)
            sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
            r1 = (b1 + sq1) / 2

            # condition 2
            a2 = 4
            b2 = 2 * (h + w)
            c2 = (1 - self.heatmap_min_overlap) * w * h
            sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
            r2 = (b2 + sq2) / 2

            # condition 3
            a3 = 4 * self.heatmap_min_overlap
            b3 = -2 * self.heatmap_min_overlap * (h + w)
            c3 = (self.heatmap_min_overlap - 1) * w * h
            sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
            r3 = (b3 + sq3) / 2

            # The smallest candidate radius, divided by 3, becomes sigma.
            sigmas[i] = min(r1, r2, r3) / 3

        return sigmas

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None,
               bbox: Optional[np.ndarray] = None) -> dict:
        """Encode keypoints into heatmaps.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, all keypoints are treated as visible.
            bbox (np.ndarray): Bounding box in shape (N, 8) which includes
                coordinates of 4 corners. If ``None``, a pseudo bounding box
                is generated from the visible keypoints.

        Returns:
            dict:
            - heatmaps (np.ndarray): The coupled heatmap in shape
                (1+K, H, W) where [W, H] is the `heatmap_size`.
            - instance_heatmaps (np.ndarray): The decoupled heatmap in shape
                (M*K, H, W) where M is the number of selected instances.
            - keypoint_weights (np.ndarray): The weight for heatmaps in shape
                (M*K).
            - instance_coords (np.ndarray): The coordinates of instance roots
                in shape (M, 2)
        """
        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)
        if bbox is None:
            # generate pseudo bbox via visible keypoints
            bbox = get_instance_bbox(keypoints, keypoints_visible)
            bbox = np.tile(bbox, 2).reshape(-1, 4, 2)
            # corner order: left_top, left_bottom, right_top, right_bottom
            bbox[:, 1:3, 0] = bbox[:, 0:2, 0]

        # keypoint coordinates in heatmap
        _keypoints = keypoints / self.scale_factor
        _bbox = bbox.reshape(-1, 4, 2) / self.scale_factor

        # compute the root and scale of each instance
        roots, roots_visible = get_instance_root(_keypoints, keypoints_visible,
                                                 self.root_type)

        sigmas = self._get_instance_wise_sigmas(_bbox)

        # generate global heatmaps; the instance root is appended as an
        # extra "keypoint" so it gets its own channel
        heatmaps, keypoint_weights = generate_gaussian_heatmaps(
            heatmap_size=self.heatmap_size,
            keypoints=np.concatenate((_keypoints, roots[:, None]), axis=1),
            keypoints_visible=np.concatenate(
                (keypoints_visible, roots_visible[:, None]), axis=1),
            sigma=sigmas)
        # use the root weights reported by the heatmap generator from here on
        roots_visible = keypoint_weights[:, -1]

        # select instances
        inst_roots, inst_indices = [], []
        diagonal_lengths = get_diagonal_lengths(_keypoints, keypoints_visible)
        for i in np.argsort(diagonal_lengths):
            if roots_visible[i] < 1:
                continue
            # rand root point in 3x3 grid
            x, y = roots[i] + np.random.randint(-1, 2, (2, ))
            # keep the jittered root inside the heatmap
            x = max(0, min(x, self.heatmap_size[0] - 1))
            y = max(0, min(y, self.heatmap_size[1] - 1))
            # skip instances whose root collides with one already selected
            if (x, y) not in inst_roots:
                inst_roots.append((x, y))
                inst_indices.append(i)

        # randomly subsample when there are more instances than allowed
        if len(inst_indices) > self.encode_max_instances:
            rand_indices = random.sample(
                range(len(inst_indices)), self.encode_max_instances)
            inst_roots = [inst_roots[i] for i in rand_indices]
            inst_indices = [inst_indices[i] for i in rand_indices]

        # generate instance-wise heatmaps
        inst_heatmaps, inst_heatmap_weights = [], []
        for i in inst_indices:
            inst_heatmap, inst_heatmap_weight = generate_gaussian_heatmaps(
                heatmap_size=self.heatmap_size,
                keypoints=_keypoints[i:i + 1],
                keypoints_visible=keypoints_visible[i:i + 1],
                sigma=sigmas[i].item())
            inst_heatmaps.append(inst_heatmap)
            inst_heatmap_weights.append(inst_heatmap_weight)

        if len(inst_indices) > 0:
            inst_heatmaps = np.concatenate(inst_heatmaps)
            inst_heatmap_weights = np.concatenate(inst_heatmap_weights)
            inst_roots = np.array(inst_roots, dtype=np.int32)
        else:
            # no instance selected: return correctly-ranked empty arrays
            inst_heatmaps = np.empty((0, *self.heatmap_size[::-1]))
            inst_heatmap_weights = np.empty((0, ))
            inst_roots = np.empty((0, 2), dtype=np.int32)

        encoded = dict(
            heatmaps=heatmaps,
            instance_heatmaps=inst_heatmaps,
            keypoint_weights=inst_heatmap_weights,
            instance_coords=inst_roots)

        return encoded

    def decode(self, instance_heatmaps: np.ndarray,
               instance_scores: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Decode keypoint coordinates from decoupled heatmaps. The decoded
        keypoint coordinates are in the input image space.

        Args:
            instance_heatmaps (np.ndarray): Heatmaps in shape (N, K, H, W)
            instance_scores (np.ndarray): Confidence of instance roots
                prediction in shape (N, 1)

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
                (N, K, D)
            - scores (np.ndarray): The keypoint scores in shape (N, K). It
                usually represents the confidence of the keypoint prediction

            When ``instance_heatmaps`` contains no instance (N == 0), empty
            arrays of shape (0, K, 2) and (0, K) are returned.
        """
        if instance_heatmaps.shape[0] == 0:
            # np.concatenate raises ValueError on an empty list, so handle
            # the "no instance detected" case explicitly.
            num_keypoints = (
                instance_heatmaps.shape[1] if instance_heatmaps.ndim > 1 else 0)
            return (np.empty((0, num_keypoints, 2), dtype=np.float32),
                    np.empty((0, num_keypoints), dtype=np.float32))

        keypoints, keypoint_scores = [], []

        for inst_heatmap in instance_heatmaps:
            # work on a copy so the caller's heatmaps stay untouched
            heatmaps = inst_heatmap.copy()
            kpts, scores = get_heatmap_maximum(heatmaps)
            keypoints.append(refine_keypoints(kpts[None], heatmaps))
            keypoint_scores.append(scores[None])

        keypoints = np.concatenate(keypoints)
        # Restore the keypoint scale
        keypoints = keypoints * self.scale_factor

        keypoint_scores = np.concatenate(keypoint_scores)
        # weight each keypoint score by the confidence of its instance root
        keypoint_scores *= instance_scores

        return keypoints, keypoint_scores