# Source code for mmpose.codecs.decoupled_heatmap
# Copyright (c) OpenMMLab. All rights reserved.
import random
from typing import Optional, Tuple
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils import (generate_gaussian_heatmaps, get_diagonal_lengths,
get_instance_bbox, get_instance_root)
from .utils.post_processing import get_heatmap_maximum
from .utils.refinement import refine_keypoints
@KEYPOINT_CODECS.register_module()
class DecoupledHeatmap(BaseKeypointCodec):
    """Encode/decode keypoints with the method introduced in the CID paper.

    See `CID`_ (Contextual Instance Decoupling for Robust Multi-Person Pose
    Estimation, Wang et al., 2022) for details.

    Note:

        - instance number: N
        - number of instances selected for encoding: M
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]
        - heatmap size: [W, H]

    Encoded:

        - heatmaps (np.ndarray): The coupled heatmap in shape
            (1+K, H, W) where [W, H] is the `heatmap_size`. The instance
            root is appended after the K keypoints when the heatmap is
            generated.
        - instance_heatmaps (np.ndarray): The decoupled heatmap in shape
            (M*K, H, W) where M is the number of selected instances.
        - keypoint_weights (np.ndarray): The weight for the instance
            heatmaps in shape (M*K).
        - instance_coords (np.ndarray): The coordinates of instance roots
            in shape (M, 2)

    Args:
        input_size (tuple): Image size in [w, h]
        heatmap_size (tuple): Heatmap size in [W, H]
        root_type (str): The method to generate the instance root. Options
            are:

            - ``'kpt_center'``: Average coordinate of all visible keypoints.
            - ``'bbox_center'``: Center point of bounding boxes outlined by
                all visible keypoints.

            Defaults to ``'kpt_center'``
        heatmap_min_overlap (float): Minimum overlap rate among instances.
            Used when calculating sigmas for instances. Defaults to 0.7
        encode_max_instances (int): The maximum number of instances
            to encode for each sample. Defaults to 30

    .. _`CID`: https://openaccess.thecvf.com/content/CVPR2022/html/Wang_
        Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_
        CVPR_2022_paper.html
    """

    # DecoupledHeatmap requires bounding boxes to determine the size of each
    # instance, so that it can assign varying sigmas based on their size
    auxiliary_encode_keys = {'bbox'}

    label_mapping_table = dict(
        keypoint_weights='keypoint_weights',
        instance_coords='instance_coords',
    )
    field_mapping_table = dict(
        heatmaps='heatmaps',
        instance_heatmaps='instance_heatmaps',
    )

    def __init__(
        self,
        input_size: Tuple[int, int],
        heatmap_size: Tuple[int, int],
        root_type: str = 'kpt_center',
        heatmap_min_overlap: float = 0.7,
        encode_max_instances: int = 30,
    ):
        super().__init__()

        self.input_size = input_size
        self.heatmap_size = heatmap_size
        self.root_type = root_type
        self.encode_max_instances = encode_max_instances
        self.heatmap_min_overlap = heatmap_min_overlap

        # per-axis ratio between image space and heatmap space, in (x, y)
        self.scale_factor = (np.array(input_size) /
                             heatmap_size).astype(np.float32)

    def _get_instance_wise_sigmas(
        self,
        bbox: np.ndarray,
    ) -> np.ndarray:
        """Get sigma values for each instance according to their size.

        Args:
            bbox (np.ndarray): Bounding box in shape (N, 4, 2)

        Returns:
            np.ndarray: Array containing the sigma values for each instance.
        """
        sigmas = np.zeros((bbox.shape[0], ), dtype=np.float32)

        # side lengths measured as distances from corner 0 to corners 1 and 2
        heights = np.sqrt(np.power(bbox[:, 0] - bbox[:, 1], 2).sum(axis=-1))
        widths = np.sqrt(np.power(bbox[:, 0] - bbox[:, 2], 2).sum(axis=-1))

        # NOTE(review): r1 and r2 below use `(b + sq) / 2` rather than the
        # textbook `(-b +/- sq) / (2 * a)`. The expressions are kept exactly
        # as they were, because changing them alters the generated training
        # targets -- confirm against the reference implementation first.
        for i in range(bbox.shape[0]):
            h, w = heights[i], widths[i]

            # compute sigma for each instance
            # condition 1
            a1, b1 = 1, h + w
            c1 = w * h * (1 - self.heatmap_min_overlap) / (
                1 + self.heatmap_min_overlap)
            sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
            r1 = (b1 + sq1) / 2

            # condition 2
            a2 = 4
            b2 = 2 * (h + w)
            c2 = (1 - self.heatmap_min_overlap) * w * h
            sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
            r2 = (b2 + sq2) / 2

            # condition 3
            a3 = 4 * self.heatmap_min_overlap
            b3 = -2 * self.heatmap_min_overlap * (h + w)
            c3 = (self.heatmap_min_overlap - 1) * w * h
            sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
            r3 = (b3 + sq3) / 2

            # the smallest radius is used, divided by 3 to obtain sigma
            sigmas[i] = min(r1, r2, r3) / 3

        return sigmas

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None,
               bbox: Optional[np.ndarray] = None) -> dict:
        """Encode keypoints into heatmaps.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, all keypoints are treated as visible.
            bbox (np.ndarray): Coordinates of the 4 corners of each
                bounding box. It is reshaped to (N, 4, 2) internally, so
                both (N, 8) and (N, 4, 2) are accepted. If ``None``, a
                pseudo box is generated from the visible keypoints.

        Returns:
            dict:
            - heatmaps (np.ndarray): The coupled heatmap in shape
                (1+K, H, W) where [W, H] is the `heatmap_size`.
            - instance_heatmaps (np.ndarray): The decoupled heatmap in shape
                (M*K, H, W) where M is the number of selected instances
                (at most ``encode_max_instances``).
            - keypoint_weights (np.ndarray): The weight for the instance
                heatmaps in shape (M*K).
            - instance_coords (np.ndarray): The integer coordinates of the
                selected instance roots in shape (M, 2)
        """
        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        if bbox is None:
            # generate pseudo bbox via visible keypoints
            bbox = get_instance_bbox(keypoints, keypoints_visible)
            # repeat the box values so that every instance has four (x, y)
            # rows, then overwrite the x of rows 1 and 2 to get the corners
            bbox = np.tile(bbox, 2).reshape(-1, 4, 2)
            # corner order: left_top, left_bottom, right_top, right_bottom
            bbox[:, 1:3, 0] = bbox[:, 0:2, 0]

        # keypoint coordinates in heatmap
        _keypoints = keypoints / self.scale_factor
        _bbox = bbox.reshape(-1, 4, 2) / self.scale_factor

        # compute the root and scale of each instance
        roots, roots_visible = get_instance_root(_keypoints, keypoints_visible,
                                                 self.root_type)
        sigmas = self._get_instance_wise_sigmas(_bbox)

        # generate global heatmaps; the root is appended as an extra keypoint
        heatmaps, keypoint_weights = generate_gaussian_heatmaps(
            heatmap_size=self.heatmap_size,
            keypoints=np.concatenate((_keypoints, roots[:, None]), axis=1),
            keypoints_visible=np.concatenate(
                (keypoints_visible, roots_visible[:, None]), axis=1),
            sigma=sigmas)
        # the last weight column belongs to the appended root
        roots_visible = keypoint_weights[:, -1]

        # select instances
        inst_roots, inst_indices = [], []
        diagonal_lengths = get_diagonal_lengths(_keypoints, keypoints_visible)
        for i in np.argsort(diagonal_lengths):
            if roots_visible[i] < 1:
                continue
            # rand root point in 3x3 grid
            x, y = roots[i] + np.random.randint(-1, 2, (2, ))
            # keep the jittered root inside the heatmap
            x = max(0, min(x, self.heatmap_size[0] - 1))
            y = max(0, min(y, self.heatmap_size[1] - 1))
            # skip instances whose root collides with an already kept one
            if (x, y) not in inst_roots:
                inst_roots.append((x, y))
                inst_indices.append(i)

        # randomly subsample when there are more candidates than allowed
        if len(inst_indices) > self.encode_max_instances:
            rand_indices = random.sample(
                range(len(inst_indices)), self.encode_max_instances)
            inst_roots = [inst_roots[i] for i in rand_indices]
            inst_indices = [inst_indices[i] for i in rand_indices]

        # generate instance-wise heatmaps
        inst_heatmaps, inst_heatmap_weights = [], []
        for i in inst_indices:
            inst_heatmap, inst_heatmap_weight = generate_gaussian_heatmaps(
                heatmap_size=self.heatmap_size,
                keypoints=_keypoints[i:i + 1],
                keypoints_visible=keypoints_visible[i:i + 1],
                sigma=sigmas[i].item())

            inst_heatmaps.append(inst_heatmap)
            inst_heatmap_weights.append(inst_heatmap_weight)

        if inst_indices:
            inst_heatmaps = np.concatenate(inst_heatmaps)
            inst_heatmap_weights = np.concatenate(inst_heatmap_weights)
            inst_roots = np.array(inst_roots, dtype=np.int32)
        else:
            # no instance selected: return empty arrays of matching rank
            inst_heatmaps = np.empty((0, *self.heatmap_size[::-1]))
            inst_heatmap_weights = np.empty((0, ))
            inst_roots = np.empty((0, 2), dtype=np.int32)

        encoded = dict(
            heatmaps=heatmaps,
            instance_heatmaps=inst_heatmaps,
            keypoint_weights=inst_heatmap_weights,
            instance_coords=inst_roots)

        return encoded

    def decode(self, instance_heatmaps: np.ndarray,
               instance_scores: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Decode keypoint coordinates from decoupled heatmaps. The decoded
        keypoint coordinates are in the input image space.

        Args:
            instance_heatmaps (np.ndarray): Heatmaps in shape (N, K, H, W)
            instance_scores (np.ndarray): Confidence of instance roots
                prediction in shape (N, 1)

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
                (N, K, D)
            - scores (np.ndarray): The keypoint scores in shape (N, K). It
                usually represents the confidence of the keypoint prediction

            When ``instance_heatmaps`` contains no instance, empty arrays in
            shape (0, K, 2) and (0, K) are returned.
        """
        num_instances = instance_heatmaps.shape[0]
        if num_instances == 0:
            # ``np.concatenate`` raises on an empty list, so answer the
            # no-instance case directly with correctly shaped empty arrays
            num_keypoints = (
                instance_heatmaps.shape[1] if instance_heatmaps.ndim > 1 else 0)
            return (np.empty((0, num_keypoints, 2), dtype=np.float32),
                    np.empty((0, num_keypoints), dtype=np.float32))

        keypoints, keypoint_scores = [], []

        for i in range(num_instances):
            # work on a copy so that the caller's heatmaps are left untouched
            heatmaps = instance_heatmaps[i].copy()
            kpts, scores = get_heatmap_maximum(heatmaps)
            keypoints.append(refine_keypoints(kpts[None], heatmaps))
            keypoint_scores.append(scores[None])

        keypoints = np.concatenate(keypoints)
        # Restore the keypoint scale
        keypoints = keypoints * self.scale_factor

        keypoint_scores = np.concatenate(keypoint_scores)
        # weight every keypoint score by the confidence of its instance
        keypoint_scores *= instance_scores

        return keypoints, keypoint_scores