
mmpose.codecs.edpose_label 源代码

# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional

import numpy as np

from mmpose.registry import KEYPOINT_CODECS
from mmpose.structures import bbox_cs2xyxy, bbox_xyxy2cs
from .base import BaseKeypointCodec

@KEYPOINT_CODECS.register_module()
class EDPoseLabel(BaseKeypointCodec):
    r"""Label codec for `ED-Pose`_ by Yang J. et al (2023).

    ``encode`` rescales keypoints, boxes and areas from image pixels to
    values relative to the image size; ``decode`` picks the best-scoring
    predictions and rescales them back to pixels.

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]

    Encoded:

        - keypoints (np.ndarray): Keypoint coordinates divided by
            ``[w, h]``, same shape as the input keypoints
        - keypoints_visible (np.ndarray): Keypoint visibility, passed
            through unchanged (all ones of shape ``keypoints.shape[:2]``
            when not supplied)
        - area (np.ndarray): Area divided by ``w * h``
        - bbox (np.ndarray): Output of ``bbox_xyxy2cs`` concatenated on
            the last axis and divided by ``[w, h, w, h]``

    Args:
        num_select (int): How many top-scoring candidates ``decode``
            keeps. Defaults to 100
        num_keypoints (int): The number of keypoints per instance.
            Defaults to 17
    """

    # Extra fields that ``encode`` accepts besides the keypoints.
    auxiliary_encode_keys = {'area', 'bboxes', 'img_shape'}

    # Encoded key -> instance field name.
    instance_mapping_table = {
        'bbox': 'bboxes',
        'keypoints': 'keypoints',
        'keypoints_visible': 'keypoints_visible',
        'area': 'areas',
    }

    def __init__(self, num_select: int = 100, num_keypoints: int = 17):
        super().__init__()
        self.num_select = num_select
        self.num_keypoints = num_keypoints

    def encode(
        self,
        img_shape,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None,
        area: Optional[np.ndarray] = None,
        bboxes: Optional[np.ndarray] = None,
    ) -> dict:
        """Rescale keypoints, area and bbox from image space to values
        relative to the image size.

        Args:
            img_shape (Sequence[int]): Image size, unpacked as
                ``(w, h)``.
            keypoints (np.ndarray): Keypoint coordinates whose last axis
                is divided by ``[w, h]``.
            keypoints_visible (np.ndarray, optional): Keypoint
                visibility. When ``None`` it is replaced by ones of
                shape ``keypoints.shape[:2]``.
            area (np.ndarray, optional): Instance areas in pixels.
            bboxes (np.ndarray, optional): Boxes passed to
                ``bbox_xyxy2cs`` before being normalized.

        Returns:
            dict: Items ``keypoints``, ``area``, ``bbox`` and
            ``keypoints_visible``. An optional input that was ``None``
            stays ``None`` in the result.
        """
        w, h = img_shape

        # The default visibility is derived from ``keypoints`` before any
        # rescaling, so ``keypoints`` must be an array on this path.
        visibility = keypoints_visible
        if visibility is None:
            visibility = np.ones(keypoints.shape[:2], dtype=np.float32)

        norm_bboxes = bboxes
        if norm_bboxes is not None:
            # The helper returns several arrays; join them on the last
            # axis and divide by the matching image extents.
            norm_bboxes = np.concatenate(bbox_xyxy2cs(norm_bboxes), axis=-1)
            norm_bboxes = norm_bboxes / np.array([w, h, w, h],
                                                 dtype=np.float32)

        norm_area = area
        if norm_area is not None:
            norm_area = norm_area / float(w * h)

        norm_keypoints = keypoints
        if norm_keypoints is not None:
            norm_keypoints = norm_keypoints / np.array([w, h],
                                                       dtype=np.float32)

        return {
            'keypoints': norm_keypoints,
            'area': norm_area,
            'bbox': norm_bboxes,
            'keypoints_visible': visibility,
        }

    def decode(self, input_shapes: np.ndarray, pred_logits: np.ndarray,
               pred_boxes: np.ndarray, pred_keypoints: np.ndarray):
        """Keep the ``num_select`` best-scoring predictions and rescale
        them from normalized coordinates to the input image size.

        Args:
            input_shapes (np.ndarray): Input image size, split along
                axis 0 into height first, then width.
            pred_logits (np.ndarray): Prediction scores. They are
                flattened for ranking and ``pred_logits.shape[1]`` maps
                each flat index back to its row.
            pred_boxes (np.ndarray): Normalized boxes; the last axis is
                split after its first two entries and handed to
                ``bbox_cs2xyxy``.
            pred_keypoints (np.ndarray): Normalized keypoint predictions
                with ``num_keypoints * 3`` columns per row. Only the
                first ``num_keypoints * 2`` columns are kept; they are
                scaled as alternating (w, h) pairs.

        Returns:
            tuple: ``(boxes, keypoints, scores)``. ``boxes`` holds the
            selected boxes scaled by ``[w, h, w, h]``, ``keypoints`` is
            reshaped to ``(-1, num_keypoints, 2)``, and ``scores``
            repeats each selected score ``num_keypoints`` times along
            axis 1.
        """
        kpt_count = self.num_keypoints

        # Rank all scores in descending order and keep the best ones.
        flat_scores = pred_logits.reshape(-1)
        ranked = np.argsort(-flat_scores)[:self.num_select]
        best_scores = flat_scores[ranked]
        scores = np.tile(best_scores[:, np.newaxis], [1, kpt_count])

        # A flat index divided by the row length gives the source row;
        # boxes and keypoints are gathered with the same row indices.
        rows = (ranked // pred_logits.shape[1])[:, np.newaxis]

        # The input size arrives as (height, width).
        img_h, img_w = np.split(input_shapes, 2, axis=0)

        # Boxes: convert the layout, gather the rows, scale to pixels.
        all_boxes = bbox_cs2xyxy(*np.split(pred_boxes, [2], axis=-1))
        boxes = np.take_along_axis(
            all_boxes, np.tile(rows, [1, 4]), axis=0)
        box_scale = np.hstack([img_w, img_h, img_w, img_h])
        boxes = boxes * box_scale[np.newaxis, :]

        # Keypoints: gather the rows, drop the trailing third of the
        # columns, then scale the remaining alternating pairs to pixels.
        picked = np.take_along_axis(
            pred_keypoints, np.tile(rows, [1, kpt_count * 3]), axis=0)
        coords = picked[:, :(kpt_count * 2)]
        kpt_scale = np.tile(np.hstack([img_w, img_h]), [kpt_count])
        coords = coords * kpt_scale[np.newaxis, :]
        keypoints = coords.reshape(-1, kpt_count, 2)

        return boxes, keypoints, scores