Source code for mmpose.models.heads.regression_heads.dsnt_head

# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple, Union

import numpy as np
import torch
from mmengine.logging import MessageHub
from torch import Tensor

from mmpose.evaluation.functional import keypoint_pck_accuracy
from mmpose.registry import MODELS
from mmpose.utils.tensor_utils import to_numpy
from mmpose.utils.typing import ConfigType, OptConfigType, OptSampleList
from .integral_regression_head import IntegralRegressionHead

OptIntSeq = Optional[Sequence[int]]

@MODELS.register_module()
class DSNTHead(IntegralRegressionHead):
    """Top-down integral regression head introduced in `DSNT`_ by Nibali et
    al(2018).

    The head contains a differentiable spatial to numerical transform (DSNT)
    layer that do soft-argmax operation on the predicted heatmaps to regress
    the coordinates.

    This head is used for algorithms that require supervision of heatmaps
    in `DSNT` approach.

    Args:
        in_channels (int | sequence[int]): Number of input channels
        in_featuremap_size (int | sequence[int]): Size of input feature map
        num_joints (int): Number of joints
        lambda_t (int): Discard heatmap-based loss when current
            epoch > lambda_t. Defaults to -1.
        debias (bool): Whether to remove the bias of Integral Pose Regression.
            see `Removing the Bias of Integral Pose Regression`_ by Gu et al
            (2021). Defaults to ``False``.
        beta (float): A smoothing parameter in softmax. Defaults to ``1.0``.
        deconv_out_channels (sequence[int]): The output channel number of each
            deconv layer. Defaults to ``(256, 256, 256)``
        deconv_kernel_sizes (sequence[int | tuple], optional): The kernel size
            of each deconv layer. Each element should be either an integer for
            both height and width dimensions, or a tuple of two integers for
            the height and the width dimension respectively. Defaults to
            ``(4, 4, 4)``
        conv_out_channels (sequence[int], optional): The output channel number
            of each intermediate conv layer. ``None`` means no intermediate
            conv layer between deconv layers and the final conv layer.
            Defaults to ``None``
        conv_kernel_sizes (sequence[int | tuple], optional): The kernel size
            of each intermediate conv layer. Defaults to ``None``
        final_layer (dict): Arguments of the final Conv2d layer.
            Defaults to ``dict(kernel_size=1)``
        loss (Config): Config for keypoint loss. Defaults to use
            :class:`DSNTLoss`
        decoder (Config, optional): The decoder config that controls decoding
            keypoint coordinates from the network output. Defaults to ``None``
        init_cfg (Config, optional): Config to control the initialization. See
            :attr:`default_init_cfg` for default settings

    .. _`DSNT`: https://arxiv.org/abs/1801.07372
    """

    _version = 2

    def __init__(self,
                 in_channels: Union[int, Sequence[int]],
                 in_featuremap_size: Tuple[int, int],
                 num_joints: int,
                 lambda_t: int = -1,
                 debias: bool = False,
                 beta: float = 1.0,
                 deconv_out_channels: OptIntSeq = (256, 256, 256),
                 deconv_kernel_sizes: OptIntSeq = (4, 4, 4),
                 conv_out_channels: OptIntSeq = None,
                 conv_kernel_sizes: OptIntSeq = None,
                 final_layer: dict = dict(kernel_size=1),
                 loss: ConfigType = dict(
                     type='MultipleLossWrapper',
                     losses=[
                         dict(type='SmoothL1Loss', use_target_weight=True),
                         dict(type='JSDiscretLoss', use_target_weight=True)
                     ]),
                 decoder: OptConfigType = None,
                 init_cfg: OptConfigType = None):

        super().__init__(
            in_channels=in_channels,
            in_featuremap_size=in_featuremap_size,
            num_joints=num_joints,
            debias=debias,
            beta=beta,
            deconv_out_channels=deconv_out_channels,
            deconv_kernel_sizes=deconv_kernel_sizes,
            conv_out_channels=conv_out_channels,
            conv_kernel_sizes=conv_kernel_sizes,
            final_layer=final_layer,
            loss=loss,
            decoder=decoder,
            init_cfg=init_cfg)

        # Epoch threshold after which the heatmap loss term is dropped
        # (see ``loss`` below); <= 0 disables the schedule.
        self.lambda_t = lambda_t

    def loss(self,
             inputs: Tuple[Tensor],
             batch_data_samples: OptSampleList,
             train_cfg: ConfigType = {}) -> dict:
        """Calculate losses from a batch of inputs and data samples.

        Args:
            inputs (Tuple[Tensor]): Multi-scale features from the backbone.
            batch_data_samples (OptSampleList): Data samples carrying
                ``gt_instance_labels`` (keypoint labels/weights) and
                ``gt_fields.heatmaps``.
            train_cfg (Config): Training config (unused here).

        Returns:
            dict: ``loss_kpt`` (total loss) and ``acc_pose`` (PCK accuracy).
        """
        pred_coords, pred_heatmaps = self.forward(inputs)
        # Concatenate per-sample ground truths along the instance dimension.
        # NOTE: the scraped source had dropped these ``torch.cat`` wrappers,
        # leaving unbalanced parentheses; restored here.
        keypoint_labels = torch.cat(
            [d.gt_instance_labels.keypoint_labels for d in batch_data_samples])
        keypoint_weights = torch.cat([
            d.gt_instance_labels.keypoint_weights for d in batch_data_samples
        ])
        gt_heatmaps = torch.stack(
            [d.gt_fields.heatmaps for d in batch_data_samples])

        input_list = [pred_coords, pred_heatmaps]
        target_list = [keypoint_labels, gt_heatmaps]

        # calculate losses: loss_module is a MultipleLossWrapper by default,
        # returning one loss per (input, target) pair — [coord loss, heatmap
        # loss].
        losses = dict()
        loss_list = self.loss_module(input_list, target_list, keypoint_weights)

        loss = loss_list[0] + loss_list[1]

        # After epoch ``lambda_t``, supervise with the coordinate loss only.
        if self.lambda_t > 0:
            mh = MessageHub.get_current_instance()
            cur_epoch = mh.get_info('epoch')
            if cur_epoch >= self.lambda_t:
                loss = loss_list[0]

        losses.update(loss_kpt=loss)

        # calculate accuracy (PCK with a unit norm factor, threshold 0.05)
        _, avg_acc, _ = keypoint_pck_accuracy(
            pred=to_numpy(pred_coords),
            gt=to_numpy(keypoint_labels),
            mask=to_numpy(keypoint_weights) > 0,
            thr=0.05,
            norm_factor=np.ones((pred_coords.size(0), 2), dtype=np.float32))

        acc_pose = torch.tensor(avg_acc, device=keypoint_labels.device)
        losses.update(acc_pose=acc_pose)

        return losses
Read the Docs v: latest
On Read the Docs
Project Home

Free document hosting provided by Read the Docs.