# Wholebody 2D Keypoint

## COCO-WholeBody Dataset

### Topdown Heatmap + ResNet on COCO-WholeBody

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}

COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}


Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| pose_resnet_50 | 256x192 | 0.652 | 0.738 | 0.615 | 0.749 | 0.606 | 0.715 | 0.460 | 0.584 | 0.521 | 0.633 | ckpt | log |
| pose_resnet_50 | 384x288 | 0.666 | 0.747 | 0.634 | 0.763 | 0.731 | 0.811 | 0.536 | 0.646 | 0.574 | 0.670 | ckpt | log |
| pose_resnet_101 | 256x192 | 0.669 | 0.753 | 0.637 | 0.766 | 0.611 | 0.722 | 0.463 | 0.589 | 0.531 | 0.645 | ckpt | log |
| pose_resnet_101 | 384x288 | 0.692 | 0.770 | 0.680 | 0.799 | 0.746 | 0.820 | 0.548 | 0.657 | 0.597 | 0.693 | ckpt | log |
| pose_resnet_152 | 256x192 | 0.682 | 0.764 | 0.661 | 0.787 | 0.623 | 0.728 | 0.481 | 0.607 | 0.548 | 0.661 | ckpt | log |
| pose_resnet_152 | 384x288 | 0.704 | 0.780 | 0.693 | 0.813 | 0.751 | 0.824 | 0.559 | 0.666 | 0.610 | 0.705 | ckpt | log |

### Topdown Heatmap + HRNet on COCO-WholeBody

HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5693--5703},
  year      = {2019},
}

COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}


Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| pose_hrnet_w32 | 256x192 | 0.678 | 0.755 | 0.543 | 0.661 | 0.630 | 0.708 | 0.467 | 0.566 | 0.536 | 0.636 | ckpt | log |
| pose_hrnet_w32 | 384x288 | 0.700 | 0.772 | 0.585 | 0.691 | 0.726 | 0.783 | 0.515 | 0.603 | 0.586 | 0.673 | ckpt | log |
| pose_hrnet_w48 | 256x192 | 0.701 | 0.776 | 0.675 | 0.787 | 0.656 | 0.743 | 0.535 | 0.639 | 0.579 | 0.681 | ckpt | log |
| pose_hrnet_w48 | 384x288 | 0.722 | 0.791 | 0.696 | 0.801 | 0.776 | 0.834 | 0.587 | 0.678 | 0.632 | 0.717 | ckpt | log |

### Topdown Heatmap + ViPNAS on COCO-WholeBody

ViPNAS (CVPR'2021)
@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021},
}

COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}


Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

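| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| S-ViPNAS-MobileNetV3 | 256x192 | 0.619 | 0.700 | 0.477 | 0.608 | 0.585 | 0.689 | 0.386 | 0.505 | 0.473 | 0.578 | ckpt | log |
| S-ViPNAS-Res50 | 256x192 | 0.643 | 0.726 | 0.553 | 0.694 | 0.587 | 0.698 | 0.410 | 0.529 | 0.495 | 0.607 | ckpt | log |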

### Topdown Heatmap + CSPNeXt + UDP on COCO-WholeBody

RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}

UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}

COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}


Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

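| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| pose_cspnext_m_udp | 256x192 | 0.687 | 0.735 | 0.680 | 0.763 | 0.697 | 0.755 | 0.460 | 0.543 | 0.567 | 0.641 | ckpt | log |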

### Topdown Heatmap + ViPNAS + Dark on COCO-WholeBody

ViPNAS (CVPR'2021)
@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021},
}

DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7093--7102},
  year      = {2020},
}

COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}


Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

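| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| S-ViPNAS-MobileNetV3_dark | 256x192 | 0.632 | 0.710 | 0.530 | 0.660 | 0.672 | 0.771 | 0.404 | 0.519 | 0.508 | 0.607 | ckpt | log |
| S-ViPNAS-Res50_dark | 256x192 | 0.650 | 0.732 | 0.550 | 0.686 | 0.684 | 0.783 | 0.437 | 0.554 | 0.528 | 0.632 | ckpt | log |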

### Topdown Heatmap + HRNet + Dark on COCO-WholeBody

HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5693--5703},
  year      = {2019},
}

DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7093--7102},
  year      = {2020},
}

COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}


Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

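| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| pose_hrnet_w32_dark | 256x192 | 0.693 | 0.764 | 0.564 | 0.674 | 0.737 | 0.809 | 0.503 | 0.602 | 0.582 | 0.671 | ckpt | log |
| pose_hrnet_w48_dark+ | 384x288 | 0.742 | 0.807 | 0.707 | 0.806 | 0.841 | 0.892 | 0.602 | 0.694 | 0.661 | 0.743 | ckpt | log |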

Note: + means the model is first pre-trained on the original COCO dataset and then fine-tuned on the COCO-WholeBody dataset. We find that this leads to better performance.

### RTMPose + RTMPose on COCO-WholeBody

RTMPose (arXiv'2023)
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  year          = {2023},
  publisher     = {arXiv},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International},
}

RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}

COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}


Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

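| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| rtmpose-m | 256x192 | 0.697 | 0.743 | 0.660 | 0.749 | 0.822 | 0.858 | 0.483 | 0.564 | 0.604 | 0.667 | ckpt | log |
| rtmpose-l | 256x192 | 0.721 | 0.764 | 0.693 | 0.780 | 0.844 | 0.876 | 0.523 | 0.600 | 0.632 | 0.694 | ckpt | log |
| rtmpose-l | 384x288 | 0.736 | 0.776 | 0.738 | 0.810 | 0.895 | 0.918 | 0.591 | 0.659 | 0.670 | 0.723 | ckpt | log |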