Wholebody 2D Keypoint¶

Ubody2d Dataset¶

Topdown Heatmap + Hrnet + Ubody-Coco-Wholebody on Ubody2d¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}

UBody (CVPR'2023)

@inproceedings{lin2023one,
  author    = {Lin, Jing and Zeng, Ailing and Wang, Haoqian and Zhang, Lei and Li, Yu},
  title     = {One-Stage {3D} Whole-Body Mesh Recovery with Component Aware Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2023},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_hrnet_w32	256x192	0.685	0.759	0.564	0.675	0.625	0.705	0.516	0.609	0.549	0.646	ckpt	log

Coco-Wholebody Dataset¶

Topdown Heatmap + Vipnas + Dark on Coco-Wholebody¶

ViPNAS (CVPR'2021)

@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2021},
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7093--7102},
  year      = {2020},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
S-ViPNAS-MobileNetV3_dark	256x192	0.632	0.710	0.530	0.660	0.672	0.771	0.404	0.519	0.508	0.607	ckpt	log
S-ViPNAS-Res50_dark	256x192	0.650	0.732	0.550	0.686	0.684	0.783	0.437	0.554	0.528	0.632	ckpt	log

Topdown Heatmap + Resnet on Coco-Wholebody¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_resnet_50	256x192	0.652	0.738	0.615	0.749	0.606	0.715	0.460	0.584	0.521	0.633	ckpt	log
pose_resnet_50	384x288	0.666	0.747	0.634	0.763	0.731	0.811	0.536	0.646	0.574	0.670	ckpt	log
pose_resnet_101	256x192	0.669	0.753	0.637	0.766	0.611	0.722	0.463	0.589	0.531	0.645	ckpt	log
pose_resnet_101	384x288	0.692	0.770	0.680	0.799	0.746	0.820	0.548	0.657	0.597	0.693	ckpt	log
pose_resnet_152	256x192	0.682	0.764	0.661	0.787	0.623	0.728	0.481	0.607	0.548	0.661	ckpt	log
pose_resnet_152	384x288	0.704	0.780	0.693	0.813	0.751	0.824	0.559	0.666	0.610	0.705	ckpt	log

Topdown Heatmap + Hrnet on Coco-Wholebody¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_hrnet_w32	256x192	0.678	0.755	0.543	0.661	0.630	0.708	0.467	0.566	0.536	0.636	ckpt	log
pose_hrnet_w32	384x288	0.700	0.772	0.585	0.691	0.726	0.783	0.515	0.603	0.586	0.673	ckpt	log
pose_hrnet_w48	256x192	0.701	0.776	0.675	0.787	0.656	0.743	0.535	0.639	0.579	0.681	ckpt	log
pose_hrnet_w48	384x288	0.722	0.791	0.696	0.801	0.776	0.834	0.587	0.678	0.632	0.717	ckpt	log

Topdown Heatmap + Vipnas on Coco-Wholebody¶

ViPNAS (CVPR'2021)

@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2021},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
S-ViPNAS-MobileNetV3	256x192	0.619	0.700	0.477	0.608	0.585	0.689	0.386	0.505	0.473	0.578	ckpt	log
S-ViPNAS-Res50	256x192	0.643	0.726	0.553	0.694	0.587	0.698	0.410	0.529	0.495	0.607	ckpt	log

Topdown Heatmap + Cspnext + Udp on Coco-Wholebody¶

RTMDet (arXiv'2022)

@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}

UDP (CVPR'2020)

@InProceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_cspnext_m_udp	256x192	0.687	0.735	0.680	0.763	0.697	0.755	0.460	0.543	0.567	0.641	ckpt	log

Topdown Heatmap + Hrnet + Dark on Coco-Wholebody¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7093--7102},
  year      = {2020},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_hrnet_w32_dark	256x192	0.693	0.764	0.564	0.674	0.737	0.809	0.503	0.602	0.582	0.671	ckpt	log
pose_hrnet_w48_dark+	384x288	0.742	0.807	0.707	0.806	0.841	0.892	0.602	0.694	0.661	0.743	ckpt	log

Note: + means the model is first pre-trained on the original COCO dataset and then fine-tuned on the COCO-WholeBody dataset. We find that this leads to better performance.

Rtmpose + Rtmpose on Coco-Wholebody¶

RTMPose (arXiv'2023)

@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  year          = {2023},
  publisher     = {arXiv},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International},
}

RTMDet (arXiv'2022)

@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
rtmpose-m	256x192	0.673	0.750	0.615	0.752	0.813	0.871	0.475	0.589	0.582	0.674	ckpt	log
rtmpose-l	256x192	0.695	0.769	0.658	0.785	0.833	0.887	0.519	0.628	0.611	0.700	ckpt	log
rtmpose-l	384x288	0.712	0.781	0.693	0.811	0.882	0.919	0.579	0.677	0.648	0.730	ckpt	log

Cocktail14 Dataset¶

Rtmpose + RTMW on Cocktail14¶

RTMPose (arXiv'2023)

@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  year          = {2023},
  publisher     = {arXiv},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International},
}

RTMDet (arXiv'2022)

@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Cocktail14 denotes a model trained on the following 14 public datasets:
- AI Challenger
- CrowdPose
- MPII
- sub-JHMDB
- Halpe
- PoseTrack18
- COCO-Wholebody
- UBody
- Human-Art
- WFLW
- 300W
- COFW
- LaPa
- InterHand

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
rtmw-m	256x192	0.676	0.747	0.671	0.794	0.783	0.854	0.491	0.604	0.582	0.673	ckpt	-
rtmw-l	256x192	0.743	0.807	0.763	0.868	0.834	0.889	0.598	0.701	0.660	0.746	ckpt	-
rtmw-x	256x192	0.746	0.808	0.770	0.869	0.844	0.896	0.610	0.710	0.672	0.752	ckpt	-
rtmw-l	384x288	0.761	0.824	0.793	0.885	0.884	0.921	0.663	0.752	0.701	0.780	ckpt	-
rtmw-x	384x288	0.763	0.826	0.796	0.888	0.884	0.923	0.664	0.755	0.702	0.781	ckpt	-