Backbones¶

PVT (ICCV’2021)¶

Topdown Heatmap + PVT on Coco¶

PVT (ICCV'2021)

@inproceedings{wang2021pyramid,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {Pyramid vision transformer: A versatile backbone for dense prediction without convolutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2021},
  pages     = {568--578}
}

PVTV2 (CVMJ'2022)

@article{wang2022pvt,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {{PVT} v2: Improved baselines with {Pyramid Vision Transformer}},
  journal   = {Computational Visual Media},
  volume    = {8},
  number    = {3},
  pages     = {415--424},
  year      = {2022},
  publisher = {Springer}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_pvt-s	256x192	0.714	0.896	0.794	0.773	0.936	ckpt	log
pose_pvtv2-b2	256x192	0.737	0.905	0.812	0.791	0.942	ckpt	log

AlexNet (NeurIPS’2012)¶

Topdown Heatmap + Alexnet on Coco¶

AlexNet (NeurIPS'2012)

@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {1097--1105},
  year      = {2012}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_alexnet	256x192	0.448	0.767	0.461	0.521	0.829	ckpt	log

ResNeSt (ArXiv’2020)¶

Topdown Heatmap + Resnest on Coco¶

ResNeSt (ArXiv'2020)

@article{zhang2020resnest,
  author  = {Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
  title   = {{ResNeSt}: Split-Attention Networks},
  journal = {arXiv preprint arXiv:2004.08955},
  year    = {2020}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnest_50	256x192	0.720	0.899	0.800	0.775	0.939	ckpt	log
pose_resnest_50	384x288	0.737	0.900	0.811	0.789	0.937	ckpt	log
pose_resnest_101	256x192	0.725	0.900	0.807	0.781	0.939	ckpt	log
pose_resnest_101	384x288	0.745	0.905	0.818	0.798	0.942	ckpt	log
pose_resnest_200	256x192	0.731	0.905	0.812	0.787	0.943	ckpt	log
pose_resnest_200	384x288	0.753	0.907	0.827	0.805	0.943	ckpt	log
pose_resnest_269	256x192	0.737	0.907	0.819	0.792	0.943	ckpt	log
pose_resnest_269	384x288	0.754	0.908	0.828	0.805	0.943	ckpt	log

ShufflenetV1 (CVPR’2018)¶

Topdown Heatmap + Shufflenetv1 on Mpii¶

ShufflenetV1 (CVPR'2018)

@inproceedings{zhang2018shufflenet,
  author    = {Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
  title     = {{ShuffleNet}: An extremely efficient convolutional neural network for mobile devices},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {6848--6856},
  year      = {2018}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_shufflenetv1	256x256	0.824	0.195	ckpt	log

Topdown Heatmap + Shufflenetv1 on Coco¶

ShufflenetV1 (CVPR'2018)

@inproceedings{zhang2018shufflenet,
  author    = {Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
  title     = {{ShuffleNet}: An extremely efficient convolutional neural network for mobile devices},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {6848--6856},
  year      = {2018}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_shufflenetv1	256x192	0.587	0.849	0.654	0.654	0.896	ckpt	log
pose_shufflenetv1	384x288	0.626	0.862	0.696	0.687	0.903	ckpt	log

HRNet (CVPR’2019)¶

Topdown Heatmap + Hrnet + Ubody-Coco-Wholebody on Ubody2d¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

UBody (CVPR'2023)

@inproceedings{lin2023one,
  author    = {Lin, Jing and Zeng, Ailing and Wang, Haoqian and Zhang, Lei and Li, Yu},
  title     = {One-Stage {3D} Whole-Body Mesh Recovery with Component Aware Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2023}
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_hrnet_w32	256x192	0.685	0.759	0.564	0.675	0.625	0.705	0.516	0.609	0.549	0.646	ckpt	log

Topdown Heatmap + Hrnet on Coco-Wholebody¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_hrnet_w32	256x192	0.678	0.755	0.543	0.661	0.630	0.708	0.467	0.566	0.536	0.636	ckpt	log
pose_hrnet_w32	384x288	0.700	0.772	0.585	0.691	0.726	0.783	0.515	0.603	0.586	0.673	ckpt	log
pose_hrnet_w48	256x192	0.701	0.776	0.675	0.787	0.656	0.743	0.535	0.639	0.579	0.681	ckpt	log
pose_hrnet_w48	384x288	0.722	0.791	0.696	0.801	0.776	0.834	0.587	0.678	0.632	0.717	ckpt	log

Topdown Heatmap + Hrnet + Dark on Coco-Wholebody¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
pose_hrnet_w32_dark	256x192	0.693	0.764	0.564	0.674	0.737	0.809	0.503	0.602	0.582	0.671	ckpt	log
pose_hrnet_w48_dark+	384x288	0.742	0.807	0.707	0.806	0.841	0.892	0.602	0.694	0.661	0.743	ckpt	log

Note: + means the model is first pre-trained on the original COCO dataset, and then fine-tuned on the COCO-WholeBody dataset. We find this leads to better performance.

Topdown Heatmap + Hrnet on Deepfashion¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

UDP (CVPR'2020)

@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}

DeepFashion (CVPR'2016)

@inproceedings{liuLQWTcvpr16DeepFashion,
  author    = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
  title     = {{DeepFashion}: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
  booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2016}
}

DeepFashion (ECCV'2016)

@inproceedings{liuYLWTeccv16FashionLandmark,
  author    = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  title     = {Fashion Landmark Detection in the Wild},
  booktitle = {European Conference on Computer Vision (ECCV)},
  month     = oct,
  year      = {2016}
}

Results on DeepFashion val set

Set	Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
upper	pose_hrnet_w48_udp	256x192	96.1	60.9	15.1	ckpt	log
lower	pose_hrnet_w48_udp	256x192	97.8	76.1	8.9	ckpt	log
full	pose_hrnet_w48_udp	256x192	98.3	67.3	11.7	ckpt	log

Note: Due to time constraints, we have only trained resnet50 models. We warmly welcome any contributions if you can successfully reproduce the results from the paper!

Topdown Heatmap + Hrnet + Dark on Mpii¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_hrnet_w32_dark	256x256	0.904	0.354	ckpt	log
pose_hrnet_w48_dark	256x256	0.905	0.360	ckpt	log

Topdown Heatmap + Hrnet on Mpii¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_hrnet_w32	256x256	0.900	0.334	ckpt	log
pose_hrnet_w48	256x256	0.901	0.337	ckpt	log

Topdown Heatmap + Hrnet + Augmentation on Coco¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

Albumentations (Information'2020)

@article{buslaev2020albumentations,
  author    = {Buslaev, Alexander and Iglovikov, Vladimir I and Khvedchenya, Eugene and Parinov, Alex and Druzhinin, Mikhail and Kalinin, Alexandr A},
  title     = {Albumentations: fast and flexible image augmentations},
  journal   = {Information},
  publisher = {Multidisciplinary Digital Publishing Institute},
  year      = {2020},
  volume    = {11},
  number    = {2},
  pages     = {125}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
coarsedropout	256x192	0.753	0.908	0.822	0.805	0.944	ckpt	log
gridmask	256x192	0.752	0.906	0.825	0.804	0.943	ckpt	log
photometric	256x192	0.754	0.908	0.825	0.805	0.943	ckpt	log

Topdown Heatmap + Hrnet on Coco¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32	256x192	0.749	0.906	0.821	0.804	0.945	ckpt	log
pose_hrnet_w32	384x288	0.761	0.908	0.826	0.811	0.944	ckpt	log
pose_hrnet_w48	256x192	0.756	0.908	0.826	0.809	0.945	ckpt	log
pose_hrnet_w48	384x288	0.767	0.911	0.832	0.817	0.947	ckpt	log

Topdown Heatmap + Hrnet + Aic on Coco¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

AI Challenger (ArXiv'2017)

@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A large-scale dataset for going deeper in image understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017}
}

MMPose supports training models with combined datasets. coco-aic-merge and coco-aic-combine are two examples.

coco-aic-merge leverages AIC data with partial keypoints as auxiliary data to train a COCO model
coco-aic-combine constructs a combined dataset whose keypoints are the union of COCO and AIC keypoints to train a model that predicts keypoints of both datasets.

Evaluation results on COCO val2017 of models trained solely on the COCO dataset and on the combined datasets are shown below. These models are evaluated with a detector having human AP of 56.4 on the COCO val2017 dataset.

Train Set	Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
coco	pose_hrnet_w32	256x192	0.749	0.906	0.821	0.804	0.945	ckpt	log
coco-aic-merge	pose_hrnet_w32	256x192	0.756	0.907	0.828	0.809	0.944	ckpt	log
coco-aic-combine	pose_hrnet_w32	256x192	0.755	0.904	0.825	0.807	0.942	ckpt	log

Topdown Heatmap + Hrnet + Udp on Coco¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

UDP (CVPR'2020)

@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32_udp	256x192	0.762	0.907	0.829	0.810	0.942	ckpt	log
pose_hrnet_w32_udp	384x288	0.768	0.909	0.832	0.815	0.945	ckpt	log
pose_hrnet_w48_udp	256x192	0.768	0.908	0.833	0.817	0.945	ckpt	log
pose_hrnet_w48_udp	384x288	0.773	0.911	0.836	0.821	0.946	ckpt	log
pose_hrnet_w32_udp_regress	256x192	0.759	0.907	0.827	0.813	0.943	ckpt	log

Note that UDP also adopts the unbiased encoding/decoding algorithm of DARK.

Topdown Heatmap + Hrnet + Fp16 on Coco¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

FP16 (ArXiv'2017)

@article{micikevicius2017mixed,
  author  = {Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
  title   = {Mixed precision training},
  year    = {2017},
  journal = {arXiv preprint arXiv:1710.03740}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32_fp16	256x192	0.749	0.907	0.822	0.802	0.946	ckpt	log

Topdown Heatmap + Hrnet + Dark on Coco¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32_dark	256x192	0.757	0.907	0.825	0.807	0.943	ckpt	log
pose_hrnet_w32_dark	384x288	0.766	0.907	0.829	0.815	0.943	ckpt	log
pose_hrnet_w48_dark	256x192	0.764	0.907	0.831	0.814	0.942	ckpt	log
pose_hrnet_w48_dark	384x288	0.772	0.911	0.833	0.821	0.948	ckpt	log

Dekr + Hrnet on Coco¶

DEKR (CVPR'2021)

@inproceedings{geng2021bottom,
  author    = {Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
  title     = {Bottom-up human pose estimation via disentangled keypoint regression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021},
  pages     = {14676--14686}
}

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w32	512x512	0.686	0.868	0.750	0.735	0.898	ckpt	log
HRNet-w48	640x640	0.714	0.883	0.777	0.762	0.915	ckpt	log

Associative Embedding + Hrnet on Coco¶

Associative Embedding (NIPS'2017)

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287}
}

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w32	512x512	0.656	0.864	0.719	0.711	0.893	ckpt	log

Topdown Heatmap + Hrnet on Crowdpose¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

CrowdPose (CVPR'2019)

@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}

Results on CrowdPose test with YOLOv3 human detector

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AP (E)	AP (M)	AP (H)	ckpt	log
pose_hrnet_w32	256x192	0.675	0.825	0.729	0.770	0.687	0.553	ckpt	log

Dekr + Hrnet on Crowdpose¶

DEKR (CVPR'2021)

@inproceedings{geng2021bottom,
  author    = {Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
  title     = {Bottom-up human pose estimation via disentangled keypoint regression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021},
  pages     = {14676--14686}
}

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

CrowdPose (CVPR'2019)

@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}

Results on CrowdPose test without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AP (E)	AP (M)	AP (H)	ckpt	log
HRNet-w32	512x512	0.663	0.857	0.714	0.740	0.671	0.576	ckpt	log
HRNet-w48	640x640	0.679	0.869	0.731	0.753	0.688	0.593	ckpt	log

Topdown Heatmap + Hrnet on Exlpose¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

ExLPose (CVPR'2023)

@inproceedings{ExLPose_2023_CVPR,
  author    = {Lee, Sohyun and Rim, Jaesung and Jeong, Boseung and Kim, Geonu and Woo, ByungJu and Lee, Haechan and Cho, Sunghyun and Kwak, Suha},
  title     = {Human Pose Estimation in Extremely Low-Light Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023}
}

Results on ExLPose-LLA val set with ground-truth bounding boxes

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32	256x192	0.401	0.64	0.40	0.452	0.693	ckpt	log

Topdown Heatmap + Hrnet on Posetrack18¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

PoseTrack18 (CVPR'2018)

@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A benchmark for human pose estimation and tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018}
}

Results on PoseTrack2018 val with ground-truth bounding boxes

Arch	Input Size	Head	Shou	Elb	Wri	Hip	Knee	Ankl	Total	ckpt	log
pose_hrnet_w32	256x192	86.2	89.0	84.5	79.2	82.3	82.5	78.7	83.4	ckpt	log
pose_hrnet_w32	384x288	87.1	89.0	85.1	80.2	80.6	82.8	79.6	83.7	ckpt	log
pose_hrnet_w48	256x192	88.3	90.2	86.0	81.0	80.7	83.3	80.6	84.6	ckpt	log
pose_hrnet_w48	384x288	87.8	90.0	86.2	81.3	81.0	83.4	80.9	84.6	ckpt	log

The models are first pre-trained on COCO dataset, and then fine-tuned on PoseTrack18.

Results on PoseTrack2018 val with MMDetection pre-trained Cascade R-CNN (X-101-64x4d-FPN) human detector

Arch	Input Size	Head	Shou	Elb	Wri	Hip	Knee	Ankl	Total	ckpt	log
pose_hrnet_w32	256x192	78.0	82.9	79.5	73.8	76.9	76.6	70.2	76.9	ckpt	log
pose_hrnet_w32	384x288	79.9	83.6	80.4	74.5	74.8	76.1	70.5	77.3	ckpt	log
pose_hrnet_w48	256x192	80.1	83.4	80.6	74.8	74.3	76.8	70.5	77.4	ckpt	log
pose_hrnet_w48	384x288	80.2	83.8	80.9	75.2	74.7	76.7	71.7	77.8	ckpt	log

The models are first pre-trained on COCO dataset, and then fine-tuned on PoseTrack18.

Topdown Heatmap + Hrnet on Aic¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

AI Challenger (ArXiv'2017)

@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A large-scale dataset for going deeper in image understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017}
}

Results on AIC val set with ground-truth bounding boxes

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32	256x192	0.323	0.761	0.218	0.366	0.789	ckpt	log

Topdown Heatmap + Hrnet on Humanart¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Human-Art (CVPR'2023)

@inproceedings{ju2023humanart,
    title={{Human-Art}: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
    author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
    booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    year={2023}
}

Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset

With classic decoder

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32-coco	256x192	0.252	0.397	0.255	0.321	0.485	ckpt	log
pose_hrnet_w32-humanart-coco	256x192	0.399	0.545	0.420	0.466	0.613	ckpt	log
pose_hrnet_w48-coco	256x192	0.271	0.413	0.277	0.339	0.499	ckpt	log
pose_hrnet_w48-humanart-coco	256x192	0.417	0.553	0.442	0.481	0.617	ckpt	log

Results on Human-Art validation dataset with ground-truth bounding boxes

With classic decoder

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32-coco	256x192	0.533	0.771	0.562	0.574	0.792	ckpt	log
pose_hrnet_w32-humanart-coco	256x192	0.754	0.906	0.812	0.783	0.916	ckpt	log
pose_hrnet_w48-coco	256x192	0.557	0.782	0.593	0.595	0.804	ckpt	log
pose_hrnet_w48-humanart-coco	256x192	0.769	0.906	0.825	0.796	0.919	ckpt	log

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

With classic decoder

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32-coco	256x192	0.749	0.906	0.821	0.804	0.945	ckpt	log
pose_hrnet_w32-humanart-coco	256x192	0.741	0.902	0.814	0.795	0.941	ckpt	log
pose_hrnet_w48-coco	256x192	0.756	0.908	0.826	0.809	0.945	ckpt	log
pose_hrnet_w48-humanart-coco	256x192	0.751	0.905	0.822	0.805	0.943	ckpt	log

Topdown Heatmap + Hrnet + Animalkingdom on Ak¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  title={Deep high-resolution representation learning for human pose estimation},
  author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={5693--5703},
  year={2019}
}

AnimalKingdom (CVPR'2022)

@InProceedings{Ng_2022_CVPR,
    author    = {Ng, Xun Long and Ong, Kian Eng and Zheng, Qichen and Ni, Yun and Yeo, Si Yong and Liu, Jun},
    title     = {{Animal Kingdom}: A Large and Diverse Dataset for Animal Behavior Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2022},
    pages     = {19023--19034}
}

Results on AnimalKingdom validation set

Arch	Input Size	PCK(0.05)	Official Repo	Paper	ckpt	log
P1_hrnet_w32	256x256	0.6323	0.6342	0.6606	ckpt	log
P2_hrnet_w32	256x256	0.3741	0.3726	0.393	ckpt	log
P3_mammals_hrnet_w32	256x256	0.571	0.5719	0.6159	ckpt	log
P3_amphibians_hrnet_w32	256x256	0.5358	0.5432	0.5674	ckpt	log
P3_reptiles_hrnet_w32	256x256	0.51	0.5	0.5606	ckpt	log
P3_birds_hrnet_w32	256x256	0.7671	0.7636	0.7735	ckpt	log
P3_fishes_hrnet_w32	256x256	0.6406	0.636	0.6825	ckpt	log

Topdown Heatmap + Hrnet on Ap10k¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  title={Deep high-resolution representation learning for human pose estimation},
  author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={5693--5703},
  year={2019}
}

AP-10K (NeurIPS'2021)

@misc{yu2021ap10k,
      title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
      author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
      year={2021},
      eprint={2108.12617},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}

Results on AP-10K validation set

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AP^M	AP^L	ckpt	log
pose_hrnet_w32	256x256	0.722	0.935	0.789	0.557	0.729	ckpt	log
pose_hrnet_w48	256x256	0.728	0.936	0.802	0.577	0.735	ckpt	log

Topdown Heatmap + Hrnet on Animalpose¶

HRNet (CVPR'2019)

@inproceedings{sun2019deep,
  title={Deep high-resolution representation learning for human pose estimation},
  author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={5693--5703},
  year={2019}
}

Animal-Pose (ICCV'2019)

@InProceedings{Cao_2019_ICCV,
    author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
    title = {Cross-Domain Adaptation for Animal Pose Estimation},
    booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
    month = oct,
    year = {2019}
}

Results on AnimalPose validation set (1117 instances)

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrnet_w32	256x256	0.740	0.959	0.833	0.780	0.965	ckpt	log
pose_hrnet_w48	256x256	0.738	0.958	0.831	0.778	0.962	ckpt	log

Swin (ICCV’2021)¶

Topdown Heatmap + Swin on Coco¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

Swin (ICCV'2021)

@inproceedings{liu2021swin,
  title={Swin transformer: Hierarchical vision transformer using shifted windows},
  author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={10012--10022},
  year={2021}
}

FPN (CVPR'2017)

@inproceedings{lin2017feature,
  title={Feature pyramid networks for object detection},
  author={Lin, Tsung-Yi and Doll{\'a}r, Piotr and Girshick, Ross and He, Kaiming and Hariharan, Bharath and Belongie, Serge},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={2117--2125},
  year={2017}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_swin_t	256x192	0.724	0.901	0.806	0.782	0.940	ckpt	log
pose_swin_b	256x192	0.737	0.904	0.820	0.794	0.942	ckpt	log
pose_swin_b	384x288	0.759	0.910	0.832	0.811	0.946	ckpt	log
pose_swin_l	256x192	0.743	0.906	0.821	0.798	0.943	ckpt	log
pose_swin_l	384x288	0.763	0.912	0.830	0.814	0.949	ckpt	log

ResNet (CVPR’2016)¶

Internet + Internet on Interhand3d¶

InterNet (ECCV'2020)

@InProceedings{Moon_2020_ECCV_InterHand2.6M,
author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
booktitle = {European Conference on Computer Vision (ECCV)},
year = {2020}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

InterHand2.6M (ECCV'2020)

@InProceedings{Moon_2020_ECCV_InterHand2.6M,
author = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
title = {InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image},
booktitle = {European Conference on Computer Vision (ECCV)},
year = {2020}
}

Results on InterHand2.6M val and test sets

Train Set	Set	Arch	Input Size	MPJPE-single	MPJPE-interacting	MPJPE-all	MRRPE	APh	ckpt	log
All	test(H+M)	InterNet_resnet_50	256x256	9.69	13.72	11.86	29.27	0.99	ckpt	log
All	val(M)	InterNet_resnet_50	256x256	11.30	15.57	13.36	32.15	0.98	ckpt	log
All	test(H+M)	InterNet_resnet_50*	256x256	9.47	13.40	11.59	29.28	0.99	ckpt	log
All	val(M)	InterNet_resnet_50*	256x256	11.22	15.23	13.16	31.73	0.98	ckpt	log

Models with * are trained in MMPose 0.x. The checkpoints and logs are only for validation.

Topdown Heatmap + Resnet on Deepfashion¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

DeepFashion (CVPR'2016)

@inproceedings{liuLQWTcvpr16DeepFashion,
 author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
 title = {{DeepFashion}: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
 booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
 month = jun,
 year = {2016}
}

DeepFashion (ECCV'2016)

@inproceedings{liuYLWTeccv16FashionLandmark,
 author = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
 title = {Fashion Landmark Detection in the Wild},
 booktitle = {European Conference on Computer Vision (ECCV)},
 month = oct,
 year = {2016}
}

Results on DeepFashion val set

Set	Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
upper	pose_resnet_50	256x192	95.4	57.8	16.8	ckpt	log
lower	pose_resnet_50	256x192	96.5	74.4	10.5	ckpt	log
full	pose_resnet_50	256x192	97.7	66.4	12.7	ckpt	log

Note: Due to time constraints, we have only trained resnet50 models. We warmly welcome any contributions if you can successfully reproduce the results from the paper!

Topdown Heatmap + Res50 on Deepfashion2¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

DeepFashion2 (CVPR'2019)

@inproceedings{DeepFashion2,
  author = {Ge, Yuying and Zhang, Ruimao and Wu, Lingyun and Wang, Xiaogang and Tang, Xiaoou and Luo, Ping},
  title={{DeepFashion2}: A Versatile Benchmark for Detection, Pose Estimation, Segmentation and Re-Identification of Clothing Images},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2019}
}

Results on DeepFashion2 val set

Set	Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
short_sleeved_shirt	pose_resnet_50	256x192	0.988	0.703	10.2	ckpt	log
long_sleeved_shirt	pose_resnet_50	256x192	0.973	0.587	16.6	ckpt	log
short_sleeved_outwear	pose_resnet_50	256x192	0.966	0.408	24.0	ckpt	log
long_sleeved_outwear	pose_resnet_50	256x192	0.987	0.517	18.1	ckpt	log
vest	pose_resnet_50	256x192	0.981	0.643	12.7	ckpt	log
sling	pose_resnet_50	256x192	0.940	0.557	21.6	ckpt	log
shorts	pose_resnet_50	256x192	0.975	0.682	12.4	ckpt	log
trousers	pose_resnet_50	256x192	0.973	0.625	14.8	ckpt	log
skirt	pose_resnet_50	256x192	0.952	0.653	16.6	ckpt	log
short_sleeved_dress	pose_resnet_50	256x192	0.980	0.603	15.6	ckpt	log
long_sleeved_dress	pose_resnet_50	256x192	0.976	0.518	20.1	ckpt	log
vest_dress	pose_resnet_50	256x192	0.980	0.600	16.0	ckpt	log
sling_dress	pose_resnet_50	256x192	0.967	0.544	19.5	ckpt	log

Topdown Regression + Resnet + Rle on Mpii¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={Deeppose: Human pose estimation via deep neural networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

RLE (ICCV'2021)

@inproceedings{li2021human,
  title={Human pose regression with residual log-likelihood estimation},
  author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={11025--11034},
  year={2021}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2014},
  month = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
deeppose_resnet_50_rle	256x256	0.861	0.277	ckpt	log

Topdown Regression + Resnet on Mpii¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={Deeppose: Human pose estimation via deep neural networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2014},
  month = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
deeppose_resnet_50	256x256	0.826	0.180	ckpt	log
deeppose_resnet_101	256x256	0.841	0.200	ckpt	log
deeppose_resnet_152	256x256	0.850	0.208	ckpt	log

Topdown Heatmap + Resnet on Mpii¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2014},
  month = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_resnet_50	256x256	0.882	0.286	ckpt	log
pose_resnet_101	256x256	0.888	0.290	ckpt	log
pose_resnet_152	256x256	0.889	0.303	ckpt	log

Topdown Regression + Resnet on Coco¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={Deeppose: Human pose estimation via deep neural networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
deeppose_resnet_50	256x192	0.541	0.824	0.601	0.649	0.893	ckpt	log
deeppose_resnet_101	256x192	0.562	0.831	0.629	0.670	0.900	ckpt	log
deeppose_resnet_152	256x192	0.584	0.842	0.659	0.688	0.907	ckpt	log

Topdown Regression + Resnet + Rle on Coco¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={Deeppose: Human pose estimation via deep neural networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

RLE (ICCV'2021)

@inproceedings{li2021human,
  title={Human pose regression with residual log-likelihood estimation},
  author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={11025--11034},
  year={2021}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
deeppose_resnet_50_rle	256x192	0.706	0.888	0.776	0.753	0.924	ckpt	log
deeppose_resnet_50_rle_pretrained	256x192	0.719	0.891	0.788	0.764	0.925	ckpt	log
deeppose_resnet_101_rle	256x192	0.722	0.894	0.794	0.768	0.930	ckpt	log
deeppose_resnet_152_rle	256x192	0.731	0.897	0.805	0.777	0.933	ckpt	log
deeppose_resnet_152_rle	384x288	0.749	0.901	0.815	0.793	0.935	ckpt	log

Edpose + Edpose on Coco¶

ED-Pose (ICLR'2023)

@inproceedings{
yang2023explicit,
title={Explicit Box Detection Unifies End-to-End Multi-Person Pose Estimation},
author={Jie Yang and Ailing Zeng and Shilong Liu and Feng Li and Ruimao Zhang and Lei Zhang},
booktitle={International Conference on Learning Representations},
year={2023},
url={https://openreview.net/forum?id=s4WVupnJjmX}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017.

Arch	BackBone	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
edpose_res50_coco	ResNet-50	0.716	0.897	0.783	0.793	0.943	ckpt	log

The checkpoint is converted from the official repo. The training of EDPose is not supported yet. It will be supported in future updates.

The above config follows Pure Python style. Please install mmengine>=0.8.2 to use this config.

Simcc + Resnet on Coco¶

SimCC (ECCV'2022)

@misc{https://doi.org/10.48550/arxiv.2107.03332,
  title={{SimCC}: A Simple Coordinate Classification Perspective for Human Pose Estimation},
  author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
  year={2021},
  eprint={2107.03332},
  archivePrefix={arXiv},
  doi={10.48550/arXiv.2107.03332}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
simcc_resnet_50	256x192	0.721	0.897	0.798	0.781	0.937	ckpt	log
simcc_resnet_50	384x288	0.735	0.899	0.800	0.790	0.939	ckpt	log

Topdown Heatmap + Resnet + Fp16 on Coco¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

FP16 (ArXiv'2017)

@article{micikevicius2017mixed,
  title={Mixed precision training},
  author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
  journal={arXiv preprint arXiv:1710.03740},
  year={2017}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnet_50_fp16	256x192	0.716	0.898	0.798	0.772	0.937	ckpt	log

Topdown Heatmap + Resnet + Dark on Coco¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  title={Distribution-aware coordinate representation for human pose estimation},
  author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={7093--7102},
  year={2020}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnet_50_dark	256x192	0.724	0.897	0.797	0.777	0.934	ckpt	log
pose_resnet_50_dark	384x288	0.735	0.902	0.801	0.786	0.938	ckpt	log
pose_resnet_101_dark	256x192	0.733	0.900	0.810	0.786	0.938	ckpt	log
pose_resnet_101_dark	384x288	0.749	0.905	0.818	0.799	0.940	ckpt	log
pose_resnet_152_dark	256x192	0.743	0.906	0.819	0.796	0.943	ckpt	log
pose_resnet_152_dark	384x288	0.755	0.907	0.825	0.805	0.943	ckpt	log

Topdown Heatmap + Resnet on Coco¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnet_50	256x192	0.718	0.898	0.796	0.774	0.934	ckpt	log
pose_resnet_50	384x288	0.731	0.900	0.799	0.782	0.937	ckpt	log
pose_resnet_101	256x192	0.728	0.904	0.809	0.783	0.942	ckpt	log
pose_resnet_101	384x288	0.749	0.906	0.817	0.799	0.941	ckpt	log
pose_resnet_152	256x192	0.736	0.904	0.818	0.791	0.942	ckpt	log
pose_resnet_152	384x288	0.750	0.908	0.821	0.800	0.942	ckpt	log

The following model is equipped with a visibility prediction head and has been trained using COCO and AIC datasets.

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnet_50	256x192	0.729	0.900	0.807	0.783	0.938	ckpt	log

Integral Regression + Resnet + Debias on Coco¶

Debias IPR (ICCV'2021)

@inproceedings{gu2021removing,
    title={Removing the Bias of Integral Pose Regression},
    author={Gu, Kerui and Yang, Linlin and Yao, Angela},
    booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
    pages={11067--11076},
    year={2021}
  }

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
debias-ipr_resnet_50	256x256	0.675	0.872	0.740	0.765	0.928	ckpt	log

Integral Regression + Resnet + DSNT on Coco¶

DSNT (2018)

@article{nibali2018numerical,
  title={Numerical Coordinate Regression with Convolutional Neural Networks},
  author={Nibali, Aiden and He, Zhen and Morgan, Stuart and Prendergast, Luke},
  journal={arXiv preprint arXiv:1801.07372},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
ipr_resnet_50_dsnt	256x256	0.674	0.870	0.744	0.764	0.928	ckpt	log

Integral Regression + Resnet + Ipr on Coco¶

IPR (ECCV'2018)

@inproceedings{sun2018integral,
  title={Integral human pose regression},
  author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={529--545},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft coco: Common objects in context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
ipr_resnet_50	256x256	0.633	0.860	0.703	0.730	0.919	ckpt	log

Topdown Heatmap + Resnet on Crowdpose¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  title={Simple baselines for human pose estimation and tracking},
  author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={466--481},
  year={2018}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  title={Deep residual learning for image recognition},
  author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={770--778},
  year={2016}
}

CrowdPose (CVPR'2019)

@article{li2018crowdpose,
  title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  journal={arXiv preprint arXiv:1812.00324},
  year={2018}
}

Results on CrowdPose test with YOLOv3 human detector

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AP (E)	AP (M)	AP (H)	ckpt	log
pose_resnet_50	256x192	0.637	0.808	0.692	0.738	0.650	0.506	ckpt	log
pose_resnet_101	256x192	0.647	0.810	0.703	0.745	0.658	0.521	ckpt	log
pose_resnet_101	320x256	0.661	0.821	0.714	0.759	0.672	0.534	ckpt	log
pose_resnet_152	256x192	0.656	0.818	0.712	0.754	0.666	0.533	ckpt	log

Topdown Heatmap + Resnet on JHMDB¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

JHMDB (ICCV'2013)

@inproceedings{Jhuang:ICCV:2013,
  title = {Towards understanding action recognition},
  author = {Jhuang, Hueihan and Gall, Juergen and Zuffi, Silvia and Schmid, Cordelia and Black, Michael J.},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month = dec,
  pages = {3192--3199},
  year = {2013}
}

Results on Sub-JHMDB dataset

The models are pre-trained on the MPII dataset only. NO test-time augmentation (multi-scale / rotation testing) is used.

Normalized by Person Size

Split	Arch	Input Size	Head	Sho	Elb	Wri	Hip	Knee	Ank	Mean	ckpt	log
Sub1	pose_resnet_50	256x256	99.1	98.0	93.8	91.3	99.4	96.5	92.8	96.1	ckpt	log
Sub2	pose_resnet_50	256x256	99.3	97.1	90.6	87.0	98.9	96.3	94.1	95.0	ckpt	log
Sub3	pose_resnet_50	256x256	99.0	97.9	94.0	91.6	99.7	98.0	94.7	96.7	ckpt	log
Average	pose_resnet_50	256x256	99.2	97.7	92.8	90.0	99.3	96.9	93.9	96.0	-	-
Sub1	pose_resnet_50 (2 Deconv.)	256x256	99.1	98.5	94.6	92.0	99.4	94.6	92.5	96.1	ckpt	log
Sub2	pose_resnet_50 (2 Deconv.)	256x256	99.3	97.8	91.0	87.0	99.1	96.5	93.8	95.2	ckpt	log
Sub3	pose_resnet_50 (2 Deconv.)	256x256	98.8	98.4	94.3	92.1	99.8	97.5	93.8	96.7	ckpt	log
Average	pose_resnet_50 (2 Deconv.)	256x256	99.1	98.2	93.3	90.4	99.4	96.2	93.4	96.0	-	-

Normalized by Torso Size

Split	Arch	Input Size	Head	Sho	Elb	Wri	Hip	Knee	Ank	Mean	ckpt	log
Sub1	pose_resnet_50	256x256	93.3	83.2	74.4	72.7	85.0	81.2	78.9	81.9	ckpt	log
Sub2	pose_resnet_50	256x256	94.1	74.9	64.5	62.5	77.9	71.9	78.6	75.5	ckpt	log
Sub3	pose_resnet_50	256x256	97.0	82.2	74.9	70.7	84.7	83.7	84.2	82.9	ckpt	log
Average	pose_resnet_50	256x256	94.8	80.1	71.3	68.6	82.5	78.9	80.6	80.1	-	-
Sub1	pose_resnet_50 (2 Deconv.)	256x256	92.4	80.6	73.2	70.5	82.3	75.4	75.0	79.2	ckpt	log
Sub2	pose_resnet_50 (2 Deconv.)	256x256	93.4	73.6	63.8	60.5	75.1	68.4	75.5	73.7	ckpt	log
Sub3	pose_resnet_50 (2 Deconv.)	256x256	96.1	81.2	72.6	67.9	83.6	80.9	81.5	81.2	ckpt	log
Average	pose_resnet_50 (2 Deconv.)	256x256	94.0	78.5	69.9	66.3	80.3	74.9	77.3	78.0	-	-

Topdown Heatmap + Resnet on Posetrack18¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

PoseTrack18 (CVPR'2018)

@inproceedings{andriluka2018posetrack,
  title={{PoseTrack}: A Benchmark for Human Pose Estimation and Tracking},
  author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages={5167--5176},
  year={2018}
}

Results on PoseTrack2018 val with ground-truth bounding boxes

Arch	Input Size	Head	Shou	Elb	Wri	Hip	Knee	Ankl	Total	ckpt	log
pose_resnet_50	256x192	86.5	87.7	82.5	75.8	80.1	78.8	74.2	81.2	ckpt	log

The models are first pre-trained on the COCO dataset, and then fine-tuned on PoseTrack18.

Topdown Heatmap + Resnet on Aic¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

AI Challenger (ArXiv'2017)

@article{wu2017ai,
  title={{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  journal={arXiv preprint arXiv:1711.06475},
  year={2017}
}

Results on AIC val set with ground-truth bounding boxes

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnet_101	256x192	0.294	0.736	0.172	0.337	0.762	ckpt	log

Topdown Regression + Resnet + Softwingloss on WFLW¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

SoftWingloss (TIP'2021)

@article{lin2021structure,
  author    = {Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie},
  title     = {Structure-Coherent Deep Feature Learning for Robust Face Alignment},
  journal   = {IEEE Transactions on Image Processing},
  publisher = {IEEE},
  year      = {2021}
}

WFLW (CVPR'2018)

@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {2129--2138}
}

Results on WFLW dataset

The model is trained on WFLW train set.

Model	Input Size	NME	ckpt	log
ResNet-50+SoftWingLoss	256x256	4.44	ckpt	log

Topdown Regression + Resnet on WFLW¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

WFLW (CVPR'2018)

@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {2129--2138}
}

Results on WFLW dataset

The model is trained on WFLW train set.

Model	Input Size	NME	ckpt	log
ResNet-50	256x256	4.88	ckpt	log

Topdown Regression + Resnet + Wingloss on WFLW¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

Wingloss (CVPR'2018)

@inproceedings{feng2018wing,
  title={Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks},
  author={Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun},
  booktitle={Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on},
  year={2018},
  pages={2235--2245},
  organization={IEEE}
}

WFLW (CVPR'2018)

@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {2129--2138}
}

Results on WFLW dataset

The model is trained on WFLW train set.

Model	Input Size	NME	ckpt	log
ResNet-50+WingLoss	256x256	4.67	ckpt	log

Topdown Heatmap + Resnet + Coco + Wholebody + Face on Coco_wholebody_face¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

COCO-WholeBody-Face (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody-Face val set

Arch	Input Size	NME	ckpt	log
pose_res50	256x256	0.0582	ckpt	log

Topdown Regression + Resnet on Rhd2d¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

RHD (ICCV'2017)

@TechReport{zb2017hand,
  author={Zimmermann, Christian and Brox, Thomas},
  title={Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution={arXiv:1705.01389},
  year={2017},
  note="https://arxiv.org/abs/1705.01389",
  url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
}

Results on RHD test set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
deeppose_resnet_50	256x256	0.988	0.865	3.32	ckpt	log

Topdown Heatmap + Resnet on Rhd2d¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

RHD (ICCV'2017)

@TechReport{zb2017hand,
  author={Zimmermann, Christian and Brox, Thomas},
  title={Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution={arXiv:1705.01389},
  year={2017},
  note="https://arxiv.org/abs/1705.01389",
  url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
}

Results on RHD test set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_resnet50	256x256	0.991	0.898	2.32	ckpt	log

Topdown Regression + Resnet on Onehand10k¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

OneHand10K (TCSVT'2019)

@article{wang2018mask,
  title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  author={Wang, Yangang and Peng, Cong and Liu, Yebin},
  journal={IEEE Transactions on Circuits and Systems for Video Technology},
  volume={29},
  number={11},
  pages={3258--3268},
  year={2018},
  publisher={IEEE}
}

Results on OneHand10K val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
deeppose_resnet_50	256x256	0.990	0.485	34.21	ckpt	log

Topdown Heatmap + Resnet on Onehand10k¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

OneHand10K (TCSVT'2019)

@article{wang2018mask,
  title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  author={Wang, Yangang and Peng, Cong and Liu, Yebin},
  journal={IEEE Transactions on Circuits and Systems for Video Technology},
  volume={29},
  number={11},
  pages={3258--3268},
  year={2018},
  publisher={IEEE}
}

Results on OneHand10K val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_resnet_50	256x256	0.989	0.555	25.16	ckpt	log

Topdown Heatmap + Resnet + Coco + Wholebody + Hand on Coco_wholebody_hand¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

COCO-WholeBody-Hand (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody-Hand val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_resnet_50	256x256	0.800	0.833	4.64	ckpt	log

Topdown Heatmap + Resnet on Freihand2d¶

SimpleBaseline2D (ECCV'2018)

@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

FreiHand (ICCV'2019)

@inproceedings{zimmermann2019freihand,
  title={{FreiHAND}: A Dataset for Markerless Capture of Hand Pose and Shape from Single {RGB} Images},
  author={Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas},
  booktitle={Proceedings of the IEEE International Conference on Computer Vision},
  pages={813--822},
  year={2019}
}

Results on FreiHand val & test set

Set	Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
test	pose_resnet_50	224x224	0.999	0.868	3.27	ckpt	log

SCNet (CVPR’2020)¶

Topdown Heatmap + Scnet on Mpii¶

SCNet (CVPR'2020)

@inproceedings{liu2020improving,
  author    = {Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
  title     = {Improving Convolutional Networks with Self-Calibrated Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {10096--10105}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2014},
  month = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_scnet_50	256x256	0.888	0.290	ckpt	log
pose_scnet_101	256x256	0.887	0.293	ckpt	log

Topdown Heatmap + Scnet on Coco¶

SCNet (CVPR'2020)

@inproceedings{liu2020improving,
  author    = {Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
  title     = {Improving Convolutional Networks with Self-Calibrated Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {10096--10105}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft {COCO}: Common Objects in Context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_scnet_50	256x192	0.728	0.899	0.807	0.784	0.938	ckpt	log
pose_scnet_50	384x288	0.751	0.906	0.818	0.802	0.942	ckpt	log
pose_scnet_101	256x192	0.733	0.902	0.811	0.789	0.940	ckpt	log
pose_scnet_101	384x288	0.752	0.906	0.823	0.804	0.943	ckpt	log

Topdown Heatmap + Scnet + Coco + Wholebody + Face on Coco_wholebody_face¶

SCNet (CVPR'2020)

@inproceedings{liu2020improving,
  author    = {Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
  title     = {Improving Convolutional Networks with Self-Calibrated Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {10096--10105}
}

COCO-WholeBody-Face (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody-Face val set

Arch	Input Size	NME	ckpt	log
pose_scnet_50	256x256	0.0567	ckpt	log

Topdown Heatmap + Scnet + Coco + Wholebody + Hand on Coco_wholebody_hand¶

SCNet (CVPR'2020)

@inproceedings{liu2020improving,
  author    = {Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
  title     = {Improving Convolutional Networks with Self-Calibrated Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {10096--10105}
}

COCO-WholeBody-Hand (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody-Hand val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_scnet_50	256x256	0.803	0.834	4.55	ckpt	log

ShufflenetV2 (ECCV’2018)¶

Topdown Heatmap + Shufflenetv2 on Mpii¶

ShufflenetV2 (ECCV'2018)

@inproceedings{ma2018shufflenet,
  title={{ShuffleNet} {V2}: Practical Guidelines for Efficient {CNN} Architecture Design},
  author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={116--131},
  year={2018}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2014},
  month = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_shufflenetv2	256x256	0.828	0.205	ckpt	log

Topdown Heatmap + Shufflenetv2 on Coco¶

ShufflenetV2 (ECCV'2018)

@inproceedings{ma2018shufflenet,
  title={{ShuffleNet} {V2}: Practical Guidelines for Efficient {CNN} Architecture Design},
  author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
  booktitle={Proceedings of the European conference on computer vision (ECCV)},
  pages={116--131},
  year={2018}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft {COCO}: Common Objects in Context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_shufflenetv2	256x192	0.602	0.857	0.672	0.668	0.902	ckpt	log
pose_shufflenetv2	384x288	0.638	0.866	0.707	0.699	0.910	ckpt	log

MSPN (ArXiv’2019)¶

Topdown Heatmap + MSPN on Coco¶

MSPN (ArXiv'2019)

@article{li2019rethinking,
  author  = {Li, Wenbo and Wang, Zhicheng and Yin, Binyi and Peng, Qixiang and Du, Yuming and Xiao, Tianzi and Yu, Gang and Lu, Hongtao and Wei, Yichen and Sun, Jian},
  title   = {Rethinking on Multi-Stage Networks for Human Pose Estimation},
  journal = {arXiv preprint arXiv:1901.00148},
  year    = {2019}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft {COCO}: Common Objects in Context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
mspn_50	256x192	0.723	0.895	0.794	0.788	0.934	ckpt	log
2xmspn_50	256x192	0.754	0.903	0.826	0.816	0.942	ckpt	log
3xmspn_50	256x192	0.758	0.904	0.830	0.821	0.943	ckpt	log
4xmspn_50	256x192	0.765	0.906	0.835	0.826	0.943	ckpt	log

VGG (ICLR’2015)¶

Topdown Heatmap + VGG on Coco¶

VGG (ICLR'2015)

@article{simonyan2014very,
  author  = {Simonyan, Karen and Zisserman, Andrew},
  title   = {Very deep convolutional networks for large-scale image recognition},
  journal = {arXiv preprint arXiv:1409.1556},
  year    = {2014}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft {COCO}: Common Objects in Context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
vgg	256x192	0.699	0.890	0.769	0.754	0.927	ckpt	log

HRFormer (NeurIPS’2021)¶

Topdown Heatmap + Hrformer on Coco¶

HRFormer (NeurIPS'2021)

@article{yuan2021hrformer,
  title={{HRFormer}: High-Resolution Vision Transformer for Dense Predict},
  author={Yuan, Yuhui and Fu, Rao and Huang, Lang and Lin, Weihong and Zhang, Chao and Chen, Xilin and Wang, Jingdong},
  journal={Advances in Neural Information Processing Systems},
  volume={34},
  year={2021}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft {COCO}: Common Objects in Context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hrformer_small	256x192	0.738	0.904	0.812	0.793	0.941	ckpt	log
pose_hrformer_small	384x288	0.757	0.905	0.824	0.807	0.941	ckpt	log
pose_hrformer_base	256x192	0.754	0.906	0.827	0.807	0.943	ckpt	log
pose_hrformer_base	384x288	0.774	0.909	0.842	0.823	0.945	ckpt	log

CPM (CVPR’2016)¶

Topdown Heatmap + CPM on Mpii¶

CPM (CVPR'2016)

@inproceedings{wei2016convolutional,
  author    = {Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
  title     = {Convolutional pose machines},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  year      = {2016},
  pages     = {4724--4732}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2014},
  month = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
cpm	368x368	0.876	0.285	ckpt	log

Topdown Heatmap + CPM on Coco¶

CPM (CVPR'2016)

@inproceedings{wei2016convolutional,
  author    = {Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
  title     = {Convolutional pose machines},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  year      = {2016},
  pages     = {4724--4732}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft {COCO}: Common Objects in Context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
cpm	256x192	0.627	0.862	0.709	0.689	0.906	ckpt	log
cpm	384x288	0.652	0.865	0.730	0.710	0.907	ckpt	log

Topdown Heatmap + CPM on JHMDB¶

CPM (CVPR'2016)

@inproceedings{wei2016convolutional,
  author    = {Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
  title     = {Convolutional pose machines},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  year      = {2016},
  pages     = {4724--4732}
}

JHMDB (ICCV'2013)

@inproceedings{Jhuang:ICCV:2013,
  title = {Towards understanding action recognition},
  author = {Jhuang, Hueihan and Gall, Juergen and Zuffi, Silvia and Schmid, Cordelia and Black, Michael J.},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month = dec,
  pages = {3192--3199},
  year = {2013}
}

Results on Sub-JHMDB dataset

The models are pre-trained on the MPII dataset only. NO test-time augmentation (multi-scale / rotation testing) is used.

Normalized by Person Size

Split	Arch	Input Size	Head	Sho	Elb	Wri	Hip	Knee	Ank	Mean	ckpt	log
Sub1	cpm	368x368	96.1	91.9	81.0	78.9	96.6	90.8	87.3	89.5	ckpt	log
Sub2	cpm	368x368	98.1	93.6	77.1	70.9	94.0	89.1	84.7	87.4	ckpt	log
Sub3	cpm	368x368	97.9	94.9	87.3	84.0	98.6	94.4	86.2	92.4	ckpt	log
Average	cpm	368x368	97.4	93.5	81.5	77.9	96.4	91.4	86.1	89.8	-	-

Normalized by Torso Size

Split	Arch	Input Size	Head	Sho	Elb	Wri	Hip	Knee	Ank	Mean	ckpt	log
Sub1	cpm	368x368	89.0	63.0	54.0	54.9	68.2	63.1	61.2	66.0	ckpt	log
Sub2	cpm	368x368	90.3	57.9	46.8	44.3	60.8	58.2	62.4	61.1	ckpt	log
Sub3	cpm	368x368	91.0	72.6	59.9	54.0	73.2	68.5	65.8	70.3	ckpt	log
Average	cpm	368x368	90.1	64.5	53.6	51.1	67.4	63.3	63.1	65.7	-	-

Hourglass (ECCV’2016)¶

Topdown Heatmap + Hourglass on Mpii¶

Hourglass (ECCV'2016)

@inproceedings{newell2016stacked,
  author       = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title        = {Stacked hourglass networks for human pose estimation},
  booktitle    = {European conference on computer vision},
  organization = {Springer},
  year         = {2016},
  pages        = {483--499}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2014},
  month = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_hourglass_52	256x256	0.889	0.317	ckpt	log
pose_hourglass_52	384x384	0.894	0.367	ckpt	log

Topdown Heatmap + Hourglass on Coco¶

Hourglass (ECCV'2016)

@inproceedings{newell2016stacked,
  author       = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title        = {Stacked hourglass networks for human pose estimation},
  booktitle    = {European conference on computer vision},
  organization = {Springer},
  year         = {2016},
  pages        = {483--499}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  title={Microsoft {COCO}: Common Objects in Context},
  author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle={European conference on computer vision},
  pages={740--755},
  year={2014},
  organization={Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_hourglass_52	256x256	0.726	0.896	0.799	0.780	0.934	ckpt	log
pose_hourglass_52	384x384	0.746	0.900	0.812	0.797	0.939	ckpt	log

Topdown Heatmap + Hourglass + Coco + Wholebody + Face on Coco_wholebody_face¶

Hourglass (ECCV'2016)

@inproceedings{newell2016stacked,
  author       = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title        = {Stacked hourglass networks for human pose estimation},
  booktitle    = {European conference on computer vision},
  organization = {Springer},
  year         = {2016},
  pages        = {483--499}
}

COCO-WholeBody-Face (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody-Face val set

Arch	Input Size	NME	ckpt	log
pose_hourglass_52	256x256	0.0587	ckpt	log

Topdown Heatmap + Hourglass + Coco + Wholebody + Hand on Coco_wholebody_hand¶

Hourglass (ECCV'2016)

@inproceedings{newell2016stacked,
  author    = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title     = {Stacked Hourglass Networks for Human Pose Estimation},
  booktitle = {European Conference on Computer Vision},
  pages     = {483--499},
  year      = {2016},
  publisher = {Springer}
}

COCO-WholeBody-Hand (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody-Hand val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hourglass_52	256x256	0.804	0.835	4.54	ckpt	log

MobilenetV2 (CVPR’2018)¶

Topdown Heatmap + Mobilenetv2 on Mpii¶

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_mobilenetv2	256x256	0.854	0.234	ckpt	log

Topdown Regression + Mobilenetv2 + Rle on Coco¶

DeepPose (CVPR'2014)

@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {1653--1660},
  year      = {2014}
}

RLE (ICCV'2021)

@inproceedings{li2021human,
  author    = {Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
  title     = {Human pose regression with residual log-likelihood estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2021},
  pages     = {11025--11034},
}

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
deeppose_mobilenetv2_rle_pretrained	256x192	0.593	0.836	0.660	0.644	0.877	ckpt	log

Simcc + Mobilenetv2 on Coco¶

SimCC (ECCV'2022)

@misc{https://doi.org/10.48550/arxiv.2107.03332,
  author        = {Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
  title         = {{SimCC}: A Simple Coordinate Classification Perspective for Human Pose Estimation},
  year          = {2021},
  eprint        = {2107.03332},
  archiveprefix = {arXiv},
  doi           = {10.48550/arxiv.2107.03332}
}

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
simcc_mobilenetv2_wo_deconv	256x192	0.620	0.855	0.697	0.678	0.902	ckpt	log

Topdown Heatmap + Mobilenetv2 on Coco¶

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_mobilenetv2	256x192	0.648	0.874	0.725	0.709	0.918	ckpt	log
pose_mobilenetv2	384x288	0.677	0.882	0.746	0.734	0.920	ckpt	log

Topdown Heatmap + Mobilenetv2 + Coco + Wholebody + Face on Coco_wholebody_face¶

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

COCO-WholeBody-Face (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody-Face val set

Arch	Input Size	NME	ckpt	log
pose_mobilenetv2	256x256	0.0611	ckpt	log

Topdown Heatmap + Mobilenetv2 on Rhd2d¶

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

RHD (ICCV'2017)

@techreport{zb2017hand,
  author      = {Zimmermann, Christian and Brox, Thomas},
  title       = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution = {arXiv:1705.01389},
  year        = {2017},
  note        = {https://arxiv.org/abs/1705.01389},
  url         = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}

Results on RHD test set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_mobilenetv2	256x256	0.985	0.883	2.79	ckpt	log

Topdown Heatmap + Mobilenetv2 on Onehand10k¶

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

OneHand10K (TCSVT'2019)

@article{wang2018mask,
  author    = {Wang, Yangang and Peng, Cong and Liu, Yebin},
  title     = {Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume    = {29},
  number    = {11},
  pages     = {3258--3268},
  year      = {2018},
  publisher = {IEEE}
}

Results on OneHand10K val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_mobilenetv2	256x256	0.986	0.537	28.56	ckpt	log

Topdown Heatmap + Mobilenetv2 + Coco + Wholebody + Hand on Coco_wholebody_hand¶

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4510--4520},
  year      = {2018}
}

COCO-WholeBody-Hand (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody-Hand val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_mobilenetv2	256x256	0.795	0.829	4.77	ckpt	log

SEResNet (CVPR’2018)¶

Topdown Heatmap + Seresnet on Mpii¶

SEResNet (CVPR'2018)

@inproceedings{hu2018squeeze,
  author    = {Hu, Jie and Shen, Li and Sun, Gang},
  title     = {Squeeze-and-excitation networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {7132--7141},
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_seresnet_50	256x256	0.884	0.292	ckpt	log
pose_seresnet_101	256x256	0.884	0.295	ckpt	log
pose_seresnet_152*	256x256	0.884	0.287	ckpt	log

Note that * means without ImageNet pre-training.

Topdown Heatmap + Seresnet on Coco¶

SEResNet (CVPR'2018)

@inproceedings{hu2018squeeze,
  author    = {Hu, Jie and Shen, Li and Sun, Gang},
  title     = {Squeeze-and-excitation networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {7132--7141},
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_seresnet_50	256x192	0.729	0.903	0.807	0.784	0.941	ckpt	log
pose_seresnet_50	384x288	0.748	0.904	0.819	0.799	0.941	ckpt	log
pose_seresnet_101	256x192	0.734	0.905	0.814	0.790	0.941	ckpt	log
pose_seresnet_101	384x288	0.754	0.907	0.823	0.805	0.943	ckpt	log
pose_seresnet_152*	256x192	0.730	0.899	0.810	0.787	0.939	ckpt	log
pose_seresnet_152*	384x288	0.753	0.906	0.824	0.806	0.945	ckpt	log

Note that * means without ImageNet pre-training.

LiteHRNet (CVPR’2021)¶

Topdown Heatmap + Litehrnet on Mpii¶

LiteHRNet (CVPR'2021)

@inproceedings{Yulitehrnet21,
  author    = {Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
  title     = {{Lite-HRNet}: A Lightweight High-Resolution Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
LiteHRNet-18	256x256	0.859	0.260	ckpt	log
LiteHRNet-30	256x256	0.869	0.271	ckpt	log

Topdown Heatmap + Litehrnet on Coco¶

LiteHRNet (CVPR'2021)

@inproceedings{Yulitehrnet21,
  author    = {Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
  title     = {{Lite-HRNet}: A Lightweight High-Resolution Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
LiteHRNet-18	256x192	0.642	0.867	0.719	0.705	0.911	ckpt	log
LiteHRNet-18	384x288	0.676	0.876	0.746	0.735	0.919	ckpt	log
LiteHRNet-30	256x192	0.676	0.880	0.756	0.736	0.922	ckpt	log
LiteHRNet-30	384x288	0.700	0.883	0.776	0.758	0.926	ckpt	log

Topdown Heatmap + Litehrnet + Coco + Wholebody + Hand on Coco_wholebody_hand¶

LiteHRNet (CVPR'2021)

@inproceedings{Yulitehrnet21,
  author    = {Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
  title     = {{Lite-HRNet}: A Lightweight High-Resolution Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}

COCO-WholeBody-Hand (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody-Hand val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
LiteHRNet-18	256x256	0.795	0.830	4.77	ckpt	log

HRNetv2 (TPAMI’2019)¶

Topdown Heatmap + Hrnetv2 + Awing on WFLW¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

AdaptiveWingloss (ICCV'2019)

@inproceedings{wang2019adaptive,
  author    = {Wang, Xinyao and Bo, Liefeng and Fuxin, Li},
  title     = {Adaptive wing loss for robust face alignment via heatmap regression},
  booktitle = {Proceedings of the IEEE/CVF international conference on computer vision},
  year      = {2019},
  pages     = {6971--6981},
}

WFLW (CVPR'2018)

@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {2129--2138},
}

Results on WFLW dataset

The model is trained on WFLW train.

Arch	Input Size	NME_test	NME_pose	NME_illumination	NME_occlusion	NME_blur	NME_makeup	NME_expression	ckpt	log
pose_hrnetv2_w18_awing	256x256	4.02	6.94	3.97	4.78	4.59	3.87	4.28	ckpt	log

Topdown Heatmap + Hrnetv2 on WFLW¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

WFLW (CVPR'2018)

@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {2129--2138},
}

Results on WFLW dataset

The model is trained on WFLW train.

Arch	Input Size	NME_test	NME_pose	NME_illumination	NME_occlusion	NME_blur	NME_makeup	NME_expression	ckpt	log
pose_hrnetv2_w18	256x256	4.06	6.97	3.99	4.83	4.58	3.94	4.33	ckpt	log

Topdown Heatmap + Hrnetv2 + Dark on WFLW¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102},
}

WFLW (CVPR'2018)

@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {2129--2138},
}

Results on WFLW dataset

The model is trained on WFLW train.

Arch	Input Size	NME_test	NME_pose	NME_illumination	NME_occlusion	NME_blur	NME_makeup	NME_expression	ckpt	log
pose_hrnetv2_w18_dark	256x256	3.98	6.98	3.96	4.78	4.56	3.89	4.29	ckpt	log

Topdown Heatmap + Hrnetv2 on Cofw¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

COFW (ICCV'2013)

@inproceedings{burgos2013robust,
  author    = {Burgos-Artizzu, Xavier P. and Perona, Pietro and Doll{\'a}r, Piotr},
  title     = {Robust Face Landmark Estimation Under Occlusion},
  booktitle = {Proceedings of the {IEEE} International Conference on Computer Vision},
  pages     = {1513--1520},
  year      = {2013}
}

Results on COFW dataset

The model is trained on COFW train.

Arch	Input Size	NME	ckpt	log
pose_hrnetv2_w18	256x256	3.48	ckpt	log

Topdown Heatmap + Hrnetv2 on 300w¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

300W (IMAVIS'2016)

@article{sagonas2016300,
  author    = {Sagonas, Christos and Antonakos, Epameinondas and Tzimiropoulos, Georgios and Zafeiriou, Stefanos and Pantic, Maja},
  title     = {{300 Faces In-the-Wild Challenge}: Database and Results},
  journal   = {Image and Vision Computing},
  volume    = {47},
  pages     = {3--18},
  year      = {2016},
  publisher = {Elsevier}
}

Results on 300W dataset

The model is trained on 300W train.

Arch	Input Size	NME_common	NME_challenge	NME_full	NME_test	ckpt	log
pose_hrnetv2_w18	256x256	2.92	5.64	3.45	4.10	ckpt	log

Topdown Heatmap + Hrnetv2 + Dark on Aflw¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102},
}

AFLW (ICCVW'2011)

@inproceedings{koestinger2011annotated,
  author    = {Koestinger, Martin and Wohlhart, Paul and Roth, Peter M. and Bischof, Horst},
  title     = {Annotated Facial Landmarks in the Wild: A Large-Scale, Real-World Database for Facial Landmark Localization},
  booktitle = {2011 {IEEE} International Conference on Computer Vision Workshops ({ICCV} Workshops)},
  pages     = {2144--2151},
  year      = {2011},
  publisher = {IEEE}
}

Results on AFLW dataset

The model is trained on AFLW train and evaluated on AFLW full and frontal.

Arch	Input Size	NME_full	NME_frontal	ckpt	log
pose_hrnetv2_w18_dark	256x256	1.35	1.19	ckpt	log

Topdown Heatmap + Hrnetv2 on Aflw¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

AFLW (ICCVW'2011)

@inproceedings{koestinger2011annotated,
  author    = {Koestinger, Martin and Wohlhart, Paul and Roth, Peter M. and Bischof, Horst},
  title     = {Annotated Facial Landmarks in the Wild: A Large-Scale, Real-World Database for Facial Landmark Localization},
  booktitle = {2011 {IEEE} International Conference on Computer Vision Workshops ({ICCV} Workshops)},
  pages     = {2144--2151},
  year      = {2011},
  publisher = {IEEE}
}

Results on AFLW dataset

The model is trained on AFLW train and evaluated on AFLW full and frontal.

Arch	Input Size	NME_full	NME_frontal	ckpt	log
pose_hrnetv2_w18	256x256	1.41	1.27	ckpt	log

Topdown Heatmap + Hrnetv2 + Coco + Wholebody + Face on Coco_wholebody_face¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

COCO-WholeBody-Face (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody-Face val set

Arch	Input Size	NME	ckpt	log
pose_hrnetv2_w18	256x256	0.0569	ckpt	log

Topdown Heatmap + Hrnetv2 + Dark + Coco + Wholebody + Face on Coco_wholebody_face¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102},
}

COCO-WholeBody-Face (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody-Face val set

Arch	Input Size	NME	ckpt	log
pose_hrnetv2_w18_dark	256x256	0.0513	ckpt	log

Topdown Heatmap + Hrnetv2 on 300wlp¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

300WLP (TPAMI'2017)

@article{zhu2017face,
  author    = {Zhu, Xiangyu and Liu, Xiaoming and Lei, Zhen and Li, Stan Z.},
  title     = {Face Alignment in Full Pose Range: A {3D} Total Solution},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year      = {2017},
  publisher = {IEEE}
}

Results on 300W-LP dataset

The model is trained on 300W-LP train.

Arch	Input Size	NME_full	NME_test	ckpt	log
pose_hrnetv2_w18	256x256	0.0413	0.04125	ckpt	log

Topdown Heatmap + Hrnetv2 + Dark on Rhd2d¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102},
}

RHD (ICCV'2017)

@techreport{zb2017hand,
  author      = {Zimmermann, Christian and Brox, Thomas},
  title       = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution = {arXiv:1705.01389},
  year        = {2017},
  note        = {https://arxiv.org/abs/1705.01389},
  url         = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}

Results on RHD test set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18_dark	256x256	0.992	0.903	2.18	ckpt	log

Topdown Heatmap + Hrnetv2 + Udp on Rhd2d¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

UDP (CVPR'2020)

@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}

RHD (ICCV'2017)

@techreport{zb2017hand,
  author      = {Zimmermann, Christian and Brox, Thomas},
  title       = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution = {arXiv:1705.01389},
  year        = {2017},
  note        = {https://arxiv.org/abs/1705.01389},
  url         = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}

Results on RHD test set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18_udp	256x256	0.992	0.902	2.19	ckpt	log

Topdown Heatmap + Hrnetv2 on Rhd2d¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

RHD (ICCV'2017)

@techreport{zb2017hand,
  author      = {Zimmermann, Christian and Brox, Thomas},
  title       = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution = {arXiv:1705.01389},
  year        = {2017},
  note        = {https://arxiv.org/abs/1705.01389},
  url         = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}

Results on RHD test set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18	256x256	0.992	0.902	2.21	ckpt	log

Topdown Heatmap + Hrnetv2 + Udp on Onehand10k¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

UDP (CVPR'2020)

@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}

OneHand10K (TCSVT'2019)

@article{wang2018mask,
  author    = {Wang, Yangang and Peng, Cong and Liu, Yebin},
  title     = {Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume    = {29},
  number    = {11},
  pages     = {3258--3268},
  year      = {2018},
  publisher = {IEEE}
}

Results on OneHand10K val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18_udp	256x256	0.990	0.571	23.88	ckpt	log

Topdown Heatmap + Hrnetv2 on Onehand10k¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

OneHand10K (TCSVT'2019)

@article{wang2018mask,
  author    = {Wang, Yangang and Peng, Cong and Liu, Yebin},
  title     = {Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume    = {29},
  number    = {11},
  pages     = {3258--3268},
  year      = {2018},
  publisher = {IEEE}
}

Results on OneHand10K val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18	256x256	0.990	0.567	24.26	ckpt	log

Topdown Heatmap + Hrnetv2 + Dark on Onehand10k¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102},
}

OneHand10K (TCSVT'2019)

@article{wang2018mask,
  author    = {Wang, Yangang and Peng, Cong and Liu, Yebin},
  title     = {Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume    = {29},
  number    = {11},
  pages     = {3258--3268},
  year      = {2018},
  publisher = {IEEE}
}

Results on OneHand10K val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18_dark	256x256	0.990	0.572	23.96	ckpt	log

Topdown Heatmap + Hrnetv2 + Dark + Coco + Wholebody + Hand on Coco_wholebody_hand¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}

COCO-WholeBody-Hand (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody-Hand val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18_dark	256x256	0.814	0.840	4.37	ckpt	log

Topdown Heatmap + Hrnetv2 + Coco + Wholebody + Hand on Coco_wholebody_hand¶

HRNetv2 (TPAMI'2019)

@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}

COCO-WholeBody-Hand (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody-Hand val set

Arch	Input Size	PCK@0.2	AUC	EPE	ckpt	log
pose_hrnetv2_w18	256x256	0.813	0.840	4.39	ckpt	log

ResNetV1D (CVPR’2019)¶

Topdown Heatmap + Resnetv1d on Mpii¶

ResNetV1D (CVPR'2019)

@inproceedings{he2019bag,
  author    = {He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
  title     = {Bag of tricks for image classification with convolutional neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2019},
  pages     = {558--567}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_resnetv1d_50	256x256	0.881	0.290	ckpt	log
pose_resnetv1d_101	256x256	0.883	0.295	ckpt	log
pose_resnetv1d_152	256x256	0.888	0.300	ckpt	log

Topdown Heatmap + Resnetv1d on Coco¶

ResNetV1D (CVPR'2019)

@inproceedings{he2019bag,
  author    = {He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
  title     = {Bag of tricks for image classification with convolutional neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2019},
  pages     = {558--567}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnetv1d_50	256x192	0.722	0.897	0.796	0.777	0.936	ckpt	log
pose_resnetv1d_50	384x288	0.730	0.899	0.800	0.782	0.935	ckpt	log
pose_resnetv1d_101	256x192	0.732	0.901	0.808	0.785	0.940	ckpt	log
pose_resnetv1d_101	384x288	0.748	0.906	0.817	0.798	0.941	ckpt	log
pose_resnetv1d_152	256x192	0.737	0.904	0.814	0.790	0.940	ckpt	log
pose_resnetv1d_152	384x288	0.751	0.907	0.821	0.801	0.942	ckpt	log

ResNext (CVPR’2017)¶

Topdown Heatmap + Resnext on Mpii¶

ResNext (CVPR'2017)

@inproceedings{xie2017aggregated,
  author    = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  title     = {Aggregated residual transformations for deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2017},
  pages     = {1492--1500}
}

MPII (CVPR'2014)

@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2014},
  month     = jun
}

Results on MPII val set

Arch	Input Size	Mean	Mean@0.1	ckpt	log
pose_resnext_152	256x256	0.887	0.294	ckpt	log

Topdown Heatmap + Resnext on Coco¶

ResNext (CVPR'2017)

@inproceedings{xie2017aggregated,
  author    = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  title     = {Aggregated residual transformations for deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2017},
  pages     = {1492--1500}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnext_50	256x192	0.715	0.897	0.791	0.771	0.935	ckpt	log
pose_resnext_50	384x288	0.724	0.899	0.794	0.777	0.936	ckpt	log
pose_resnext_101	256x192	0.726	0.900	0.801	0.781	0.939	ckpt	log
pose_resnext_101	384x288	0.744	0.903	0.815	0.794	0.939	ckpt	log
pose_resnext_152	256x192	0.730	0.903	0.808	0.785	0.940	ckpt	log
pose_resnext_152	384x288	0.742	0.904	0.810	0.794	0.940	ckpt	log

ViPNAS (CVPR’2021)¶

Topdown Heatmap + Vipnas + Dark on Coco-Wholebody¶

ViPNAS (CVPR'2021)

@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}

DarkPose (CVPR'2020)

@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
S-ViPNAS-MobileNetV3_dark	256x192	0.632	0.710	0.530	0.660	0.672	0.771	0.404	0.519	0.508	0.607	ckpt	log
S-ViPNAS-Res50_dark	256x192	0.650	0.732	0.550	0.686	0.684	0.783	0.437	0.554	0.528	0.632	ckpt	log

Topdown Heatmap + Vipnas on Coco-Wholebody¶

ViPNAS (CVPR'2021)

@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}

COCO-WholeBody (ECCV'2020)

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
S-ViPNAS-MobileNetV3	256x192	0.619	0.700	0.477	0.608	0.585	0.689	0.386	0.505	0.473	0.578	ckpt	log
S-ViPNAS-Res50	256x192	0.643	0.726	0.553	0.694	0.587	0.698	0.410	0.529	0.495	0.607	ckpt	log

Simcc + Vipnas on Coco¶

SimCC (ECCV'2022)

@misc{https://doi.org/10.48550/arxiv.2107.03332,
  author        = {Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
  title         = {{SimCC}: a Simple Coordinate Classification Perspective for Human Pose Estimation},
  year          = {2021},
  eprint        = {2107.03332},
  archivePrefix = {arXiv},
  doi           = {10.48550/arXiv.2107.03332}
}

ViPNAS (CVPR'2021)

@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
simcc_S-ViPNAS-MobileNetV3	256x192	0.695	0.883	0.772	0.755	0.927	ckpt	log

Topdown Heatmap + Vipnas on Coco¶

ViPNAS (CVPR'2021)

@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
S-ViPNAS-MobileNetV3	256x192	0.700	0.887	0.783	0.758	0.929	ckpt	log
S-ViPNAS-Res50	256x192	0.711	0.894	0.787	0.769	0.934	ckpt	log

RSN (ECCV’2020)¶

Topdown Heatmap + RSN on Coco¶

RSN (ECCV'2020)

@misc{cai2020learning,
  author        = {Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun},
  title         = {Learning Delicate Local Representations for Multi-Person Pose Estimation},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2003.04030},
  primaryClass  = {cs.CV}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
rsn_18	256x192	0.704	0.887	0.781	0.773	0.927	ckpt	log
rsn_50	256x192	0.724	0.894	0.799	0.790	0.935	ckpt	log
2xrsn_50	256x192	0.748	0.900	0.821	0.810	0.939	ckpt	log
3xrsn_50	256x192	0.750	0.900	0.824	0.814	0.941	ckpt	log

PVTV2 (CVMJ’2022)¶

Topdown Heatmap + PVT on Coco¶

PVT (ICCV'2021)

@inproceedings{wang2021pyramid,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {Pyramid vision transformer: A versatile backbone for dense prediction without convolutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2021},
  pages     = {568--578}
}

PVTV2 (CVMJ'2022)

@article{wang2022pvt,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {{PVT} v2: Improved baselines with {Pyramid Vision Transformer}},
  journal   = {Computational Visual Media},
  pages     = {1--10},
  year      = {2022},
  publisher = {Springer}
}

COCO (ECCV'2014)

@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_pvt-s	256x192	0.714	0.896	0.794	0.773	0.936	ckpt	log
pose_pvtv2-b2	256x192	0.737	0.905	0.812	0.791	0.942	ckpt	log