Datasets¶
300WLP (IEEE’2017)¶
Topdown Heatmap + Hrnetv2 on 300wlp¶
HRNetv2 (TPAMI'2019)
% HRNetv2 journal paper. The venue is spelled out in full so the bibliography style, not the database, decides on abbreviation.
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and
             Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
300WLP (IEEE'2017)
% 300W-LP dataset paper. Braces keep the acronym capitalised under sentence-casing styles.
@article{zhu2017face,
  author    = {Zhu, Xiangyu and Liu, Xiaoming and Lei, Zhen and Li, Stan Z.},
  title     = {Face Alignment in Full Pose Range: A {3D} Total Solution},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year      = {2017},
  publisher = {IEEE}
}
Results on 300W-LP dataset
The model is trained on 300W-LP train.
Arch | Input Size | NMEfull | NMEtest | ckpt | log |
---|---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 0.0413 | 0.04125 | ckpt | log |
CrowdPose (CVPR’2019)¶
Rtmo + Rtmo on Body7¶
RTMO
@misc{lu2023rtmo,
  author        = {Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
  title         = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2312.07526},
  primaryClass  = {cs.CV}
}
AI Challenger (ArXiv'2017)
% AI Challenger dataset paper. Braces keep the dataset name capitalised under sentence-casing styles.
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017}
}
COCO (ECCV'2014)
% COCO dataset paper. Braces protect the proper nouns; Springer is recorded as the publisher rather than as a sponsoring organization.
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
JHMDB (ICCV'2013)
% JHMDB dataset paper. The page range uses a double hyphen and the month uses the predefined macro.
@inproceedings{Jhuang:ICCV:2013,
  author    = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013}
}
MPII (CVPR'2014)
% MPII dataset paper. The month uses the predefined macro so styles can localise it; all names use the Last, First form.
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
PoseTrack18 (CVPR'2018)
% PoseTrack dataset paper. Braces keep the camel-case dataset name intact under sentence-casing styles.
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A Benchmark for Human Pose Estimation and Tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018}
}
Halpe (CVPR'2020)
% PaStaNet paper. The proceedings title is spelled out instead of a bare acronym; braces protect the camel-case name.
@inproceedings{li2020pastanet,
  author    = {Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
  title     = {{PaStaNet}: Toward Human Activity Knowledge Engine},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2020}
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | onnx |
---|---|---|---|---|---|---|---|---|---|
RTMO-t | 640x640 | 0.574 | 0.803 | 0.613 | 0.611 | 0.836 | ckpt | log | onnx |
RTMO-s | 640x640 | 0.686 | 0.879 | 0.744 | 0.723 | 0.908 | ckpt | log | onnx |
RTMO-m | 640x640 | 0.726 | 0.899 | 0.790 | 0.763 | 0.926 | ckpt | log | onnx |
RTMO-l | 640x640 | 0.748 | 0.911 | 0.813 | 0.786 | 0.939 | ckpt | log | onnx |
Rtmo + Rtmo on Crowdpose¶
RTMO
@misc{lu2023rtmo,
  author        = {Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
  title         = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2312.07526},
  primaryClass  = {cs.CV}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
Results on CrowdPose test
Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
RTMO-s | 640x640 | 0.673 | 0.882 | 0.729 | 0.737 | 0.682 | 0.591 | ckpt | log |
RTMO-m | 640x640 | 0.711 | 0.897 | 0.771 | 0.774 | 0.719 | 0.634 | ckpt | log |
RTMO-l | 640x640 | 0.732 | 0.907 | 0.793 | 0.792 | 0.741 | 0.653 | ckpt | log |
RTMO-l* | 640x640 | 0.838 | 0.947 | 0.893 | 0.888 | 0.847 | 0.772 | ckpt | log |
* indicates the model is trained using a combined dataset composed of AI Challenger, COCO, CrowdPose, Halpe, MPII, PoseTrack18 and sub-JHMDB.
Dekr + Hrnet on Crowdpose¶
DEKR (CVPR'2021)
@inproceedings{geng2021bottom,
  author    = {Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
  title     = {Bottom-up human pose estimation via disentangled keypoint regression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021},
  pages     = {14676--14686}
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
Results on CrowdPose test without multi-scale test
Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
HRNet-w32 | 512x512 | 0.663 | 0.857 | 0.714 | 0.740 | 0.671 | 0.576 | ckpt | log |
HRNet-w48 | 640x640 | 0.679 | 0.869 | 0.731 | 0.753 | 0.688 | 0.593 | ckpt | log |
Topdown Heatmap + Cspnext + Udp on Crowdpose¶
RTMDet (arXiv'2022)
% RTMDet preprint. Braces keep the model name capitalised under sentence-casing styles.
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
UDP (CVPR'2020)
% UDP paper. The month uses the predefined macro so styles can abbreviate or localise it.
@InProceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
Results on CrowdPose test with YOLOv3 human detector
Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
pose_cspnext_m | 256x192 | 0.662 | 0.821 | 0.723 | 0.759 | 0.675 | 0.539 | ckpt | log |
Topdown Heatmap + Hrnet on Crowdpose¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
Results on CrowdPose test with YOLOv3 human detector
Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x192 | 0.675 | 0.825 | 0.729 | 0.770 | 0.687 | 0.553 | ckpt | log |
Topdown Heatmap + Resnet on Crowdpose¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
Results on CrowdPose test with YOLOv3 human detector
Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
pose_resnet_50 | 256x192 | 0.637 | 0.808 | 0.692 | 0.738 | 0.650 | 0.506 | ckpt | log |
pose_resnet_101 | 256x192 | 0.647 | 0.810 | 0.703 | 0.745 | 0.658 | 0.521 | ckpt | log |
pose_resnet_101 | 320x256 | 0.661 | 0.821 | 0.714 | 0.759 | 0.672 | 0.534 | ckpt | log |
pose_resnet_152 | 256x192 | 0.656 | 0.818 | 0.712 | 0.754 | 0.666 | 0.533 | ckpt | log |
Rtmpose + Rtmpose on Crowdpose¶
RTMPose (arXiv'2023)
% RTMPose preprint. The duplicated keyword is removed and braces protect the camel-case names; the citation key is left unchanged so existing cites still resolve.
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author    = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title     = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  keywords  = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  publisher = {arXiv},
  year      = {2023},
  doi       = {10.48550/ARXIV.2303.07399},
  url       = {https://arxiv.org/abs/2303.07399},
  copyright = {Creative Commons Attribution 4.0 International}
}
RTMDet (arXiv'2022)
% RTMDet preprint. Braces keep the model name capitalised under sentence-casing styles.
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
Results on CrowdPose test with YOLOv3 human detector
Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
rtmpose-m | 256x192 | 0.706 | 0.841 | 0.765 | 0.799 | 0.719 | 0.582 | ckpt | log |
AI Challenger (ArXiv’2017)¶
Rtmo + Rtmo on Body7¶
RTMO
@misc{lu2023rtmo,
  author        = {Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
  title         = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2312.07526},
  primaryClass  = {cs.CV}
}
AI Challenger (ArXiv'2017)
% AI Challenger dataset paper. Braces keep the dataset name capitalised under sentence-casing styles.
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017}
}
COCO (ECCV'2014)
% COCO dataset paper. Braces protect the proper nouns; Springer is recorded as the publisher rather than as a sponsoring organization.
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}
JHMDB (ICCV'2013)
% JHMDB dataset paper. The page range uses a double hyphen and the month uses the predefined macro.
@inproceedings{Jhuang:ICCV:2013,
  author    = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013}
}
MPII (CVPR'2014)
% MPII dataset paper. The month uses the predefined macro so styles can localise it; all names use the Last, First form.
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
PoseTrack18 (CVPR'2018)
% PoseTrack dataset paper. Braces keep the camel-case dataset name intact under sentence-casing styles.
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A Benchmark for Human Pose Estimation and Tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018}
}
Halpe (CVPR'2020)
% PaStaNet paper. The proceedings title is spelled out instead of a bare acronym; braces protect the camel-case name.
@inproceedings{li2020pastanet,
  author    = {Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
  title     = {{PaStaNet}: Toward Human Activity Knowledge Engine},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2020}
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | onnx |
---|---|---|---|---|---|---|---|---|---|
RTMO-t | 640x640 | 0.574 | 0.803 | 0.613 | 0.611 | 0.836 | ckpt | log | onnx |
RTMO-s | 640x640 | 0.686 | 0.879 | 0.744 | 0.723 | 0.908 | ckpt | log | onnx |
RTMO-m | 640x640 | 0.726 | 0.899 | 0.790 | 0.763 | 0.926 | ckpt | log | onnx |
RTMO-l | 640x640 | 0.748 | 0.911 | 0.813 | 0.786 | 0.939 | ckpt | log | onnx |
Topdown Heatmap + Hrnet + Aic on Coco¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
COCO (ECCV'2014)
% COCO dataset paper. Braces protect the proper nouns; Springer is recorded as the publisher rather than as a sponsoring organization.
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
AI Challenger (ArXiv'2017)
% AI Challenger dataset paper. Braces keep the dataset name capitalised under sentence-casing styles.
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017}
}
MMPose supports training models with combined datasets. coco-aic-merge and coco-aic-combine are two examples.
coco-aic-merge leverages AIC data with partial keypoints as auxiliary data to train a COCO model
coco-aic-combine constructs a combined dataset whose keypoints are the union of COCO and AIC keypoints to train a model that predicts keypoints of both datasets.
Evaluation results on COCO val2017 for models trained solely on the COCO dataset and on the combined datasets are shown below. These models are evaluated with a detector having human AP of 56.4 on the COCO val2017 dataset.
Train Set | Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
coco | pose_hrnet_w32 | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | ckpt | log |
coco-aic-merge | pose_hrnet_w32 | 256x192 | 0.756 | 0.907 | 0.828 | 0.809 | 0.944 | ckpt | log |
coco-aic-combine | pose_hrnet_w32 | 256x192 | 0.755 | 0.904 | 0.825 | 0.807 | 0.942 | ckpt | log |
Topdown Heatmap + Hrnet on Aic¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
AI Challenger (ArXiv'2017)
% AI Challenger dataset paper. Braces keep the dataset name capitalised under sentence-casing styles.
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017}
}
Results on AIC val set with ground-truth bounding boxes
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x192 | 0.323 | 0.761 | 0.218 | 0.366 | 0.789 | ckpt | log |
Topdown Heatmap + Resnet on Aic¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
AI Challenger (ArXiv'2017)
% AI Challenger dataset paper. Braces keep the dataset name capitalised under sentence-casing styles.
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017}
}
Results on AIC val set with ground-truth bounding boxes
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnet_101 | 256x192 | 0.294 | 0.736 | 0.172 | 0.337 | 0.762 | ckpt | log |
InterHand2.6M (ECCV’2020)¶
Internet + Internet on Interhand3d¶
InterNet (ECCV'2020)
% InterHand2.6M paper. Braces keep the dataset name and acronyms capitalised under sentence-casing styles.
@InProceedings{Moon_2020_ECCV_InterHand2.6M,
  author    = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
  title     = {{InterHand2.6M}: A Dataset and Baseline for {3D} Interacting Hand Pose Estimation from a Single {RGB} Image},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2020}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
InterHand2.6M (ECCV'2020)
% InterHand2.6M paper. Braces keep the dataset name and acronyms capitalised under sentence-casing styles.
@InProceedings{Moon_2020_ECCV_InterHand2.6M,
  author    = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
  title     = {{InterHand2.6M}: A Dataset and Baseline for {3D} Interacting Hand Pose Estimation from a Single {RGB} Image},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2020}
}
Results on InterHand2.6M val & test set
Train Set | Set | Arch | Input Size | MPJPE-single | MPJPE-interacting | MPJPE-all | MRRPE | APh | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|
All | test(H+M) | InterNet_resnet_50 | 256x256 | 9.69 | 13.72 | 11.86 | 29.27 | 0.99 | ckpt | log |
All | val(M) | InterNet_resnet_50 | 256x256 | 11.30 | 15.57 | 13.36 | 32.15 | 0.98 | ckpt | log |
All | test(H+M) | InterNet_resnet_50* | 256x256 | 9.47 | 13.40 | 11.59 | 29.28 | 0.99 | ckpt | log |
All | val(M) | InterNet_resnet_50* | 256x256 | 11.22 | 15.23 | 13.16 | 31.73 | 0.98 | ckpt | log |
Models with * are trained in MMPose 0.x. The checkpoints and logs are only for validation.
Human-Art (CVPR’2023)¶
Topdown Heatmap + Hrnet on Humanart¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
COCO (ECCV'2014)
% COCO dataset paper. Braces protect the proper nouns; Springer is recorded as the publisher rather than as a sponsoring organization.
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Human-Art (CVPR'2023)
% Human-Art dataset paper. The booktitle brace is closed and the entry is terminated so a parser does not swallow the text that follows; author names are given as Last, First.
@inproceedings{ju2023humanart,
  author    = {Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
  title     = {{Human-Art}: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023}
}
Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32-coco | 256x192 | 0.252 | 0.397 | 0.255 | 0.321 | 0.485 | ckpt | log |
pose_hrnet_w32-humanart-coco | 256x192 | 0.399 | 0.545 | 0.420 | 0.466 | 0.613 | ckpt | log |
pose_hrnet_w48-coco | 256x192 | 0.271 | 0.413 | 0.277 | 0.339 | 0.499 | ckpt | log |
pose_hrnet_w48-humanart-coco | 256x192 | 0.417 | 0.553 | 0.442 | 0.481 | 0.617 | ckpt | log |
Results on Human-Art validation dataset with ground-truth bounding-box
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32-coco | 256x192 | 0.533 | 0.771 | 0.562 | 0.574 | 0.792 | ckpt | log |
pose_hrnet_w32-humanart-coco | 256x192 | 0.754 | 0.906 | 0.812 | 0.783 | 0.916 | ckpt | log |
pose_hrnet_w48-coco | 256x192 | 0.557 | 0.782 | 0.593 | 0.595 | 0.804 | ckpt | log |
pose_hrnet_w48-humanart-coco | 256x192 | 0.769 | 0.906 | 0.825 | 0.796 | 0.919 | ckpt | log |
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32-coco | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | ckpt | log |
pose_hrnet_w32-humanart-coco | 256x192 | 0.741 | 0.902 | 0.814 | 0.795 | 0.941 | ckpt | log |
pose_hrnet_w48-coco | 256x192 | 0.756 | 0.908 | 0.826 | 0.809 | 0.945 | ckpt | log |
pose_hrnet_w48-humanart-coco | 256x192 | 0.751 | 0.905 | 0.822 | 0.805 | 0.943 | ckpt | log |
Topdown Heatmap + Vitpose on Humanart¶
To utilize ViTPose, you’ll need to have MMPreTrain. To install the required version, run the following command:
mim install 'mmpretrain>=1.0.0'
ViTPose (NeurIPS'2022)
% ViTPose paper. The whole model name is braced (not a mid-word fragment) and the key sits on the opening line.
@inproceedings{xu2022vitpose,
  author    = {Xu, Yufei and Zhang, Jing and Zhang, Qiming and Tao, Dacheng},
  title     = {{ViTPose}: Simple Vision Transformer Baselines for Human Pose Estimation},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2022}
}
COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Human-Art (CVPR'2023)
% Human-Art dataset paper. The booktitle brace is closed and the entry is terminated so a parser does not swallow the text that follows; author names are given as Last, First.
@inproceedings{ju2023humanart,
  author    = {Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
  title     = {{Human-Art}: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023}
}
Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
ViTPose-S-coco | 256x192 | 0.228 | 0.371 | 0.229 | 0.298 | 0.467 | ckpt | log |
ViTPose-S-humanart-coco | 256x192 | 0.381 | 0.532 | 0.405 | 0.448 | 0.602 | ckpt | log |
ViTPose-B-coco | 256x192 | 0.270 | 0.423 | 0.272 | 0.340 | 0.510 | ckpt | log |
ViTPose-B-humanart-coco | 256x192 | 0.410 | 0.549 | 0.434 | 0.475 | 0.615 | ckpt | log |
ViTPose-L-coco | 256x192 | 0.342 | 0.498 | 0.357 | 0.413 | 0.577 | ckpt | log |
ViTPose-L-humanart-coco | 256x192 | 0.459 | 0.592 | 0.487 | 0.525 | 0.656 | ckpt | log |
ViTPose-H-coco | 256x192 | 0.377 | 0.541 | 0.391 | 0.447 | 0.615 | ckpt | log |
ViTPose-H-humanart-coco | 256x192 | 0.468 | 0.594 | 0.498 | 0.534 | 0.655 | ckpt | log |
Results on Human-Art validation dataset with ground-truth bounding-box
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
ViTPose-S-coco | 256x192 | 0.507 | 0.758 | 0.531 | 0.551 | 0.780 | ckpt | log |
ViTPose-S-humanart-coco | 256x192 | 0.738 | 0.905 | 0.802 | 0.768 | 0.911 | ckpt | log |
ViTPose-B-coco | 256x192 | 0.555 | 0.782 | 0.590 | 0.599 | 0.809 | ckpt | log |
ViTPose-B-humanart-coco | 256x192 | 0.759 | 0.905 | 0.823 | 0.790 | 0.917 | ckpt | log |
ViTPose-L-coco | 256x192 | 0.637 | 0.838 | 0.689 | 0.677 | 0.859 | ckpt | log |
ViTPose-L-humanart-coco | 256x192 | 0.789 | 0.916 | 0.845 | 0.819 | 0.929 | ckpt | log |
ViTPose-H-coco | 256x192 | 0.665 | 0.860 | 0.715 | 0.701 | 0.871 | ckpt | log |
ViTPose-H-humanart-coco | 256x192 | 0.800 | 0.926 | 0.855 | 0.828 | 0.933 | ckpt | log |
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
ViTPose-S-coco | 256x192 | 0.739 | 0.903 | 0.816 | 0.792 | 0.942 | ckpt | log |
ViTPose-S-humanart-coco | 256x192 | 0.737 | 0.902 | 0.811 | 0.792 | 0.942 | ckpt | log |
ViTPose-B-coco | 256x192 | 0.757 | 0.905 | 0.829 | 0.810 | 0.946 | ckpt | log |
ViTPose-B-humanart-coco | 256x192 | 0.758 | 0.906 | 0.829 | 0.812 | 0.946 | ckpt | log |
ViTPose-L-coco | 256x192 | 0.782 | 0.914 | 0.850 | 0.834 | 0.952 | ckpt | log |
ViTPose-L-humanart-coco | 256x192 | 0.782 | 0.914 | 0.849 | 0.835 | 0.953 | ckpt | log |
ViTPose-H-coco | 256x192 | 0.788 | 0.917 | 0.855 | 0.839 | 0.954 | ckpt | log |
ViTPose-H-humanart-coco | 256x192 | 0.788 | 0.914 | 0.853 | 0.841 | 0.956 | ckpt | log |
Rtmpose + Rtmpose on Humanart¶
RTMPose (arXiv'2023)
% RTMPose preprint. The duplicated keyword is removed and braces protect the camel-case names; the citation key is left unchanged so existing cites still resolve.
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author    = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title     = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  keywords  = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  publisher = {arXiv},
  year      = {2023},
  doi       = {10.48550/ARXIV.2303.07399},
  url       = {https://arxiv.org/abs/2303.07399},
  copyright = {Creative Commons Attribution 4.0 International}
}
RTMDet (arXiv'2022)
% RTMDet preprint. Braces keep the model name capitalised under sentence-casing styles.
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
% COCO dataset paper. Braces protect the proper nouns; Springer is recorded as the publisher rather than as a sponsoring organization.
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Human-Art (CVPR'2023)
% Human-Art dataset paper. The booktitle brace is closed and the entry is terminated so a parser does not swallow the text that follows; author names are given as Last, First.
@inproceedings{ju2023humanart,
  author    = {Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
  title     = {{Human-Art}: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023}
}
Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-coco | 256x192 | 0.161 | 0.283 | 0.154 | 0.221 | 0.373 | ckpt | log |
rtmpose-t-humanart-coco | 256x192 | 0.249 | 0.395 | 0.256 | 0.323 | 0.485 | ckpt | log |
rtmpose-s-coco | 256x192 | 0.199 | 0.328 | 0.198 | 0.261 | 0.418 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.311 | 0.462 | 0.323 | 0.381 | 0.540 | ckpt | log |
rtmpose-m-coco | 256x192 | 0.239 | 0.372 | 0.243 | 0.302 | 0.455 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.355 | 0.503 | 0.377 | 0.417 | 0.568 | ckpt | log |
rtmpose-l-coco | 256x192 | 0.260 | 0.393 | 0.267 | 0.323 | 0.472 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.378 | 0.521 | 0.399 | 0.442 | 0.584 | ckpt | log |
Results on Human-Art validation dataset with ground-truth bounding-box
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-coco | 256x192 | 0.444 | 0.725 | 0.453 | 0.488 | 0.750 | ckpt | log |
rtmpose-t-humanart-coco | 256x192 | 0.655 | 0.872 | 0.720 | 0.693 | 0.890 | ckpt | log |
rtmpose-s-coco | 256x192 | 0.480 | 0.739 | 0.498 | 0.521 | 0.763 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.698 | 0.893 | 0.768 | 0.732 | 0.903 | ckpt | log |
rtmpose-m-coco | 256x192 | 0.532 | 0.765 | 0.563 | 0.571 | 0.789 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.728 | 0.895 | 0.791 | 0.759 | 0.906 | ckpt | log |
rtmpose-l-coco | 256x192 | 0.564 | 0.789 | 0.602 | 0.599 | 0.808 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.753 | 0.905 | 0.812 | 0.783 | 0.915 | ckpt | log |
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-coco | 256x192 | 0.682 | 0.883 | 0.759 | 0.736 | 0.920 | ckpt | log |
rtmpose-t-humanart-coco | 256x192 | 0.665 | 0.875 | 0.739 | 0.721 | 0.916 | ckpt | log |
rtmpose-s-coco | 256x192 | 0.716 | 0.892 | 0.789 | 0.768 | 0.929 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.706 | 0.888 | 0.780 | 0.759 | 0.928 | ckpt | log |
rtmpose-m-coco | 256x192 | 0.746 | 0.899 | 0.817 | 0.795 | 0.935 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.725 | 0.892 | 0.795 | 0.775 | 0.929 | ckpt | log |
rtmpose-l-coco | 256x192 | 0.758 | 0.906 | 0.826 | 0.806 | 0.942 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.748 | 0.901 | 0.816 | 0.796 | 0.938 | ckpt | log |
Results on COCO val2017 with ground-truth bounding box
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-humanart-coco | 256x192 | 0.679 | 0.895 | 0.755 | 0.710 | 0.907 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.725 | 0.916 | 0.798 | 0.753 | 0.925 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.744 | 0.916 | 0.818 | 0.770 | 0.930 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.770 | 0.927 | 0.840 | 0.794 | 0.939 | ckpt | log |
COFW (ICCV’2013)¶
Topdown Heatmap + Hrnetv2 on Cofw¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
COFW (ICCV'2013)
@inproceedings{burgos2013robust,
  author    = {Burgos-Artizzu, Xavier P and Perona, Pietro and Doll{\'a}r, Piotr},
  title     = {Robust face landmark estimation under occlusion},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  year      = {2013},
  pages     = {1513--1520}
}
Results on COFW dataset
The model is trained on COFW train.
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 3.48 | ckpt | log |
MPII (CVPR’2014)¶
Rtmo + Rtmo on Body7¶
RTMO
@misc{lu2023rtmo,
  author        = {Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
  title         = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  eprint        = {2312.07526},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  year          = {2023}
}
AI Challenger (ArXiv'2017)
@misc{wu2017ai,
  author        = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title         = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  year          = {2017},
  eprint        = {1711.06475},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
CrowdPose (CVPR'2019)
@inproceedings{li2018crowdpose,
  author        = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title         = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  booktitle     = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year          = {2019},
  eprint        = {1812.00324},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
  author    = {Jhuang, H. and Gall, J. and Zuffi, S. and Schmid, C. and Black, M. J.},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A Benchmark for Human Pose Estimation and Tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018}
}
Halpe (CVPR'2020)
@inproceedings{li2020pastanet,
  author    = {Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
  title     = {{PaStaNet}: Toward Human Activity Knowledge Engine},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020}
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | onnx |
---|---|---|---|---|---|---|---|---|---|
RTMO-t | 640x640 | 0.574 | 0.803 | 0.613 | 0.611 | 0.836 | ckpt | log | onnx |
RTMO-s | 640x640 | 0.686 | 0.879 | 0.744 | 0.723 | 0.908 | ckpt | log | onnx |
RTMO-m | 640x640 | 0.726 | 0.899 | 0.790 | 0.763 | 0.926 | ckpt | log | onnx |
RTMO-l | 640x640 | 0.748 | 0.911 | 0.813 | 0.786 | 0.939 | ckpt | log | onnx |
Topdown Regression + Resnet + Rle on Mpii¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
RLE (ICCV'2021)
@inproceedings{li2021human,
  author    = {Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
  title     = {Human pose regression with residual log-likelihood estimation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2021},
  pages     = {11025--11034}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
deeppose_resnet_50_rle | 256x256 | 0.861 | 0.277 | ckpt | log |
Topdown Regression + Resnet on Mpii¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
deeppose_resnet_50 | 256x256 | 0.826 | 0.180 | ckpt | log |
deeppose_resnet_101 | 256x256 | 0.841 | 0.200 | ckpt | log |
deeppose_resnet_152 | 256x256 | 0.850 | 0.208 | ckpt | log |
Topdown Heatmap + Resnext on Mpii¶
ResNext (CVPR'2017)
@inproceedings{xie2017aggregated,
  author    = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  title     = {Aggregated residual transformations for deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2017},
  pages     = {1492--1500}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_resnext_152 | 256x256 | 0.887 | 0.294 | ckpt | log |
Topdown Heatmap + Hrnet + Dark on Mpii¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_hrnet_w32_dark | 256x256 | 0.904 | 0.354 | ckpt | log |
pose_hrnet_w48_dark | 256x256 | 0.905 | 0.360 | ckpt | log |
Topdown Heatmap + Resnetv1d on Mpii¶
ResNetV1D (CVPR'2019)
@inproceedings{he2019bag,
  author    = {He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
  title     = {Bag of tricks for image classification with convolutional neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2019},
  pages     = {558--567}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_resnetv1d_50 | 256x256 | 0.881 | 0.290 | ckpt | log |
pose_resnetv1d_101 | 256x256 | 0.883 | 0.295 | ckpt | log |
pose_resnetv1d_152 | 256x256 | 0.888 | 0.300 | ckpt | log |
Topdown Heatmap + Shufflenetv2 on Mpii¶
ShufflenetV2 (ECCV'2018)
@inproceedings{ma2018shufflenet,
  author    = {Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
  title     = {{ShuffleNet} {V2}: Practical Guidelines for Efficient {CNN} Architecture Design},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {116--131},
  year      = {2018}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_shufflenetv2 | 256x256 | 0.828 | 0.205 | ckpt | log |
Topdown Heatmap + Scnet on Mpii¶
SCNet (CVPR'2020)
@inproceedings{liu2020improving,
  author    = {Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
  title     = {Improving Convolutional Networks with Self-Calibrated Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {10096--10105}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_scnet_50 | 256x256 | 0.888 | 0.290 | ckpt | log |
pose_scnet_101 | 256x256 | 0.887 | 0.293 | ckpt | log |
Topdown Heatmap + Hrnet on Mpii¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_hrnet_w32 | 256x256 | 0.900 | 0.334 | ckpt | log |
pose_hrnet_w48 | 256x256 | 0.901 | 0.337 | ckpt | log |
Topdown Heatmap + Mobilenetv2 on Mpii¶
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_mobilenetv2 | 256x256 | 0.854 | 0.234 | ckpt | log |
Topdown Heatmap + Shufflenetv1 on Mpii¶
ShufflenetV1 (CVPR'2018)
@inproceedings{zhang2018shufflenet,
  author    = {Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
  title     = {{ShuffleNet}: An Extremely Efficient Convolutional Neural Network for Mobile Devices},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {6848--6856},
  year      = {2018}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_shufflenetv1 | 256x256 | 0.824 | 0.195 | ckpt | log |
Topdown Heatmap + CPM on Mpii¶
CPM (CVPR'2016)
@inproceedings{wei2016convolutional,
  author    = {Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
  title     = {Convolutional pose machines},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  year      = {2016},
  pages     = {4724--4732}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
cpm | 368x368 | 0.876 | 0.285 | ckpt | log |
Topdown Heatmap + Resnet on Mpii¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_resnet_50 | 256x256 | 0.882 | 0.286 | ckpt | log |
pose_resnet_101 | 256x256 | 0.888 | 0.290 | ckpt | log |
pose_resnet_152 | 256x256 | 0.889 | 0.303 | ckpt | log |
Topdown Heatmap + Cspnext + Udp on Mpii¶
RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The {Devil} Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_cspnext_m_udp | 256x256 | 0.902 | 0.303 | ckpt | log |
Topdown Heatmap + Seresnet on Mpii¶
SEResNet (CVPR'2018)
@inproceedings{hu2018squeeze,
  author    = {Hu, Jie and Shen, Li and Sun, Gang},
  title     = {Squeeze-and-excitation networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {7132--7141}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_seresnet_50 | 256x256 | 0.884 | 0.292 | ckpt | log |
pose_seresnet_101 | 256x256 | 0.884 | 0.295 | ckpt | log |
pose_seresnet_152* | 256x256 | 0.884 | 0.287 | ckpt | log |
Note that * means without ImageNet pre-training.
Topdown Heatmap + Litehrnet on Mpii¶
LiteHRNet (CVPR'2021)
@inproceedings{Yulitehrnet21,
  author    = {Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
  title     = {{Lite-HRNet}: A Lightweight High-Resolution Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2021}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
LiteHRNet-18 | 256x256 | 0.859 | 0.260 | ckpt | log |
LiteHRNet-30 | 256x256 | 0.869 | 0.271 | ckpt | log |
Topdown Heatmap + Hourglass on Mpii¶
Hourglass (ECCV'2016)
@inproceedings{newell2016stacked,
  author    = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title     = {Stacked hourglass networks for human pose estimation},
  booktitle = {European conference on computer vision},
  pages     = {483--499},
  year      = {2016},
  publisher = {Springer}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
pose_hourglass_52 | 256x256 | 0.889 | 0.317 | ckpt | log |
pose_hourglass_52 | 384x384 | 0.894 | 0.367 | ckpt | log |
Rtmpose + Rtmpose on Mpii¶
RTMPose (arXiv'2023)
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  publisher     = {arXiv},
  year          = {2023},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
Results on MPII val set
Arch | Input Size | Mean / w. flip | Mean@0.1 | ckpt | log |
---|---|---|---|---|---|
rtmpose-m | 256x256 | 0.907 | 0.348 | ckpt | log |
Halpe (CVPR’2020)¶
Rtmo + Rtmo on Body7¶
RTMO
@misc{lu2023rtmo,
  author        = {Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
  title         = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  eprint        = {2312.07526},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  year          = {2023}
}
AI Challenger (ArXiv'2017)
@misc{wu2017ai,
  author        = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title         = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  year          = {2017},
  eprint        = {1711.06475},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
CrowdPose (CVPR'2019)
@inproceedings{li2018crowdpose,
  author        = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title         = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  booktitle     = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year          = {2019},
  eprint        = {1812.00324},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
  author    = {Jhuang, H. and Gall, J. and Zuffi, S. and Schmid, C. and Black, M. J.},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A Benchmark for Human Pose Estimation and Tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018}
}
Halpe (CVPR'2020)
@inproceedings{li2020pastanet,
  author    = {Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
  title     = {{PaStaNet}: Toward Human Activity Knowledge Engine},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020}
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | onnx |
---|---|---|---|---|---|---|---|---|---|
RTMO-t | 640x640 | 0.574 | 0.803 | 0.613 | 0.611 | 0.836 | ckpt | log | onnx |
RTMO-s | 640x640 | 0.686 | 0.879 | 0.744 | 0.723 | 0.908 | ckpt | log | onnx |
RTMO-m | 640x640 | 0.726 | 0.899 | 0.790 | 0.763 | 0.926 | ckpt | log | onnx |
RTMO-l | 640x640 | 0.748 | 0.911 | 0.813 | 0.786 | 0.939 | ckpt | log | onnx |
COCO-WholeBody-Face (ECCV’2020)¶
Topdown Heatmap + Hrnetv2 + Dark + Coco + Wholebody + Face on Coco_wholebody_face¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}
COCO-WholeBody-Face (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Results on COCO-WholeBody-Face val set
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_hrnetv2_w18_dark | 256x256 | 0.0513 | ckpt | log |
Topdown Heatmap + Mobilenetv2 + Coco + Wholebody + Face on Coco_wholebody_face¶
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018}
}
COCO-WholeBody-Face (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Results on COCO-WholeBody-Face val set
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_mobilenetv2 | 256x256 | 0.0611 | ckpt | log |
Topdown Heatmap + Hourglass + Coco + Wholebody + Face on Coco_wholebody_face¶
Hourglass (ECCV'2016)
@inproceedings{newell2016stacked,
  author    = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title     = {Stacked hourglass networks for human pose estimation},
  booktitle = {European conference on computer vision},
  pages     = {483--499},
  year      = {2016},
  publisher = {Springer}
}
COCO-WholeBody-Face (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Results on COCO-WholeBody-Face val set
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_hourglass_52 | 256x256 | 0.0587 | ckpt | log |
Topdown Heatmap + Resnet + Coco + Wholebody + Face on Coco_wholebody_face¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
COCO-WholeBody-Face (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Results on COCO-WholeBody-Face val set
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_res50 | 256x256 | 0.0582 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Coco + Wholebody + Face on Coco_wholebody_face¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
COCO-WholeBody-Face (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Results on COCO-WholeBody-Face val set
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 0.0569 | ckpt | log |
Topdown Heatmap + Scnet + Coco + Wholebody + Face on Coco_wholebody_face¶
SCNet (CVPR'2020)
@inproceedings{liu2020improving,
  author    = {Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
  title     = {Improving Convolutional Networks with Self-Calibrated Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {10096--10105}
}
COCO-WholeBody-Face (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Results on COCO-WholeBody-Face val set
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_scnet_50 | 256x256 | 0.0567 | ckpt | log |
Rtmpose + Rtmpose + Coco + Wholebody + Face on Coco_wholebody_face¶
RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO-WholeBody-Face (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  year      = {2020},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}
}
Results on COCO-WholeBody-Face val set
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_rtmpose_m | 256x256 | 0.0466 | ckpt | log |
DeepFashion (CVPR’2016)¶
Topdown Heatmap + Resnet on Deepfashion¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
DeepFashion (CVPR'2016)
@inproceedings{liuLQWTcvpr16DeepFashion,
  author    = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
  title     = {{DeepFashion}: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
  booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2016},
}
DeepFashion (ECCV'2016)
@inproceedings{liuYLWTeccv16FashionLandmark,
  author    = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  title     = {Fashion Landmark Detection in the Wild},
  booktitle = {European Conference on Computer Vision (ECCV)},
  month     = oct,
  year      = {2016},
}
Results on DeepFashion val set
Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|---|
upper | pose_resnet_50 | 256x192 | 95.4 | 57.8 | 16.8 | ckpt | log |
lower | pose_resnet_50 | 256x192 | 96.5 | 74.4 | 10.5 | ckpt | log |
full | pose_resnet_50 | 256x192 | 97.7 | 66.4 | 12.7 | ckpt | log |
Note: Due to the time constraints, we have only trained resnet50 models. We warmly welcome any contributions if you can successfully reproduce the results from the paper!
Topdown Heatmap + Hrnet on Deepfashion¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}
DeepFashion (CVPR'2016)
@inproceedings{liuLQWTcvpr16DeepFashion,
  author    = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
  title     = {{DeepFashion}: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
  booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2016},
}
DeepFashion (ECCV'2016)
@inproceedings{liuYLWTeccv16FashionLandmark,
  author    = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  title     = {Fashion Landmark Detection in the Wild},
  booktitle = {European Conference on Computer Vision (ECCV)},
  month     = oct,
  year      = {2016},
}
Results on DeepFashion val set
Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|---|
upper | pose_hrnet_w48_udp | 256x192 | 96.1 | 60.9 | 15.1 | ckpt | log |
lower | pose_hrnet_w48_udp | 256x192 | 97.8 | 76.1 | 8.9 | ckpt | log |
full | pose_hrnet_w48_udp | 256x192 | 98.3 | 67.3 | 11.7 | ckpt | log |
Note: Due to time constraints, we have only trained the HRNet-W48 (UDP) models listed above. We warmly welcome any contributions if you can successfully reproduce the results from the paper!
PoseTrack18 (CVPR’2018)¶
Rtmo + Rtmo on Body7¶
RTMO
@misc{lu2023rtmo,
  author        = {Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
  title         = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  year          = {2023},
  eprint        = {2312.07526},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
AI Challenger (ArXiv'2017)
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A large-scale dataset for going deeper in image understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018},
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
  author    = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013},
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014},
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A benchmark for human pose estimation and tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018},
}
Halpe (CVPR'2020)
@inproceedings{li2020pastanet,
  author    = {Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
  title     = {{PaStaNet}: Toward Human Activity Knowledge Engine},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2020},
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | onnx |
---|---|---|---|---|---|---|---|---|---|
RTMO-t | 640x640 | 0.574 | 0.803 | 0.613 | 0.611 | 0.836 | ckpt | log | onnx |
RTMO-s | 640x640 | 0.686 | 0.879 | 0.744 | 0.723 | 0.908 | ckpt | log | onnx |
RTMO-m | 640x640 | 0.726 | 0.899 | 0.790 | 0.763 | 0.926 | ckpt | log | onnx |
RTMO-l | 640x640 | 0.748 | 0.911 | 0.813 | 0.786 | 0.939 | ckpt | log | onnx |
Topdown Heatmap + Resnet on Posetrack18¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A benchmark for human pose estimation and tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018},
}
Results on PoseTrack2018 val with ground-truth bounding boxes
Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|
pose_resnet_50 | 256x192 | 86.5 | 87.7 | 82.5 | 75.8 | 80.1 | 78.8 | 74.2 | 81.2 | ckpt | log |
The models are first pre-trained on COCO dataset, and then fine-tuned on PoseTrack18.
Topdown Heatmap + Hrnet on Posetrack18¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A benchmark for human pose estimation and tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018},
}
Results on PoseTrack2018 val with ground-truth bounding boxes
Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x192 | 86.2 | 89.0 | 84.5 | 79.2 | 82.3 | 82.5 | 78.7 | 83.4 | ckpt | log |
pose_hrnet_w32 | 384x288 | 87.1 | 89.0 | 85.1 | 80.2 | 80.6 | 82.8 | 79.6 | 83.7 | ckpt | log |
pose_hrnet_w48 | 256x192 | 88.3 | 90.2 | 86.0 | 81.0 | 80.7 | 83.3 | 80.6 | 84.6 | ckpt | log |
pose_hrnet_w48 | 384x288 | 87.8 | 90.0 | 86.2 | 81.3 | 81.0 | 83.4 | 80.9 | 84.6 | ckpt | log |
The models are first pre-trained on COCO dataset, and then fine-tuned on PoseTrack18.
Results on PoseTrack2018 val with MMDetection pre-trained Cascade R-CNN (X-101-64x4d-FPN) human detector
Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x192 | 78.0 | 82.9 | 79.5 | 73.8 | 76.9 | 76.6 | 70.2 | 76.9 | ckpt | log |
pose_hrnet_w32 | 384x288 | 79.9 | 83.6 | 80.4 | 74.5 | 74.8 | 76.1 | 70.5 | 77.3 | ckpt | log |
pose_hrnet_w48 | 256x192 | 80.1 | 83.4 | 80.6 | 74.8 | 74.3 | 76.8 | 70.5 | 77.4 | ckpt | log |
pose_hrnet_w48 | 384x288 | 80.2 | 83.8 | 80.9 | 75.2 | 74.7 | 76.7 | 71.7 | 77.8 | ckpt | log |
The models are first pre-trained on COCO dataset, and then fine-tuned on PoseTrack18.
JHMDB (ICCV’2013)¶
Rtmo + Rtmo on Body7¶
RTMO
@misc{lu2023rtmo,
  author        = {Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
  title         = {{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
  year          = {2023},
  eprint        = {2312.07526},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
AI Challenger (ArXiv'2017)
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A large-scale dataset for going deeper in image understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018},
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
  author    = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013},
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014},
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A benchmark for human pose estimation and tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018},
}
Halpe (CVPR'2020)
@inproceedings{li2020pastanet,
  author    = {Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
  title     = {{PaStaNet}: Toward Human Activity Knowledge Engine},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2020},
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | onnx |
---|---|---|---|---|---|---|---|---|---|
RTMO-t | 640x640 | 0.574 | 0.803 | 0.613 | 0.611 | 0.836 | ckpt | log | onnx |
RTMO-s | 640x640 | 0.686 | 0.879 | 0.744 | 0.723 | 0.908 | ckpt | log | onnx |
RTMO-m | 640x640 | 0.726 | 0.899 | 0.790 | 0.763 | 0.926 | ckpt | log | onnx |
RTMO-l | 640x640 | 0.748 | 0.911 | 0.813 | 0.786 | 0.939 | ckpt | log | onnx |
Topdown Heatmap + Resnet on JHMDB¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
  author    = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013},
}
Results on Sub-JHMDB dataset
The models are pre-trained on MPII dataset only. NO test-time augmentation (multi-scale/rotation testing) is used.
Normalized by Person Size
Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sub1 | pose_resnet_50 | 256x256 | 99.1 | 98.0 | 93.8 | 91.3 | 99.4 | 96.5 | 92.8 | 96.1 | ckpt | log |
Sub2 | pose_resnet_50 | 256x256 | 99.3 | 97.1 | 90.6 | 87.0 | 98.9 | 96.3 | 94.1 | 95.0 | ckpt | log |
Sub3 | pose_resnet_50 | 256x256 | 99.0 | 97.9 | 94.0 | 91.6 | 99.7 | 98.0 | 94.7 | 96.7 | ckpt | log |
Average | pose_resnet_50 | 256x256 | 99.2 | 97.7 | 92.8 | 90.0 | 99.3 | 96.9 | 93.9 | 96.0 | - | - |
Sub1 | pose_resnet_50 (2 Deconv.) | 256x256 | 99.1 | 98.5 | 94.6 | 92.0 | 99.4 | 94.6 | 92.5 | 96.1 | ckpt | log |
Sub2 | pose_resnet_50 (2 Deconv.) | 256x256 | 99.3 | 97.8 | 91.0 | 87.0 | 99.1 | 96.5 | 93.8 | 95.2 | ckpt | log |
Sub3 | pose_resnet_50 (2 Deconv.) | 256x256 | 98.8 | 98.4 | 94.3 | 92.1 | 99.8 | 97.5 | 93.8 | 96.7 | ckpt | log |
Average | pose_resnet_50 (2 Deconv.) | 256x256 | 99.1 | 98.2 | 93.3 | 90.4 | 99.4 | 96.2 | 93.4 | 96.0 | - | - |
Normalized by Torso Size
Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sub1 | pose_resnet_50 | 256x256 | 93.3 | 83.2 | 74.4 | 72.7 | 85.0 | 81.2 | 78.9 | 81.9 | ckpt | log |
Sub2 | pose_resnet_50 | 256x256 | 94.1 | 74.9 | 64.5 | 62.5 | 77.9 | 71.9 | 78.6 | 75.5 | ckpt | log |
Sub3 | pose_resnet_50 | 256x256 | 97.0 | 82.2 | 74.9 | 70.7 | 84.7 | 83.7 | 84.2 | 82.9 | ckpt | log |
Average | pose_resnet_50 | 256x256 | 94.8 | 80.1 | 71.3 | 68.6 | 82.5 | 78.9 | 80.6 | 80.1 | - | - |
Sub1 | pose_resnet_50 (2 Deconv.) | 256x256 | 92.4 | 80.6 | 73.2 | 70.5 | 82.3 | 75.4 | 75.0 | 79.2 | ckpt | log |
Sub2 | pose_resnet_50 (2 Deconv.) | 256x256 | 93.4 | 73.6 | 63.8 | 60.5 | 75.1 | 68.4 | 75.5 | 73.7 | ckpt | log |
Sub3 | pose_resnet_50 (2 Deconv.) | 256x256 | 96.1 | 81.2 | 72.6 | 67.9 | 83.6 | 80.9 | 81.5 | 81.2 | ckpt | log |
Average | pose_resnet_50 (2 Deconv.) | 256x256 | 94.0 | 78.5 | 69.9 | 66.3 | 80.3 | 74.9 | 77.3 | 78.0 | - | - |
Topdown Heatmap + CPM on JHMDB¶
CPM (CVPR'2016)
@inproceedings{wei2016convolutional,
  author    = {Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
  title     = {Convolutional pose machines},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  pages     = {4724--4732},
  year      = {2016},
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
  author    = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013},
}
Results on Sub-JHMDB dataset
The models are pre-trained on MPII dataset only. NO test-time augmentation (multi-scale/rotation testing) is used.
Normalized by Person Size
Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sub1 | cpm | 368x368 | 96.1 | 91.9 | 81.0 | 78.9 | 96.6 | 90.8 | 87.3 | 89.5 | ckpt | log |
Sub2 | cpm | 368x368 | 98.1 | 93.6 | 77.1 | 70.9 | 94.0 | 89.1 | 84.7 | 87.4 | ckpt | log |
Sub3 | cpm | 368x368 | 97.9 | 94.9 | 87.3 | 84.0 | 98.6 | 94.4 | 86.2 | 92.4 | ckpt | log |
Average | cpm | 368x368 | 97.4 | 93.5 | 81.5 | 77.9 | 96.4 | 91.4 | 86.1 | 89.8 | - | - |
Normalized by Torso Size
Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Sub1 | cpm | 368x368 | 89.0 | 63.0 | 54.0 | 54.9 | 68.2 | 63.1 | 61.2 | 66.0 | ckpt | log |
Sub2 | cpm | 368x368 | 90.3 | 57.9 | 46.8 | 44.3 | 60.8 | 58.2 | 62.4 | 61.1 | ckpt | log |
Sub3 | cpm | 368x368 | 91.0 | 72.6 | 59.9 | 54.0 | 73.2 | 68.5 | 65.8 | 70.3 | ckpt | log |
Average | cpm | 368x368 | 90.1 | 64.5 | 53.6 | 51.1 | 67.4 | 63.3 | 63.1 | 65.7 | - | - |
WFLW (CVPR’2018)¶
Topdown Regression + Resnet on WFLW¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human pose estimation via deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018},
}
Results on WFLW dataset
The model is trained on WFLW train set.
Model | Input Size | NME | ckpt | log |
---|---|---|---|---|
ResNet-50 | 256x256 | 4.88 | ckpt | log |
Topdown Regression + Resnet + Softwingloss on WFLW¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human pose estimation via deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
SoftWingloss (TIP'2021)
@article{lin2021structure,
  author    = {Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie},
  title     = {Structure-Coherent Deep Feature Learning for Robust Face Alignment},
  journal   = {IEEE Transactions on Image Processing},
  year      = {2021},
  publisher = {IEEE},
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018},
}
Results on WFLW dataset
The model is trained on WFLW train set.
Model | Input Size | NME | ckpt | log |
---|---|---|---|---|
ResNet-50+SoftWingLoss | 256x256 | 4.44 | ckpt | log |
Topdown Regression + Resnet + Wingloss on WFLW¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human pose estimation via deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
Wingloss (CVPR'2018)
@inproceedings{feng2018wing,
  author    = {Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun},
  title     = {Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks},
  booktitle = {Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on},
  pages     = {2235--2245},
  year      = {2018},
  publisher = {IEEE},
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018},
}
Results on WFLW dataset
The model is trained on WFLW train set.
Model | Input Size | NME | ckpt | log |
---|---|---|---|---|
ResNet-50+WingLoss | 256x256 | 4.67 | ckpt | log |
Topdown Heatmap + Hrnetv2 on WFLW¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Jingdong Wang and Ke Sun and Tianheng Cheng and Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019},
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018},
}
Results on WFLW dataset
The model is trained on WFLW train.
Arch | Input Size | NMEtest | NMEpose | NMEillumination | NMEocclusion | NMEblur | NMEmakeup | NMEexpression | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 4.06 | 6.97 | 3.99 | 4.83 | 4.58 | 3.94 | 4.33 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Dark on WFLW¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Jingdong Wang and Ke Sun and Tianheng Cheng and Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019},
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7093--7102},
  year      = {2020},
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018},
}
Results on WFLW dataset
The model is trained on WFLW train.
Arch | Input Size | NMEtest | NMEpose | NMEillumination | NMEocclusion | NMEblur | NMEmakeup | NMEexpression | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|
pose_hrnetv2_w18_dark | 256x256 | 3.98 | 6.98 | 3.96 | 4.78 | 4.56 | 3.89 | 4.29 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Awing on WFLW¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Jingdong Wang and Ke Sun and Tianheng Cheng and Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019},
}
AdaptiveWingloss (ICCV'2019)
@inproceedings{wang2019adaptive,
  author    = {Wang, Xinyao and Bo, Liefeng and Fuxin, Li},
  title     = {Adaptive wing loss for robust face alignment via heatmap regression},
  booktitle = {Proceedings of the IEEE/CVF international conference on computer vision},
  pages     = {6971--6981},
  year      = {2019},
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018},
}
Results on WFLW dataset
The model is trained on WFLW train.
Arch | Input Size | NMEtest | NMEpose | NMEillumination | NMEocclusion | NMEblur | NMEmakeup | NMEexpression | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|
pose_hrnetv2_w18_awing | 256x256 | 4.02 | 6.94 | 3.97 | 4.78 | 4.59 | 3.87 | 4.28 | ckpt | log |
Rtmpose + Rtmpose on WFLW¶
RTMDet (ArXiv 2022)
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018},
}
Results on WFLW dataset
The model is trained on WFLW train.
Arch | Input Size | NME | ckpt | log |
---|---|---|---|---|
pose_rtmpose_m | 256x256 | 4.01 | ckpt | log |
Animal-Pose (ICCV’2019)¶
Topdown Heatmap + Resnet on Animalpose¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
Animal-Pose (ICCV'2019)
@inproceedings{Cao_2019_ICCV,
  author    = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
  title     = {Cross-Domain Adaptation for Animal Pose Estimation},
  booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2019},
}
Results on AnimalPose validation set (1117 instances)
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnet_50 | 256x256 | 0.691 | 0.947 | 0.770 | 0.736 | 0.955 | ckpt | log |
pose_resnet_101 | 256x256 | 0.696 | 0.948 | 0.774 | 0.736 | 0.951 | ckpt | log |
pose_resnet_152 | 256x256 | 0.704 | 0.938 | 0.786 | 0.748 | 0.946 | ckpt | log |
Topdown Heatmap + Hrnet on Animalpose¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
Animal-Pose (ICCV'2019)
@inproceedings{Cao_2019_ICCV,
  author    = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
  title     = {Cross-Domain Adaptation for Animal Pose Estimation},
  booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
  month     = oct,
  year      = {2019},
}
Results on AnimalPose validation set (1117 instances)
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x256 | 0.740 | 0.959 | 0.833 | 0.780 | 0.965 | ckpt | log |
pose_hrnet_w48 | 256x256 | 0.738 | 0.958 | 0.831 | 0.778 | 0.962 | ckpt | log |
FreiHand (ICCV’2019)¶
Topdown Heatmap + Resnet on Freihand2d¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
FreiHand (ICCV'2019)
@inproceedings{zimmermann2019freihand,
  author    = {Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas},
  title     = {{FreiHAND}: A dataset for markerless capture of hand pose and shape from single {RGB} images},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {813--822},
  year      = {2019},
}
Results on FreiHand val & test set
Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|---|
test | pose_resnet_50 | 224x224 | 0.999 | 0.868 | 3.27 | ckpt | log |
OneHand10K (TCSVT’2019)¶
Topdown Regression + Resnet on Onehand10k¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human pose estimation via deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
  author    = {Wang, Yangang and Peng, Cong and Liu, Yebin},
  title     = {Mask-pose cascaded {CNN} for {2D} hand pose estimation from single color image},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume    = {29},
  number    = {11},
  pages     = {3258--3268},
  year      = {2018},
  publisher = {IEEE},
}
Results on OneHand10K val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
deeppose_resnet_50 | 256x256 | 0.990 | 0.485 | 34.21 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Udp on Onehand10k¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Jingdong Wang and Ke Sun and Tianheng Cheng and Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019},
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
author={Wang, Yangang and Peng, Cong and Liu, Yebin},
journal={IEEE Transactions on Circuits and Systems for Video Technology},
volume={29},
number={11},
pages={3258--3268},
year={2019},
publisher={IEEE}
}
Results on OneHand10K val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18_udp | 256x256 | 0.990 | 0.571 | 23.88 | ckpt | log |
Topdown Heatmap + Mobilenetv2 on Onehand10k¶
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
title={{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={4510--4520},
year={2018}
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
author={Wang, Yangang and Peng, Cong and Liu, Yebin},
journal={IEEE Transactions on Circuits and Systems for Video Technology},
volume={29},
number={11},
pages={3258--3268},
year={2019},
publisher={IEEE}
}
Results on OneHand10K val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_mobilenet_v2 | 256x256 | 0.986 | 0.537 | 28.56 | ckpt | log |
Topdown Heatmap + Hrnetv2 on Onehand10k¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
title={Deep High-Resolution Representation Learning for Visual Recognition},
author={Jingdong Wang and Ke Sun and Tianheng Cheng and
Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
year={2019}
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
author={Wang, Yangang and Peng, Cong and Liu, Yebin},
journal={IEEE Transactions on Circuits and Systems for Video Technology},
volume={29},
number={11},
pages={3258--3268},
year={2019},
publisher={IEEE}
}
Results on OneHand10K val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 0.990 | 0.567 | 24.26 | ckpt | log |
Topdown Heatmap + Resnet on Onehand10k¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
author={Wang, Yangang and Peng, Cong and Liu, Yebin},
journal={IEEE Transactions on Circuits and Systems for Video Technology},
volume={29},
number={11},
pages={3258--3268},
year={2019},
publisher={IEEE}
}
Results on OneHand10K val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_resnet_50 | 256x256 | 0.989 | 0.555 | 25.16 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Dark on Onehand10k¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
title={Deep High-Resolution Representation Learning for Visual Recognition},
author={Jingdong Wang and Ke Sun and Tianheng Cheng and
Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
year={2019}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
title={Distribution-aware coordinate representation for human pose estimation},
author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={7093--7102},
year={2020}
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
author={Wang, Yangang and Peng, Cong and Liu, Yebin},
journal={IEEE Transactions on Circuits and Systems for Video Technology},
volume={29},
number={11},
pages={3258--3268},
year={2019},
publisher={IEEE}
}
Results on OneHand10K val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18_dark | 256x256 | 0.990 | 0.572 | 23.96 | ckpt | log |
UBody (CVPR’2023)¶
Topdown Heatmap + Hrnet + Ubody-Coco-Wholebody on Ubody2d¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
UBody (CVPR'2023)
@inproceedings{lin2023one,
title={One-Stage {3D} Whole-Body Mesh Recovery with Component Aware Transformer},
author={Lin, Jing and Zeng, Ailing and Wang, Haoqian and Zhang, Lei and Li, Yu},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
year={2023}
}
Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x192 | 0.685 | 0.759 | 0.564 | 0.675 | 0.625 | 0.705 | 0.516 | 0.609 | 0.549 | 0.646 | ckpt | log |
COCO-WholeBody-Hand (ECCV’2020)¶
Topdown Heatmap + Resnet + Coco + Wholebody + Hand on Coco_wholebody_hand¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_resnet_50 | 256x256 | 0.800 | 0.833 | 4.64 | ckpt | log |
Topdown Heatmap + Scnet + Coco + Wholebody + Hand on Coco_wholebody_hand¶
SCNet (CVPR'2020)
@inproceedings{liu2020improving,
title={Improving Convolutional Networks with Self-Calibrated Convolutions},
author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={10096--10105},
year={2020}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_scnet_50 | 256x256 | 0.803 | 0.834 | 4.55 | ckpt | log |
Topdown Heatmap + Mobilenetv2 + Coco + Wholebody + Hand on Coco_wholebody_hand¶
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
title={{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={4510--4520},
year={2018}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_mobilenetv2 | 256x256 | 0.795 | 0.829 | 4.77 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Coco + Wholebody + Hand on Coco_wholebody_hand¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
title={Deep High-Resolution Representation Learning for Visual Recognition},
author={Jingdong Wang and Ke Sun and Tianheng Cheng and
Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
year={2019}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 0.813 | 0.840 | 4.39 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Dark + Coco + Wholebody + Hand on Coco_wholebody_hand¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
title={Deep High-Resolution Representation Learning for Visual Recognition},
author={Jingdong Wang and Ke Sun and Tianheng Cheng and
Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
year={2019}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
title={Distribution-aware coordinate representation for human pose estimation},
author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={7093--7102},
year={2020}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18_dark | 256x256 | 0.814 | 0.840 | 4.37 | ckpt | log |
Topdown Heatmap + Hourglass + Coco + Wholebody + Hand on Coco_wholebody_hand¶
Hourglass (ECCV'2016)
@inproceedings{newell2016stacked,
title={Stacked hourglass networks for human pose estimation},
author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
booktitle={European conference on computer vision},
pages={483--499},
year={2016},
publisher={Springer}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hourglass_52 | 256x256 | 0.804 | 0.835 | 4.54 | ckpt | log |
Topdown Heatmap + Litehrnet + Coco + Wholebody + Hand on Coco_wholebody_hand¶
LiteHRNet (CVPR'2021)
@inproceedings{Yulitehrnet21,
title={{Lite-HRNet}: A Lightweight High-Resolution Network},
author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
year={2021}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
LiteHRNet-18 | 256x256 | 0.795 | 0.830 | 4.77 | ckpt | log |
Rtmpose + Rtmpose + Coco + Wholebody + Hand on Coco_wholebody_hand¶
RTMDet (ArXiv 2022)
@misc{lyu2022rtmdet,
title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
year={2022},
eprint={2212.07784},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
COCO-WholeBody-Hand (ECCV'2020)
@inproceedings{jin2020whole,
title={Whole-Body Human Pose Estimation in the Wild},
author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2020}
}
Results on COCO-WholeBody-Hand val set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
rtmpose_m | 256x256 | 0.815 | 0.837 | 4.51 | ckpt | log |
COCO (ECCV’2014)¶
Rtmo + Rtmo on Body7¶
RTMO
@misc{lu2023rtmo,
title={{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
author={Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
year={2023},
eprint={2312.07526},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
AI Challenger (ArXiv'2017)
@article{wu2017ai,
title={{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
journal={arXiv preprint arXiv:1711.06475},
year={2017},
eprint={1711.06475},
archivePrefix={arXiv}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
title={{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
journal={arXiv preprint arXiv:1812.00324},
year={2018},
eprint={1812.00324},
archivePrefix={arXiv}
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
title = {Towards understanding action recognition},
author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
booktitle = {International Conf. on Computer Vision (ICCV)},
month = dec,
pages = {3192--3199},
year = {2013}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
author = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
title = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year = {2014},
month = jun
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
title={{PoseTrack}: A Benchmark for Human Pose Estimation and Tracking},
author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages={5167--5176},
year={2018}
}
Halpe (CVPR'2020)
@inproceedings{li2020pastanet,
title={{PaStaNet}: Toward Human Activity Knowledge Engine},
author={Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu and Ma, Ze and Chen, Mingyang and Lu, Cewu},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
year={2020}
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | onnx |
---|---|---|---|---|---|---|---|---|---|
RTMO-t | 640x640 | 0.574 | 0.803 | 0.613 | 0.611 | 0.836 | ckpt | log | onnx |
RTMO-s | 640x640 | 0.686 | 0.879 | 0.744 | 0.723 | 0.908 | ckpt | log | onnx |
RTMO-m | 640x640 | 0.726 | 0.899 | 0.790 | 0.763 | 0.926 | ckpt | log | onnx |
RTMO-l | 640x640 | 0.748 | 0.911 | 0.813 | 0.786 | 0.939 | ckpt | log | onnx |
Rtmo + Rtmo on Coco¶
RTMO
@misc{lu2023rtmo,
title={{RTMO}: Towards High-Performance One-Stage Real-Time Multi-Person Pose Estimation},
author={Peng Lu and Tao Jiang and Yining Li and Xiangtai Li and Kai Chen and Wenming Yang},
year={2023},
eprint={2312.07526},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
RTMO-s | 640x640 | 0.677 | 0.878 | 0.737 | 0.715 | 0.908 | ckpt | log |
RTMO-m | 640x640 | 0.709 | 0.890 | 0.778 | 0.747 | 0.920 | ckpt | log |
RTMO-l | 640x640 | 0.724 | 0.899 | 0.788 | 0.762 | 0.927 | ckpt | log |
Integral Regression + Resnet + Debias on Coco¶
Debias IPR (ICCV'2021)
@inproceedings{gu2021removing,
title={Removing the Bias of Integral Pose Regression},
author={Gu, Kerui and Yang, Linlin and Yao, Angela},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
pages={11067--11076},
year={2021}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
debias-ipr_resnet_50 | 256x256 | 0.675 | 0.872 | 0.740 | 0.765 | 0.928 | ckpt | log |
Integral Regression + Resnet + DSNT on Coco¶
DSNT (2018)
@article{nibali2018numerical,
title={Numerical Coordinate Regression with Convolutional Neural Networks},
author={Nibali, Aiden and He, Zhen and Morgan, Stuart and Prendergast, Luke},
journal={arXiv preprint arXiv:1801.07372},
year={2018},
eprint={1801.07372},
archivePrefix={arXiv}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
ipr_resnet_50_dsnt | 256x256 | 0.674 | 0.870 | 0.744 | 0.764 | 0.928 | ckpt | log |
Integral Regression + Resnet + Ipr on Coco¶
IPR (ECCV'2018)
@inproceedings{sun2018integral,
title={Integral human pose regression},
author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={529--545},
year={2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
ipr_resnet_50 | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | ckpt | log |
Edpose + Edpose on Coco¶
ED-Pose (ICLR'2023)
@inproceedings{
yang2023explicit,
title={Explicit Box Detection Unifies End-to-End Multi-Person Pose Estimation},
author={Jie Yang and Ailing Zeng and Shilong Liu and Feng Li and Ruimao Zhang and Lei Zhang},
booktitle={International Conference on Learning Representations},
year={2023},
url={https://openreview.net/forum?id=s4WVupnJjmX}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017.
Arch | BackBone | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
edpose_res50_coco | ResNet-50 | 0.716 | 0.897 | 0.783 | 0.793 | 0.943 | ckpt | log |
The checkpoint is converted from the official repo. Training of EDPose is not supported yet; it will be supported in a future update.
The above config follows Pure Python style. Please install mmengine>=0.8.2
to use this config.
Topdown Regression + Mobilenetv2 + Rle on Coco¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
author={Toshev, Alexander and Szegedy, Christian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={1653--1660},
year={2014}
}
RLE (ICCV'2021)
@inproceedings{li2021human,
title={Human pose regression with residual log-likelihood estimation},
author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
pages={11025--11034},
year={2021}
}
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
title={{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={4510--4520},
year={2018}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
deeppose_mobilenetv2_rle_pretrained | 256x192 | 0.593 | 0.836 | 0.660 | 0.644 | 0.877 | ckpt | log |
Topdown Regression + Resnet on Coco¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
author={Toshev, Alexander and Szegedy, Christian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={1653--1660},
year={2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
deeppose_resnet_50 | 256x192 | 0.541 | 0.824 | 0.601 | 0.649 | 0.893 | ckpt | log |
deeppose_resnet_101 | 256x192 | 0.562 | 0.831 | 0.629 | 0.670 | 0.900 | ckpt | log |
deeppose_resnet_152 | 256x192 | 0.584 | 0.842 | 0.659 | 0.688 | 0.907 | ckpt | log |
Topdown Regression + Resnet + Rle on Coco¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
author={Toshev, Alexander and Szegedy, Christian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={1653--1660},
year={2014}
}
RLE (ICCV'2021)
@inproceedings{li2021human,
title={Human pose regression with residual log-likelihood estimation},
author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
pages={11025--11034},
year={2021}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
title={Deep residual learning for image recognition},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={770--778},
year={2016}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
deeppose_resnet_50_rle | 256x192 | 0.706 | 0.888 | 0.776 | 0.753 | 0.924 | ckpt | log |
deeppose_resnet_50_rle_pretrained | 256x192 | 0.719 | 0.891 | 0.788 | 0.764 | 0.925 | ckpt | log |
deeppose_resnet_101_rle | 256x192 | 0.722 | 0.894 | 0.794 | 0.768 | 0.930 | ckpt | log |
deeppose_resnet_152_rle | 256x192 | 0.731 | 0.897 | 0.805 | 0.777 | 0.933 | ckpt | log |
deeppose_resnet_152_rle | 384x288 | 0.749 | 0.901 | 0.815 | 0.793 | 0.935 | ckpt | log |
Dekr + Hrnet on Coco¶
DEKR (CVPR'2021)
@inproceedings{geng2021bottom,
title={Bottom-up human pose estimation via disentangled keypoint regression},
author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={14676--14686},
year={2021}
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017 without multi-scale test
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
HRNet-w32 | 512x512 | 0.686 | 0.868 | 0.750 | 0.735 | 0.898 | ckpt | log |
HRNet-w48 | 640x640 | 0.714 | 0.883 | 0.777 | 0.762 | 0.915 | ckpt | log |
Yoloxpose + Yoloxpose on Coco¶
YOLO-Pose (CVPRW'2022)
@inproceedings{maji2022yolo,
title={{YOLO-Pose}: Enhancing {YOLO} for Multi Person Pose Estimation Using Object Keypoint Similarity Loss},
author={Maji, Debapriya and Nagori, Soyeb and Mathew, Manu and Poddar, Deepak},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops},
pages={2637--2646},
year={2022}
}
YOLOX
@article{ge2021yolox,
title={{YOLOX}: Exceeding {YOLO} Series in 2021},
author={Ge, Zheng and Liu, Songtao and Wang, Feng and Li, Zeming and Sun, Jian},
journal={arXiv preprint arXiv:2107.08430},
year={2021},
eprint={2107.08430},
archivePrefix={arXiv}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
title={{Microsoft} {COCO}: Common Objects in Context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
publisher={Springer}
}
Results on COCO val2017
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
yoloxpose_tiny | 416x416 | 0.526 | 0.793 | 0.556 | 0.571 | 0.833 | ckpt | log |
yoloxpose_s | 640x640 | 0.641 | 0.872 | 0.702 | 0.682 | 0.902 | ckpt | log |
yoloxpose_m | 640x640 | 0.695 | 0.899 | 0.766 | 0.733 | 0.926 | ckpt | log |
yoloxpose_l | 640x640 | 0.712 | 0.901 | 0.782 | 0.749 | 0.926 | ckpt | log |
Simcc + Vipnas on Coco¶
SimCC (ECCV'2022)
@misc{https://doi.org/10.48550/arxiv.2107.03332,
title={{SimCC}: a Simple Coordinate Classification Perspective for Human Pose Estimation},
author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
year={2021},
eprint={2107.03332},
archivePrefix={arXiv},
doi={10.48550/arXiv.2107.03332}
}
ViPNAS (CVPR'2021)
@inproceedings{xu2021vipnas,
title={{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
year={2021}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
simcc_S-ViPNAS-MobileNetV3 | 256x192 | 0.695 | 0.883 | 0.772 | 0.755 | 0.927 | ckpt | log |
Simcc + Mobilenetv2 on Coco¶
SimCC (ECCV'2022)
@misc{https://doi.org/10.48550/arxiv.2107.03332,
  author        = {Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
  title         = {{SimCC}: a Simple Coordinate Classification Perspective for Human Pose Estimation},
  year          = {2021},
  eprint        = {2107.03332},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  doi           = {10.48550/arXiv.2107.03332}
}
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted residuals and linear bottlenecks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
simcc_mobilenetv2_wo_deconv | 256x192 | 0.620 | 0.855 | 0.697 | 0.678 | 0.902 | ckpt | log |
Simcc + Resnet on Coco¶
SimCC (ECCV'2022)
@misc{https://doi.org/10.48550/arxiv.2107.03332,
  author        = {Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
  title         = {{SimCC}: a Simple Coordinate Classification Perspective for Human Pose Estimation},
  year          = {2021},
  eprint        = {2107.03332},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  doi           = {10.48550/arXiv.2107.03332}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
simcc_resnet_50 | 256x192 | 0.721 | 0.897 | 0.798 | 0.781 | 0.937 | ckpt | log |
simcc_resnet_50 | 384x288 | 0.735 | 0.899 | 0.800 | 0.790 | 0.939 | ckpt | log |
Topdown Heatmap + Hrnet + Aic on Coco¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
AI Challenger (ArXiv'2017)
@article{wu2017ai,
  author        = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title         = {{AI} {Challenger}: A large-scale dataset for going deeper in image understanding},
  journal       = {arXiv preprint arXiv:1711.06475},
  year          = {2017},
  eprint        = {1711.06475},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
MMPose supports training models with combined datasets. coco-aic-merge and coco-aic-combine are two examples.
coco-aic-merge leverages AIC data with partial keypoints as auxiliary data to train a COCO model.
coco-aic-combine constructs a combined dataset whose keypoints are the union of COCO and AIC keypoints to train a model that predicts keypoints of both datasets.
Evaluation results on COCO val2017 of models trained solely on the COCO dataset and on the combined datasets are shown below. These models are evaluated with a detector having human AP of 56.4 on the COCO val2017 dataset.
Train Set | Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|---|
coco | pose_hrnet_w32 | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | ckpt | log |
coco-aic-merge | pose_hrnet_w32 | 256x192 | 0.756 | 0.907 | 0.828 | 0.809 | 0.944 | ckpt | log |
coco-aic-combine | pose_hrnet_w32 | 256x192 | 0.755 | 0.904 | 0.825 | 0.807 | 0.942 | ckpt | log |
Topdown Heatmap + Hourglass on Coco¶
Hourglass (ECCV'2016)
@inproceedings{newell2016stacked,
  author    = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title     = {Stacked hourglass networks for human pose estimation},
  booktitle = {European conference on computer vision},
  pages     = {483--499},
  year      = {2016},
  publisher = {Springer}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hourglass_52 | 256x256 | 0.726 | 0.896 | 0.799 | 0.780 | 0.934 | ckpt | log |
pose_hourglass_52 | 384x384 | 0.746 | 0.900 | 0.812 | 0.797 | 0.939 | ckpt | log |
Topdown Heatmap + Hrformer on Coco¶
HRFormer (NIPS'2021)
@article{yuan2021hrformer,
  author  = {Yuan, Yuhui and Fu, Rao and Huang, Lang and Lin, Weihong and Zhang, Chao and Chen, Xilin and Wang, Jingdong},
  title   = {{HRFormer}: High-Resolution Vision Transformer for Dense Prediction},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {34},
  year    = {2021}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrformer_small | 256x192 | 0.738 | 0.904 | 0.812 | 0.793 | 0.941 | ckpt | log |
pose_hrformer_small | 384x288 | 0.757 | 0.905 | 0.824 | 0.807 | 0.941 | ckpt | log |
pose_hrformer_base | 256x192 | 0.754 | 0.906 | 0.827 | 0.807 | 0.943 | ckpt | log |
pose_hrformer_base | 384x288 | 0.774 | 0.909 | 0.842 | 0.823 | 0.945 | ckpt | log |
Topdown Heatmap + Hrnet on Coco¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | ckpt | log |
pose_hrnet_w32 | 384x288 | 0.761 | 0.908 | 0.826 | 0.811 | 0.944 | ckpt | log |
pose_hrnet_w48 | 256x192 | 0.756 | 0.908 | 0.826 | 0.809 | 0.945 | ckpt | log |
pose_hrnet_w48 | 384x288 | 0.767 | 0.911 | 0.832 | 0.817 | 0.947 | ckpt | log |
Topdown Heatmap + Vipnas on Coco¶
ViPNAS (CVPR'2021)
@inproceedings{xu2021vipnas,
  author    = {Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
  title     = {{ViPNAS}: Efficient Video Pose Estimation via Neural Architecture Search},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2021}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
S-ViPNAS-MobileNetV3 | 256x192 | 0.700 | 0.887 | 0.783 | 0.758 | 0.929 | ckpt | log |
S-ViPNAS-Res50 | 256x192 | 0.711 | 0.894 | 0.787 | 0.769 | 0.934 | ckpt | log |
Topdown Heatmap + Resnext on Coco¶
ResNext (CVPR'2017)
@inproceedings{xie2017aggregated,
  author    = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  title     = {Aggregated residual transformations for deep neural networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2017},
  pages     = {1492--1500}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnext_50 | 256x192 | 0.715 | 0.897 | 0.791 | 0.771 | 0.935 | ckpt | log |
pose_resnext_50 | 384x288 | 0.724 | 0.899 | 0.794 | 0.777 | 0.936 | ckpt | log |
pose_resnext_101 | 256x192 | 0.726 | 0.900 | 0.801 | 0.781 | 0.939 | ckpt | log |
pose_resnext_101 | 384x288 | 0.744 | 0.903 | 0.815 | 0.794 | 0.939 | ckpt | log |
pose_resnext_152 | 256x192 | 0.730 | 0.903 | 0.808 | 0.785 | 0.940 | ckpt | log |
pose_resnext_152 | 384x288 | 0.742 | 0.904 | 0.810 | 0.794 | 0.940 | ckpt | log |
Topdown Heatmap + CPM on Coco¶
CPM (CVPR'2016)
@inproceedings{wei2016convolutional,
  author    = {Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
  title     = {Convolutional pose machines},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  year      = {2016},
  pages     = {4724--4732}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
cpm | 256x192 | 0.627 | 0.862 | 0.709 | 0.689 | 0.906 | ckpt | log |
cpm | 384x288 | 0.652 | 0.865 | 0.730 | 0.710 | 0.907 | ckpt | log |
Topdown Heatmap + Shufflenetv2 on Coco¶
ShufflenetV2 (ECCV'2018)
@inproceedings{ma2018shufflenet,
  author    = {Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
  title     = {{ShuffleNet} {V2}: Practical guidelines for efficient {CNN} architecture design},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {116--131},
  year      = {2018}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_shufflenetv2 | 256x192 | 0.602 | 0.857 | 0.672 | 0.668 | 0.902 | ckpt | log |
pose_shufflenetv2 | 384x288 | 0.638 | 0.866 | 0.707 | 0.699 | 0.910 | ckpt | log |
Topdown Heatmap + RSN on Coco¶
RSN (ECCV'2020)
@misc{cai2020learning,
  author        = {Cai, Yuanhao and Wang, Zhicheng and Luo, Zhengxiong and Yin, Binyi and Du, Angang and Wang, Haoqian and Zhou, Xinyu and Zhou, Erjin and Zhang, Xiangyu and Sun, Jian},
  title         = {Learning Delicate Local Representations for Multi-Person Pose Estimation},
  eprint        = {2003.04030},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  year          = {2020}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rsn_18 | 256x192 | 0.704 | 0.887 | 0.781 | 0.773 | 0.927 | ckpt | log |
rsn_50 | 256x192 | 0.724 | 0.894 | 0.799 | 0.790 | 0.935 | ckpt | log |
2xrsn_50 | 256x192 | 0.748 | 0.900 | 0.821 | 0.810 | 0.939 | ckpt | log |
3xrsn_50 | 256x192 | 0.750 | 0.900 | 0.824 | 0.814 | 0.941 | ckpt | log |
Topdown Heatmap + Hrnet + Dark on Coco¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32_dark | 256x192 | 0.757 | 0.907 | 0.825 | 0.807 | 0.943 | ckpt | log |
pose_hrnet_w32_dark | 384x288 | 0.766 | 0.907 | 0.829 | 0.815 | 0.943 | ckpt | log |
pose_hrnet_w48_dark | 256x192 | 0.764 | 0.907 | 0.831 | 0.814 | 0.942 | ckpt | log |
pose_hrnet_w48_dark | 384x288 | 0.772 | 0.911 | 0.833 | 0.821 | 0.948 | ckpt | log |
Topdown Heatmap + Resnet + Fp16 on Coco¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}
FP16 (ArXiv'2017)
@article{micikevicius2017mixed,
  author        = {Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
  title         = {Mixed precision training},
  journal       = {arXiv preprint arXiv:1710.03740},
  year          = {2017},
  eprint        = {1710.03740},
  archivePrefix = {arXiv}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnet_50_fp16 | 256x192 | 0.716 | 0.898 | 0.798 | 0.772 | 0.937 | ckpt | log |
Topdown Heatmap + Resnetv1d on Coco¶
ResNetV1D (CVPR'2019)
@inproceedings{he2019bag,
  author    = {He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
  title     = {Bag of tricks for image classification with convolutional neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2019},
  pages     = {558--567}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnetv1d_50 | 256x192 | 0.722 | 0.897 | 0.796 | 0.777 | 0.936 | ckpt | log |
pose_resnetv1d_50 | 384x288 | 0.730 | 0.899 | 0.800 | 0.782 | 0.935 | ckpt | log |
pose_resnetv1d_101 | 256x192 | 0.732 | 0.901 | 0.808 | 0.785 | 0.940 | ckpt | log |
pose_resnetv1d_101 | 384x288 | 0.748 | 0.906 | 0.817 | 0.798 | 0.941 | ckpt | log |
pose_resnetv1d_152 | 256x192 | 0.737 | 0.904 | 0.814 | 0.790 | 0.940 | ckpt | log |
pose_resnetv1d_152 | 384x288 | 0.751 | 0.907 | 0.821 | 0.801 | 0.942 | ckpt | log |
Topdown Heatmap + Alexnet on Coco¶
AlexNet (NeurIPS'2012)
@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {1097--1105},
  year      = {2012}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_alexnet | 256x192 | 0.448 | 0.767 | 0.461 | 0.521 | 0.829 | ckpt | log |
Topdown Heatmap + Seresnet on Coco¶
SEResNet (CVPR'2018)
@inproceedings{hu2018squeeze,
  author    = {Hu, Jie and Shen, Li and Sun, Gang},
  title     = {Squeeze-and-excitation networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {7132--7141}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_seresnet_50 | 256x192 | 0.729 | 0.903 | 0.807 | 0.784 | 0.941 | ckpt | log |
pose_seresnet_50 | 384x288 | 0.748 | 0.904 | 0.819 | 0.799 | 0.941 | ckpt | log |
pose_seresnet_101 | 256x192 | 0.734 | 0.905 | 0.814 | 0.790 | 0.941 | ckpt | log |
pose_seresnet_101 | 384x288 | 0.754 | 0.907 | 0.823 | 0.805 | 0.943 | ckpt | log |
pose_seresnet_152* | 256x192 | 0.730 | 0.899 | 0.810 | 0.787 | 0.939 | ckpt | log |
pose_seresnet_152* | 384x288 | 0.753 | 0.906 | 0.824 | 0.806 | 0.945 | ckpt | log |
Note that * means without ImageNet pre-training.
Topdown Heatmap + Hrnet + Fp16 on Coco¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
FP16 (ArXiv'2017)
@article{micikevicius2017mixed,
  author        = {Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
  title         = {Mixed precision training},
  journal       = {arXiv preprint arXiv:1710.03740},
  year          = {2017},
  eprint        = {1710.03740},
  archivePrefix = {arXiv}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32_fp16 | 256x192 | 0.749 | 0.907 | 0.822 | 0.802 | 0.946 | ckpt | log |
Topdown Heatmap + Shufflenetv1 on Coco¶
ShufflenetV1 (CVPR'2018)
@inproceedings{zhang2018shufflenet,
  author    = {Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
  title     = {{ShuffleNet}: An extremely efficient convolutional neural network for mobile devices},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {6848--6856},
  year      = {2018}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_shufflenetv1 | 256x192 | 0.587 | 0.849 | 0.654 | 0.654 | 0.896 | ckpt | log |
pose_shufflenetv1 | 384x288 | 0.626 | 0.862 | 0.696 | 0.687 | 0.903 | ckpt | log |
Topdown Heatmap + Swin on Coco¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
Swin (ICCV'2021)
@inproceedings{liu2021swin,
  author    = {Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
  title     = {{Swin} {Transformer}: Hierarchical vision transformer using shifted windows},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {10012--10022},
  year      = {2021}
}
FPN (CVPR'2017)
@inproceedings{lin2017feature,
  author    = {Lin, Tsung-Yi and Doll{\'a}r, Piotr and Girshick, Ross and He, Kaiming and Hariharan, Bharath and Belongie, Serge},
  title     = {Feature pyramid networks for object detection},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2017},
  pages     = {2117--2125}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_swin_t | 256x192 | 0.724 | 0.901 | 0.806 | 0.782 | 0.940 | ckpt | log |
pose_swin_b | 256x192 | 0.737 | 0.904 | 0.820 | 0.794 | 0.942 | ckpt | log |
pose_swin_b | 384x288 | 0.759 | 0.910 | 0.832 | 0.811 | 0.946 | ckpt | log |
pose_swin_l | 256x192 | 0.743 | 0.906 | 0.821 | 0.798 | 0.943 | ckpt | log |
pose_swin_l | 384x288 | 0.763 | 0.912 | 0.830 | 0.814 | 0.949 | ckpt | log |
Topdown Heatmap + Cspnext + Udp on Coco¶
RTMDet (ArXiv 2022)
@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_cspnext_t_udp | 256x192 | 0.665 | 0.874 | 0.723 | 0.723 | 0.917 | ckpt | log |
pose_cspnext_s_udp | 256x192 | 0.697 | 0.886 | 0.776 | 0.753 | 0.929 | ckpt | log |
pose_cspnext_m_udp | 256x192 | 0.732 | 0.896 | 0.806 | 0.785 | 0.937 | ckpt | log |
pose_cspnext_l_udp | 256x192 | 0.750 | 0.904 | 0.822 | 0.800 | 0.941 | ckpt | log |
pose_cspnext_t_udp_aic_coco | 256x192 | 0.655 | 0.884 | 0.731 | 0.689 | 0.890 | ckpt | log |
pose_cspnext_s_udp_aic_coco | 256x192 | 0.700 | 0.905 | 0.783 | 0.733 | 0.918 | ckpt | log |
pose_cspnext_m_udp_aic_coco | 256x192 | 0.748 | 0.925 | 0.818 | 0.777 | 0.933 | ckpt | log |
pose_cspnext_l_udp_aic_coco | 256x192 | 0.772 | 0.936 | 0.839 | 0.799 | 0.943 | ckpt | log |
Note that UDP also adopts the unbiased encoding/decoding algorithm of DARK.
Flip test and detector are not used in the results of aic-coco training.
Topdown Heatmap + MSPN on Coco¶
MSPN (ArXiv'2019)
@article{li2019rethinking,
  author        = {Li, Wenbo and Wang, Zhicheng and Yin, Binyi and Peng, Qixiang and Du, Yuming and Xiao, Tianzi and Yu, Gang and Lu, Hongtao and Wei, Yichen and Sun, Jian},
  title         = {Rethinking on Multi-Stage Networks for Human Pose Estimation},
  journal       = {arXiv preprint arXiv:1901.00148},
  year          = {2019},
  eprint        = {1901.00148},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
mspn_50 | 256x192 | 0.723 | 0.895 | 0.794 | 0.788 | 0.934 | ckpt | log |
2xmspn_50 | 256x192 | 0.754 | 0.903 | 0.826 | 0.816 | 0.942 | ckpt | log |
3xmspn_50 | 256x192 | 0.758 | 0.904 | 0.830 | 0.821 | 0.943 | ckpt | log |
4xmspn_50 | 256x192 | 0.765 | 0.906 | 0.835 | 0.826 | 0.943 | ckpt | log |
Topdown Heatmap + Resnest on Coco¶
ResNeSt (ArXiv'2020)
@article{zhang2020resnest,
  author        = {Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
  title         = {{ResNeSt}: Split-Attention Networks},
  journal       = {arXiv preprint arXiv:2004.08955},
  year          = {2020},
  eprint        = {2004.08955},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {Microsoft {COCO}: Common objects in context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnest_50 | 256x192 | 0.720 | 0.899 | 0.800 | 0.775 | 0.939 | ckpt | log |
pose_resnest_50 | 384x288 | 0.737 | 0.900 | 0.811 | 0.789 | 0.937 | ckpt | log |
pose_resnest_101 | 256x192 | 0.725 | 0.900 | 0.807 | 0.781 | 0.939 | ckpt | log |
pose_resnest_101 | 384x288 | 0.745 | 0.905 | 0.818 | 0.798 | 0.942 | ckpt | log |
pose_resnest_200 | 256x192 | 0.731 | 0.905 | 0.812 | 0.787 | 0.943 | ckpt | log |
pose_resnest_200 | 384x288 | 0.753 | 0.907 | 0.827 | 0.805 | 0.943 | ckpt | log |
pose_resnest_269 | 256x192 | 0.737 | 0.907 | 0.819 | 0.792 | 0.943 | ckpt | log |
pose_resnest_269 | 384x288 | 0.754 | 0.908 | 0.828 | 0.805 | 0.943 | ckpt | log |
Topdown Heatmap + Hrnet + Augmentation on Coco¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
Albumentations (Information'2020)
@article{buslaev2020albumentations,
  author    = {Buslaev, Alexander and Iglovikov, Vladimir I and Khvedchenya, Eugene and Parinov, Alex and Druzhinin, Mikhail and Kalinin, Alexandr A},
  title     = {Albumentations: fast and flexible image augmentations},
  journal   = {Information},
  publisher = {Multidisciplinary Digital Publishing Institute},
  year      = {2020},
  volume    = {11},
  number    = {2},
  pages     = {125}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
coarsedropout | 256x192 | 0.753 | 0.908 | 0.822 | 0.805 | 0.944 | ckpt | log |
gridmask | 256x192 | 0.752 | 0.906 | 0.825 | 0.804 | 0.943 | ckpt | log |
photometric | 256x192 | 0.754 | 0.908 | 0.825 | 0.805 | 0.943 | ckpt | log |
Topdown Heatmap + Scnet on Coco¶
SCNet (CVPR'2020)
@inproceedings{liu2020improving,
  author    = {Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
  title     = {Improving Convolutional Networks with Self-Calibrated Convolutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {10096--10105},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_scnet_50 | 256x192 | 0.728 | 0.899 | 0.807 | 0.784 | 0.938 | ckpt | log |
pose_scnet_50 | 384x288 | 0.751 | 0.906 | 0.818 | 0.802 | 0.942 | ckpt | log |
pose_scnet_101 | 256x192 | 0.733 | 0.902 | 0.811 | 0.789 | 0.940 | ckpt | log |
pose_scnet_101 | 384x288 | 0.752 | 0.906 | 0.823 | 0.804 | 0.943 | ckpt | log |
Topdown Heatmap + Mobilenetv2 on Coco¶
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_mobilenetv2 | 256x192 | 0.648 | 0.874 | 0.725 | 0.709 | 0.918 | ckpt | log |
pose_mobilenetv2 | 384x288 | 0.677 | 0.882 | 0.746 | 0.734 | 0.920 | ckpt | log |
Topdown Heatmap + VGG on Coco¶
VGG (ICLR'2015)
@article{simonyan2014very,
  author  = {Simonyan, Karen and Zisserman, Andrew},
  title   = {Very deep convolutional networks for large-scale image recognition},
  year    = {2014},
  journal = {arXiv preprint arXiv:1409.1556},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
vgg | 256x192 | 0.699 | 0.890 | 0.769 | 0.754 | 0.927 | ckpt | log |
Topdown Heatmap + Resnet + Dark on Coco¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnet_50_dark | 256x192 | 0.724 | 0.897 | 0.797 | 0.777 | 0.934 | ckpt | log |
pose_resnet_50_dark | 384x288 | 0.735 | 0.902 | 0.801 | 0.786 | 0.938 | ckpt | log |
pose_resnet_101_dark | 256x192 | 0.733 | 0.900 | 0.810 | 0.786 | 0.938 | ckpt | log |
pose_resnet_101_dark | 384x288 | 0.749 | 0.905 | 0.818 | 0.799 | 0.940 | ckpt | log |
pose_resnet_152_dark | 256x192 | 0.743 | 0.906 | 0.819 | 0.796 | 0.943 | ckpt | log |
pose_resnet_152_dark | 384x288 | 0.755 | 0.907 | 0.825 | 0.805 | 0.943 | ckpt | log |
Topdown Heatmap + Litehrnet on Coco¶
LiteHRNet (CVPR'2021)
@inproceedings{Yulitehrnet21,
  author    = {Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
  title     = {{Lite-HRNet}: A Lightweight High-Resolution Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2021}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
LiteHRNet-18 | 256x192 | 0.642 | 0.867 | 0.719 | 0.705 | 0.911 | ckpt | log |
LiteHRNet-18 | 384x288 | 0.676 | 0.876 | 0.746 | 0.735 | 0.919 | ckpt | log |
LiteHRNet-30 | 256x192 | 0.676 | 0.880 | 0.756 | 0.736 | 0.922 | ckpt | log |
LiteHRNet-30 | 384x288 | 0.700 | 0.883 | 0.776 | 0.758 | 0.926 | ckpt | log |
Topdown Heatmap + Resnet on Coco¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnet_50 | 256x192 | 0.718 | 0.898 | 0.796 | 0.774 | 0.934 | ckpt | log |
pose_resnet_50 | 384x288 | 0.731 | 0.900 | 0.799 | 0.782 | 0.937 | ckpt | log |
pose_resnet_101 | 256x192 | 0.728 | 0.904 | 0.809 | 0.783 | 0.942 | ckpt | log |
pose_resnet_101 | 384x288 | 0.749 | 0.906 | 0.817 | 0.799 | 0.941 | ckpt | log |
pose_resnet_152 | 256x192 | 0.736 | 0.904 | 0.818 | 0.791 | 0.942 | ckpt | log |
pose_resnet_152 | 384x288 | 0.750 | 0.908 | 0.821 | 0.800 | 0.942 | ckpt | log |
The following model is equipped with a visibility prediction head and has been trained using COCO and AIC datasets.
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnet_50 | 256x192 | 0.729 | 0.900 | 0.807 | 0.783 | 0.938 | ckpt | log |
Topdown Heatmap + Hrnet + Udp on Coco¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703},
}
UDP (CVPR'2020)
@InProceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32_udp | 256x192 | 0.762 | 0.907 | 0.829 | 0.810 | 0.942 | ckpt | log |
pose_hrnet_w32_udp | 384x288 | 0.768 | 0.909 | 0.832 | 0.815 | 0.945 | ckpt | log |
pose_hrnet_w48_udp | 256x192 | 0.768 | 0.908 | 0.833 | 0.817 | 0.945 | ckpt | log |
pose_hrnet_w48_udp | 384x288 | 0.773 | 0.911 | 0.836 | 0.821 | 0.946 | ckpt | log |
pose_hrnet_w32_udp_regress | 256x192 | 0.759 | 0.907 | 0.827 | 0.813 | 0.943 | ckpt | log |
Note that UDP also adopts the unbiased encoding/decoding algorithm of DARK.
Topdown Heatmap + PVT on Coco¶
PVT (ICCV'2021)
@inproceedings{wang2021pyramid,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {Pyramid vision transformer: A versatile backbone for dense prediction without convolutions},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2021},
  pages     = {568--578},
}
PVTV2 (CVMJ'2022)
@article{wang2022pvt,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {{PVT} v2: Improved baselines with {Pyramid Vision Transformer}},
  journal   = {Computational Visual Media},
  pages     = {1--10},
  year      = {2022},
  publisher = {Springer}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_pvt-s | 256x192 | 0.714 | 0.896 | 0.794 | 0.773 | 0.936 | ckpt | log |
pose_pvtv2-b2 | 256x192 | 0.737 | 0.905 | 0.812 | 0.791 | 0.942 | ckpt | log |
Cid + Hrnet on Coco¶
CID (CVPR'2022)
@InProceedings{Wang_2022_CVPR,
  author    = {Wang, Dongkai and Zhang, Shiliang},
  title     = {Contextual Instance Decoupling for Robust Multi-Person Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2022},
  pages     = {11060--11068}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 without multi-scale test
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
CID | 512x512 | 0.704 | 0.894 | 0.775 | 0.753 | 0.928 | ckpt | log |
CID | 512x512 | 0.715 | 0.900 | 0.782 | 0.765 | 0.935 | ckpt | log |
Rtmpose + Rtmpose on Coco¶
RTMPose (arXiv'2023)
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  publisher     = {arXiv},
  year          = {2023},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}
RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t | 256x192 | 0.682 | 0.883 | 0.759 | 0.736 | 0.920 | ckpt | log |
rtmpose-s | 256x192 | 0.716 | 0.892 | 0.789 | 0.768 | 0.929 | ckpt | log |
rtmpose-m | 256x192 | 0.746 | 0.899 | 0.817 | 0.795 | 0.935 | ckpt | log |
rtmpose-l | 256x192 | 0.758 | 0.906 | 0.826 | 0.806 | 0.942 | ckpt | log |
rtmpose-t-aic-coco | 256x192 | 0.685 | 0.880 | 0.761 | 0.738 | 0.918 | ckpt | log |
rtmpose-s-aic-coco | 256x192 | 0.722 | 0.892 | 0.794 | 0.772 | 0.929 | ckpt | log |
rtmpose-m-aic-coco | 256x192 | 0.758 | 0.903 | 0.826 | 0.806 | 0.940 | ckpt | log |
rtmpose-l-aic-coco | 256x192 | 0.765 | 0.906 | 0.835 | 0.813 | 0.942 | ckpt | log |
rtmpose-m-aic-coco | 384x288 | 0.770 | 0.908 | 0.833 | 0.816 | 0.943 | ckpt | log |
rtmpose-l-aic-coco | 384x288 | 0.773 | 0.907 | 0.835 | 0.819 | 0.942 | ckpt | log |
Associative Embedding + Hrnet on Coco¶
Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 without multi-scale test
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
HRNet-w32 | 512x512 | 0.656 | 0.864 | 0.719 | 0.711 | 0.893 | ckpt | log |
Topdown Heatmap + Hrnet on Humanart¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Human-Art (CVPR'2023)
@inproceedings{ju2023humanart,
  author    = {Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
  title     = {{Human-Art}: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023}
}
Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32-coco | 256x192 | 0.252 | 0.397 | 0.255 | 0.321 | 0.485 | ckpt | log |
pose_hrnet_w32-humanart-coco | 256x192 | 0.399 | 0.545 | 0.420 | 0.466 | 0.613 | ckpt | log |
pose_hrnet_w48-coco | 256x192 | 0.271 | 0.413 | 0.277 | 0.339 | 0.499 | ckpt | log |
pose_hrnet_w48-humanart-coco | 256x192 | 0.417 | 0.553 | 0.442 | 0.481 | 0.617 | ckpt | log |
Results on Human-Art validation dataset with ground-truth bounding-box
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32-coco | 256x192 | 0.533 | 0.771 | 0.562 | 0.574 | 0.792 | ckpt | log |
pose_hrnet_w32-humanart-coco | 256x192 | 0.754 | 0.906 | 0.812 | 0.783 | 0.916 | ckpt | log |
pose_hrnet_w48-coco | 256x192 | 0.557 | 0.782 | 0.593 | 0.595 | 0.804 | ckpt | log |
pose_hrnet_w48-humanart-coco | 256x192 | 0.769 | 0.906 | 0.825 | 0.796 | 0.919 | ckpt | log |
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
With classic decoder
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32-coco | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | ckpt | log |
pose_hrnet_w32-humanart-coco | 256x192 | 0.741 | 0.902 | 0.814 | 0.795 | 0.941 | ckpt | log |
pose_hrnet_w48-coco | 256x192 | 0.756 | 0.908 | 0.826 | 0.809 | 0.945 | ckpt | log |
pose_hrnet_w48-humanart-coco | 256x192 | 0.751 | 0.905 | 0.822 | 0.805 | 0.943 | ckpt | log |
Rtmpose + Rtmpose on Humanart¶
RTMPose (arXiv'2023)
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  publisher     = {arXiv},
  year          = {2023},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}
RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Human-Art (CVPR'2023)
@inproceedings{ju2023humanart,
  author    = {Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
  title     = {{Human-Art}: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023}
}
Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-coco | 256x192 | 0.161 | 0.283 | 0.154 | 0.221 | 0.373 | ckpt | log |
rtmpose-t-humanart-coco | 256x192 | 0.249 | 0.395 | 0.256 | 0.323 | 0.485 | ckpt | log |
rtmpose-s-coco | 256x192 | 0.199 | 0.328 | 0.198 | 0.261 | 0.418 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.311 | 0.462 | 0.323 | 0.381 | 0.540 | ckpt | log |
rtmpose-m-coco | 256x192 | 0.239 | 0.372 | 0.243 | 0.302 | 0.455 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.355 | 0.503 | 0.377 | 0.417 | 0.568 | ckpt | log |
rtmpose-l-coco | 256x192 | 0.260 | 0.393 | 0.267 | 0.323 | 0.472 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.378 | 0.521 | 0.399 | 0.442 | 0.584 | ckpt | log |
Results on Human-Art validation dataset with ground-truth bounding-box
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-coco | 256x192 | 0.444 | 0.725 | 0.453 | 0.488 | 0.750 | ckpt | log |
rtmpose-t-humanart-coco | 256x192 | 0.655 | 0.872 | 0.720 | 0.693 | 0.890 | ckpt | log |
rtmpose-s-coco | 256x192 | 0.480 | 0.739 | 0.498 | 0.521 | 0.763 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.698 | 0.893 | 0.768 | 0.732 | 0.903 | ckpt | log |
rtmpose-m-coco | 256x192 | 0.532 | 0.765 | 0.563 | 0.571 | 0.789 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.728 | 0.895 | 0.791 | 0.759 | 0.906 | ckpt | log |
rtmpose-l-coco | 256x192 | 0.564 | 0.789 | 0.602 | 0.599 | 0.808 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.753 | 0.905 | 0.812 | 0.783 | 0.915 | ckpt | log |
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-coco | 256x192 | 0.682 | 0.883 | 0.759 | 0.736 | 0.920 | ckpt | log |
rtmpose-t-humanart-coco | 256x192 | 0.665 | 0.875 | 0.739 | 0.721 | 0.916 | ckpt | log |
rtmpose-s-coco | 256x192 | 0.716 | 0.892 | 0.789 | 0.768 | 0.929 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.706 | 0.888 | 0.780 | 0.759 | 0.928 | ckpt | log |
rtmpose-m-coco | 256x192 | 0.746 | 0.899 | 0.817 | 0.795 | 0.935 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.725 | 0.892 | 0.795 | 0.775 | 0.929 | ckpt | log |
rtmpose-l-coco | 256x192 | 0.758 | 0.906 | 0.826 | 0.806 | 0.942 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.748 | 0.901 | 0.816 | 0.796 | 0.938 | ckpt | log |
Results on COCO val2017 with ground-truth bounding box
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-t-humanart-coco | 256x192 | 0.679 | 0.895 | 0.755 | 0.710 | 0.907 | ckpt | log |
rtmpose-s-humanart-coco | 256x192 | 0.725 | 0.916 | 0.798 | 0.753 | 0.925 | ckpt | log |
rtmpose-m-humanart-coco | 256x192 | 0.744 | 0.916 | 0.818 | 0.770 | 0.930 | ckpt | log |
rtmpose-l-humanart-coco | 256x192 | 0.770 | 0.927 | 0.840 | 0.794 | 0.939 | ckpt | log |
Rtmpose + Rtmpose + Body8-Coco on Body8¶
RTMPose (arXiv'2023)
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  publisher     = {arXiv},
  year          = {2023},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}
RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset.
`*` denotes model trained on 7 public datasets.
`Body8` denotes the addition of the OCHuman dataset, in addition to the 7 datasets mentioned above, for evaluation.
Config | Input Size | AP (COCO) | PCK@0.1 (Body8) | AUC (Body8) | EPE (Body8) | Params(M) | FLOPS(G) | Download |
---|---|---|---|---|---|---|---|---|
RTMPose-t* | 256x192 | 65.9 | 91.44 | 63.18 | 19.45 | 3.34 | 0.36 | Model |
RTMPose-s* | 256x192 | 69.7 | 92.45 | 65.15 | 17.85 | 5.47 | 0.68 | Model |
RTMPose-m* | 256x192 | 74.9 | 94.25 | 68.59 | 15.12 | 13.59 | 1.93 | Model |
RTMPose-l* | 256x192 | 76.7 | 95.08 | 70.14 | 13.79 | 27.66 | 4.16 | Model |
RTMPose-m* | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | Model |
RTMPose-l* | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | Model |
Rtmpose + Rtmpose on Hand5¶
RTMPose (arXiv'2023)
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  publisher     = {arXiv},
  year          = {2023},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}
RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
`Hand5` and `*` denote model trained on 5 public datasets.
Config | Input Size | PCK@0.2 (COCO-Wholebody-Hand) | PCK@0.2 (Hand5) | AUC (Hand5) | EPE (Hand5) | FLOPS(G) | Download |
---|---|---|---|---|---|---|---|
RTMPose-m* (alpha version) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | ckpt |
Rtmpose + Rtmpose on Face6¶
RTMPose (arXiv'2023)
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  publisher     = {arXiv},
  year          = {2023},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}
RTMDet (arXiv'2022)
@misc{lyu2022rtmdet,
  author        = {Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {Microsoft {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset.
`Face6` and `*` denote model trained on 6 public datasets.
Config | Input Size | NME (LaPa) | FLOPS (G) | Download |
---|---|---|---|---|
RTMPose-t* | 256x256 | 1.67 | 0.652 | Model |
RTMPose-s* | 256x256 | 1.59 | 1.119 | Model |
RTMPose-m* | 256x256 | 1.44 | 2.852 | Model |
RHD (ICCV’2017)¶
Topdown Regression + Resnet on Rhd2d¶
DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
RHD (ICCV'2017)
@TechReport{zb2017hand,
  author      = {Zimmermann, Christian and Brox, Thomas},
  title       = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution = {arXiv:1705.01389},
  year        = {2017},
  note        = {https://arxiv.org/abs/1705.01389},
  url         = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}
Results on RHD test set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
deeppose_resnet_50 | 256x256 | 0.988 | 0.865 | 3.32 | ckpt | log |
Topdown Heatmap + Resnet on Rhd2d¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
RHD (ICCV'2017)
@inproceedings{zb2017hand,
  author        = {Zimmermann, Christian and Brox, Thomas},
  title         = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  booktitle     = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
  year          = {2017},
  eprint        = {1705.01389},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}
Results on RHD test set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_resnet50 | 256x256 | 0.991 | 0.898 | 2.32 | ckpt | log |
Topdown Heatmap + Hrnetv2 on Rhd2d¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
RHD (ICCV'2017)
@inproceedings{zb2017hand,
  author        = {Zimmermann, Christian and Brox, Thomas},
  title         = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  booktitle     = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
  year          = {2017},
  eprint        = {1705.01389},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}
Results on RHD test set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 0.992 | 0.902 | 2.21 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Udp on Rhd2d¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}
RHD (ICCV'2017)
@inproceedings{zb2017hand,
  author        = {Zimmermann, Christian and Brox, Thomas},
  title         = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  booktitle     = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
  year          = {2017},
  eprint        = {1705.01389},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}
Results on RHD test set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18_udp | 256x256 | 0.992 | 0.902 | 2.19 | ckpt | log |
Topdown Heatmap + Hrnetv2 + Dark on Rhd2d¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}
RHD (ICCV'2017)
@inproceedings{zb2017hand,
  author        = {Zimmermann, Christian and Brox, Thomas},
  title         = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  booktitle     = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
  year          = {2017},
  eprint        = {1705.01389},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}
Results on RHD test set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_hrnetv2_w18_dark | 256x256 | 0.992 | 0.903 | 2.18 | ckpt | log |
Topdown Heatmap + Mobilenetv2 on Rhd2d¶
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018}
}
RHD (ICCV'2017)
@inproceedings{zb2017hand,
  author        = {Zimmermann, Christian and Brox, Thomas},
  title         = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  booktitle     = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
  year          = {2017},
  eprint        = {1705.01389},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}
Results on RHD test set
Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
---|---|---|---|---|---|---|
pose_mobilenet_v2 | 256x256 | 0.985 | 0.883 | 2.79 | ckpt | log |
AP-10K (NeurIPS’2021)¶
Topdown Heatmap + Resnet on Ap10k¶
SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  year      = {2018},
  pages     = {466--481}
}
AP-10K (NeurIPS'2021)
@misc{yu2021ap10k,
  author        = {Yu, Hang and Xu, Yufei and Zhang, Jing and Zhao, Wei and Guan, Ziyu and Tao, Dacheng},
  title         = {{AP-10K}: A Benchmark for Animal Pose Estimation in the Wild},
  year          = {2021},
  eprint        = {2108.12617},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
Results on AP-10K validation set
Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnet_50 | 256x256 | 0.680 | 0.926 | 0.738 | 0.552 | 0.687 | ckpt | log |
pose_resnet_101 | 256x256 | 0.681 | 0.921 | 0.751 | 0.545 | 0.690 | ckpt | log |
Topdown Heatmap + Cspnext + Udp on Ap10k¶
RTMDet (ArXiv 2022)
@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020}
}
AP-10K (NeurIPS'2021)
@misc{yu2021ap10k,
  author        = {Yu, Hang and Xu, Yufei and Zhang, Jing and Zhao, Wei and Guan, Ziyu and Tao, Dacheng},
  title         = {{AP-10K}: A Benchmark for Animal Pose Estimation in the Wild},
  year          = {2021},
  eprint        = {2108.12617},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
Results on AP-10K validation set
Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_cspnext_m | 256x256 | 0.703 | 0.944 | 0.776 | 0.513 | 0.710 | ckpt | log |
Topdown Heatmap + Hrnet on Ap10k¶
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703}
}
AP-10K (NeurIPS'2021)
@misc{yu2021ap10k,
  author        = {Yu, Hang and Xu, Yufei and Zhang, Jing and Zhao, Wei and Guan, Ziyu and Tao, Dacheng},
  title         = {{AP-10K}: A Benchmark for Animal Pose Estimation in the Wild},
  year          = {2021},
  eprint        = {2108.12617},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
Results on AP-10K validation set
Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_hrnet_w32 | 256x256 | 0.722 | 0.935 | 0.789 | 0.557 | 0.729 | ckpt | log |
pose_hrnet_w48 | 256x256 | 0.728 | 0.936 | 0.802 | 0.577 | 0.735 | ckpt | log |
Rtmpose + Rtmpose on Ap10k¶
AP-10K (NeurIPS'2021)
@misc{yu2021ap10k,
  author        = {Yu, Hang and Xu, Yufei and Zhang, Jing and Zhao, Wei and Guan, Ziyu and Tao, Dacheng},
  title         = {{AP-10K}: A Benchmark for Animal Pose Estimation in the Wild},
  year          = {2021},
  eprint        = {2108.12617},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
Results on AP-10K validation set
Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
---|---|---|---|---|---|---|---|---|
rtmpose-m | 256x256 | 0.722 | 0.939 | 0.788 | 0.569 | 0.728 | ckpt | log |
300W (IMAVIS’2016)¶
Topdown Heatmap + Hrnetv2 on 300w¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
300W (IMAVIS'2016)
@article{sagonas2016300,
  author    = {Sagonas, Christos and Antonakos, Epameinondas and Tzimiropoulos, Georgios and Zafeiriou, Stefanos and Pantic, Maja},
  title     = {300 faces in-the-wild challenge: Database and results},
  journal   = {Image and vision computing},
  publisher = {Elsevier},
  volume    = {47},
  pages     = {3--18},
  year      = {2016}
}
Results on 300W dataset
The model is trained on 300W train.
Arch | Input Size | NMEcommon | NMEchallenge | NMEfull | NMEtest | ckpt | log |
---|---|---|---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 2.92 | 5.64 | 3.45 | 4.10 | ckpt | log |
AFLW (ICCVW’2011)¶
Topdown Heatmap + Hrnetv2 + Dark on Aflw¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {7093--7102}
}
AFLW (ICCVW'2011)
@inproceedings{koestinger2011annotated,
  author       = {Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
  title        = {Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
  booktitle    = {2011 IEEE international conference on computer vision workshops (ICCV workshops)},
  organization = {IEEE},
  year         = {2011},
  pages        = {2144--2151}
}
Results on AFLW dataset
The model is trained on AFLW train and evaluated on AFLW full and frontal.
Arch | Input Size | NMEfull | NMEfrontal | ckpt | log |
---|---|---|---|---|---|
pose_hrnetv2_w18_dark | 256x256 | 1.35 | 1.19 | ckpt | log |
Topdown Heatmap + Hrnetv2 on Aflw¶
HRNetv2 (TPAMI'2019)
@article{WangSCJDZLMTWLX19,
  author  = {Wang, Jingdong and Sun, Ke and Cheng, Tianheng and
             Jiang, Borui and Deng, Chaorui and Zhao, Yang and Liu, Dong and Mu, Yadong and
             Tan, Mingkui and Wang, Xinggang and Liu, Wenyu and Xiao, Bin},
  title   = {Deep High-Resolution Representation Learning for Visual Recognition},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2019}
}
AFLW (ICCVW'2011)
@inproceedings{koestinger2011annotated,
  author       = {Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
  title        = {Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
  booktitle    = {2011 IEEE international conference on computer vision workshops (ICCV workshops)},
  organization = {IEEE},
  year         = {2011},
  pages        = {2144--2151}
}
Results on AFLW dataset
The model is trained on AFLW train and evaluated on AFLW full and frontal.
Arch | Input Size | NMEfull | NMEfrontal | ckpt | log |
---|---|---|---|---|---|
pose_hrnetv2_w18 | 256x256 | 1.41 | 1.27 | ckpt | log |