{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T21:12:18Z","timestamp":1762377138077,"version":"3.37.3"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,4,23]],"date-time":"2022-04-23T00:00:00Z","timestamp":1650672000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,4,23]],"date-time":"2022-04-23T00:00:00Z","timestamp":1650672000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s10489-022-03516-1","type":"journal-article","created":{"date-parts":[[2022,4,23]],"date-time":"2022-04-23T05:02:35Z","timestamp":1650690155000},"page":"1021-1029","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Staged cascaded network for monocular 3D human pose estimation"],"prefix":"10.1007","volume":"53","author":[{"given":"Bing-kun","family":"Gao","sequence":"first","affiliation":[]},{"given":"Zhong-xin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Cui-na","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Chen-lei","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2442-330X","authenticated-orcid":false,"given":"Hong-bo","family":"Bi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,4,23]]},"reference":[{"issue":"1","key":"3516_CR1","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1109\/TPAMI.2006.21","volume":"28","author":"A Agarwal","year":"2005","unstructured":"Agarwal A, Triggs B (2005) Recovering 3d human pose from monocular images. IEEE Trans Pattern Anal Mach Intell 28(1):44\u201358","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3516_CR2","doi-asserted-by":"crossref","unstructured":"Akhter I, Black MJ (2015) Pose-conditioned joint angle limits for 3d human pose reconstruction. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1446\u20131455","DOI":"10.1109\/CVPR.2015.7298751"},{"key":"3516_CR3","doi-asserted-by":"crossref","unstructured":"Bai H, Cheng S, Tang J, Pan J (2021) Learning a cascaded non-local residual network for super-resolving blurry images. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 223\u2013232","DOI":"10.1109\/CVPRW53098.2021.00031"},{"key":"3516_CR4","doi-asserted-by":"crossref","unstructured":"Belagiannis V, Amin S, Andriluka M, Schiele B, Navab N, Ilic S (2014) 3d pictorial structures for multiple human pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1669\u20131676","DOI":"10.1109\/CVPR.2014.216"},{"key":"3516_CR5","doi-asserted-by":"crossref","unstructured":"Bo L, Sminchisescu C (2009) Structured output-associative regression. In: 2009 IEEE Conference on computer vision and pattern recognition. IEEE, pp 2403\u20132410","DOI":"10.1109\/CVPR.2009.5206699"},{"key":"3516_CR6","doi-asserted-by":"crossref","unstructured":"Bogo F, Kanazawa A, Lassner C, Gehler P, Romero J, Black MJ (2016) Keep it smpl: Automatic estimation of 3d human pose and shape from a single image. In: European conference on computer vision. Springer, pp 561\u2013578","DOI":"10.1007\/978-3-319-46454-1_34"},{"key":"3516_CR7","doi-asserted-by":"crossref","unstructured":"Burenius M, Sullivan J, Carlsson S (2013) 3d pictorial structures for multiple view articulated pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3618\u20133625","DOI":"10.1109\/CVPR.2013.464"},{"key":"3516_CR8","doi-asserted-by":"crossref","unstructured":"Chen W, Wang H, Li Y, Su H, Wang Z, Tu C, Lischinski D, Cohen-Or D, Chen B (2016) Synthesizing training images for boosting human 3d pose estimation. In: 2016 Fourth international conference on 3d vision (3DV). IEEE, pp 479\u2013488","DOI":"10.1109\/3DV.2016.58"},{"key":"3516_CR9","unstructured":"Chen X, Yuille A (2014) Articulated pose estimation by a graphical model with image dependent pairwise relations. arXiv:1407.3399"},{"key":"3516_CR10","doi-asserted-by":"crossref","unstructured":"Chen X, Lin K-Y, Liu W, Qian C, Lin L (2019) Weakly-supervised discovery of geometry-aware representation for 3d human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10895\u201310904","DOI":"10.1109\/CVPR.2019.01115"},{"key":"3516_CR11","doi-asserted-by":"crossref","unstructured":"Chen X, Fu C, Zhao Y, Zheng F, Song J, Ji R, Yi Y (2020) Salience-guided cascaded suppression network for person re-identification. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3300\u20133310","DOI":"10.1109\/CVPR42600.2020.00336"},{"key":"3516_CR12","doi-asserted-by":"crossref","unstructured":"Chen Y, Wang Z, Peng Y, Zhang Z, Yu G, Sun J (2018) Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7103\u20137112","DOI":"10.1109\/CVPR.2018.00742"},{"key":"3516_CR13","doi-asserted-by":"crossref","unstructured":"Cheng Y, Bo Y, Bo W, Yan W, Tan RT (2019) Occlusion-aware networks for 3d human pose estimation in video. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 723\u2013732","DOI":"10.1109\/ICCV.2019.00081"},{"key":"3516_CR14","doi-asserted-by":"crossref","unstructured":"Diba A, Sharma V, Pazandeh A, Pirsiavash H, Gool LV (2017) Weakly supervised cascaded convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 914\u2013922","DOI":"10.1109\/CVPR.2017.545"},{"key":"3516_CR15","unstructured":"Dix A, Finlay J, Abowd GD, Beale R (2000) Human-computer interaction Harlow ua"},{"key":"3516_CR16","doi-asserted-by":"crossref","unstructured":"Habibie I, Xu W, Mehta D, Pons-Moll G, Theobalt C (2019) In the wild human pose estimation using explicit 2d features and intermediate 3d representations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10905\u201310914","DOI":"10.1109\/CVPR.2019.01116"},{"key":"3516_CR17","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"3516_CR18","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Maaten LVD, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"issue":"6","key":"3516_CR19","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1016\/j.vrih.2020.04.005","volume":"2","author":"X Ji","year":"2020","unstructured":"Ji X, Qi F, Dong J, Shuai Q, Jiang W, Zhou X (2020) A survey on monocular 3d human pose estimation. Virtual Real Intell Hardw 2(6):471\u2013500","journal-title":"Virtual Real Intell Hardw"},{"key":"3516_CR20","doi-asserted-by":"crossref","unstructured":"Kanazawa A, Black MJ, Jacobs DW, Malik J (2018) End-to-end recovery of human shape and pose. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7122\u20137131","DOI":"10.1109\/CVPR.2018.00744"},{"key":"3516_CR21","doi-asserted-by":"crossref","unstructured":"Kocabas M, Karagoz S, Akbas E (2019) Self-supervised learning of 3d human pose using multi-view geometry. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1077\u20131086","DOI":"10.1109\/CVPR.2019.00117"},{"key":"3516_CR22","doi-asserted-by":"crossref","unstructured":"Kolotouros N, Pavlakos G, Black MJ, Daniilidis K (2019) Learning to reconstruct 3d human pose and shape via model-fitting in the loop. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2252\u20132261","DOI":"10.1109\/ICCV.2019.00234"},{"key":"3516_CR23","doi-asserted-by":"crossref","unstructured":"Li S, Ke L, Pratama K, Tai Y-W, Tang C-K, Cheng K-T (2020) Cascaded deep monocular 3d human pose estimation with evolutionary training data. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6173\u20136183","DOI":"10.1109\/CVPR42600.2020.00621"},{"key":"3516_CR24","doi-asserted-by":"crossref","unstructured":"Li Z, Wang X, Wang F, Jiang P (2019) On boosting single-frame 3d human pose estimation via monocular videos. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2192\u20132201","DOI":"10.1109\/ICCV.2019.00228"},{"key":"3516_CR25","doi-asserted-by":"crossref","unstructured":"Liu W, Chen J, Li C, Qian C, Chu X, Hu X (2018) A cascaded inception of inception network with attention modulated feature fusion for human pose estimation. In: Thirty-second AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.12334"},{"key":"3516_CR26","unstructured":"Luo C, Chu X, Yuille A (2018) Orinet: A fully convolutional network for 3d human pose estimation. arXiv:1811.04989"},{"key":"3516_CR27","doi-asserted-by":"crossref","unstructured":"Martinez J, Hossain R, Romero J, Little JJ (2017) A simple yet effective baseline for 3d human pose estimation. In: Proceedings of the IEEE international conference on computer vision, pp 2640\u20132649","DOI":"10.1109\/ICCV.2017.288"},{"key":"3516_CR28","doi-asserted-by":"crossref","unstructured":"Mehta D, Rhodin H, Casas D, Fua P, Sotnychenko O, Weipeng X u, Theobalt C (2017) Monocular 3d human pose estimation in the wild using improved cnn supervision. In: 2017 International conference on 3d vision (3DV). IEEE, pp 506\u2013516","DOI":"10.1109\/3DV.2017.00064"},{"key":"3516_CR29","doi-asserted-by":"crossref","unstructured":"Mehta D, Sotnychenko O, Mueller F, Xu W, Sridhar S, Pons-Moll G, Theobalt C (2018) Single-shot multi-person 3d pose estimation from monocular rgb. In: 2018 International conference on 3d vision (3DV). IEEE, pp 120\u2013130","DOI":"10.1109\/3DV.2018.00024"},{"issue":"4","key":"3516_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073596","volume":"36","author":"D Mehta","year":"2017","unstructured":"Mehta D, Sridhar S, Sotnychenko O, Rhodin H, Shafiei M, Seidel H-P, Xu W, Casas D, Theobalt C (2017) Vnect: Real-time 3d human pose estimation with a single rgb camera. ACM Trans Graph (TOG) 36(4):1\u201314","journal-title":"ACM Trans Graph (TOG)"},{"key":"3516_CR31","doi-asserted-by":"crossref","unstructured":"Moon G, Chang YJ, Lee KM (2019) Camera distance-aware top-down approach for 3d multi-person pose estimation from a single rgb image. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10133\u201310142","DOI":"10.1109\/ICCV.2019.01023"},{"key":"3516_CR32","doi-asserted-by":"crossref","unstructured":"Moreno-Noguer F (2017) 3d human pose estimation from a single image via distance matrix regression. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2823\u20132832","DOI":"10.1109\/CVPR.2017.170"},{"key":"3516_CR33","doi-asserted-by":"crossref","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. In: European conference on computer vision. Springer, pp 483\u2013499","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"3516_CR34","unstructured":"Nibali A, He Z, Morgan S, Prendergast L (2018) Numerical coordinate regression with convolutional neural networks. arXiv:1801.07372"},{"key":"3516_CR35","doi-asserted-by":"crossref","unstructured":"Nie Q, Liu Z, Liu Y (2020) Unsupervised 3d human pose representation with viewpoint and pose disentanglement. In: European conference on computer vision. Springer, pp 102\u2013118","DOI":"10.1007\/978-3-030-58529-7_7"},{"key":"3516_CR36","doi-asserted-by":"crossref","unstructured":"Nie X, Feng J, Zhang J, Yan S (2019) Single-stage multi-person pose machines. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6951\u20136960","DOI":"10.1109\/ICCV.2019.00705"},{"key":"3516_CR37","doi-asserted-by":"crossref","unstructured":"Pavlakos G, Choutas V, Ghorbani N, Bolkart T, Osman AAA, Tzionas D, Black MJ (2019) Expressive body capture: 3d hands, face, and body from a single image. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10975\u201310985","DOI":"10.1109\/CVPR.2019.01123"},{"key":"3516_CR38","doi-asserted-by":"crossref","unstructured":"Pavlakos G, Zhou X, Derpanis KG, Daniilidis K (2017) Coarse-to-fine volumetric prediction for single-image 3d human pose. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7025\u20137034","DOI":"10.1109\/CVPR.2017.139"},{"key":"3516_CR39","doi-asserted-by":"crossref","unstructured":"Pavlakos G, Zhou X, Derpanis KG, Daniilidis K (2017) Harvesting multiple views for marker-less 3d human pose annotations. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6988\u20136997","DOI":"10.1109\/CVPR.2017.138"},{"key":"3516_CR40","doi-asserted-by":"crossref","unstructured":"Pavllo D, Feichtenhofer C, Grangier D, Auli M (2019) 3d human pose estimation in video with temporal convolutions and semi-supervised training. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7753\u20137762","DOI":"10.1109\/CVPR.2019.00794"},{"key":"3516_CR41","doi-asserted-by":"crossref","unstructured":"Hossain MRI, Little JJ (2017) Exploiting temporal information for 3d pose estimation. arXiv:arXiv--1711","DOI":"10.1007\/978-3-030-01249-6_5"},{"key":"3516_CR42","doi-asserted-by":"crossref","unstructured":"Rhodin H, Sp\u00f6rri J, Katircioglu I, Constantin V, Meyer F, M\u00fcller E, Salzmann M, Fua P (2018) Learning monocular 3d human pose estimation from multi-view images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8437\u20138446","DOI":"10.1109\/CVPR.2018.00880"},{"key":"3516_CR43","doi-asserted-by":"crossref","unstructured":"Sharma S, Varigonda PT, Bindal P, Sharma A, Jain A (2019) Monocular 3d human pose estimation by generation and ordinal ranking. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2325\u20132334","DOI":"10.1109\/ICCV.2019.00241"},{"key":"3516_CR44","doi-asserted-by":"crossref","unstructured":"Shi W, Caballero J, Husz\u00e1r F, Totz J, Aitken AP, Bishop R, Rueckert D, Wang Z (2016) Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1874\u20131883","DOI":"10.1109\/CVPR.2016.207"},{"key":"3516_CR45","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556"},{"key":"3516_CR46","unstructured":"Ke S, Xiao B, Liu D, Wang J (2019) Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5693\u20135703"},{"key":"3516_CR47","doi-asserted-by":"crossref","unstructured":"Sun X, Xiao B, Wei F, Liang S, Wei Y (2018) Integral human pose regression. In: Proceedings of the european conference on computer vision (ECCV), pp 529\u2013545","DOI":"10.1007\/978-3-030-01231-1_33"},{"key":"3516_CR48","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"3516_CR49","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"3516_CR50","unstructured":"Tompson JJ, Jain A, LeCun Y, Bregler C (2014) Joint training of a convolutional network and a graphical model for human pose estimation. arXiv:1406.2984"},{"key":"3516_CR51","doi-asserted-by":"crossref","unstructured":"Wang J, Tan S, Zhen X, Xu S, Zheng F, He Z, Shao L (2021) Deep 3d human pose estimation: A review. Computer Vision and Image Understanding, p 103225","DOI":"10.1016\/j.cviu.2021.103225"},{"key":"3516_CR52","doi-asserted-by":"crossref","unstructured":"Wu J, Xue T, Lim JJ, Tian Y, Tenenbaum JB, Torralba A, Freeman WT (2016) Single image 3d interpreter network. In: European conference on computer vision. Springer, pp 365\u2013382","DOI":"10.1007\/978-3-319-46466-4_22"},{"key":"3516_CR53","doi-asserted-by":"crossref","unstructured":"Yang W, Ouyang W, Wang X, Ren J, Li H, Wang X (2018) 3d human pose estimation in the wild by adversarial learning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5255\u20135264","DOI":"10.1109\/CVPR.2018.00551"},{"key":"3516_CR54","doi-asserted-by":"crossref","unstructured":"Yu D, Su K, Sun J, Wang C (2018) Multi-person pose estimation for pose tracking with enhanced cascaded pyramid network. In: Proceedings of the european conference on computer vision (ECCV) Workshops, pp 0\u20130","DOI":"10.1007\/978-3-030-11012-3_19"},{"key":"3516_CR55","doi-asserted-by":"crossref","unstructured":"Zhao L, Xi P, Yu T, Kapadia M, Metaxas DN (2019) Semantic graph convolutional networks for 3d human pose regression. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3425\u20133435","DOI":"10.1109\/CVPR.2019.00354"},{"key":"3516_CR56","doi-asserted-by":"crossref","unstructured":"Zhou T, Wang W, Qi S, Ling H, Shen J (2020) Cascaded human-object interaction recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4263\u20134272","DOI":"10.1109\/CVPR42600.2020.00432"},{"key":"3516_CR57","doi-asserted-by":"crossref","unstructured":"Zhou X, Huang Q, Sun X, Xue X, Wei Y (2017) Towards 3d human pose estimation in the wild: A weakly-supervised approach. In: Proceedings of the IEEE international conference on computer vision, pp 398\u2013407","DOI":"10.1109\/ICCV.2017.51"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03516-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03516-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03516-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,3]],"date-time":"2023-01-03T04:52:52Z","timestamp":1672721572000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03516-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,23]]},"references-count":57,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["3516"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03516-1","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2022,4,23]]},"assertion":[{"value":"15 March 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 April 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}