{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T18:05:50Z","timestamp":1763748350897,"version":"3.37.3"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2021,3,19]],"date-time":"2021-03-19T00:00:00Z","timestamp":1616112000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,3,19]],"date-time":"2021-03-19T00:00:00Z","timestamp":1616112000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003453","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2018A030313318"],"award-info":[{"award-number":["2018A030313318"]}],"id":[{"id":"10.13039\/501100003453","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100011523","name":"Guangdong Key Laboratory of Innovation Method and Decision Management System","doi-asserted-by":"publisher","award":["2019B111101001"],"award-info":[{"award-number":["2019B111101001"]}],"id":[{"id":"10.13039\/501100011523","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Ambient Intell Human Comput"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s12652-021-03124-w","type":"journal-article","created":{"date-parts":[[2021,3,19]],"date-time":"2021-03-19T13:03:45Z","timestamp":1616159025000},"page":"2937-2949","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Hope: heatmap and offset for pose estimation"],"prefix":"10.1007","volume":"13","author":[{"given":"Jing","family":"Xiao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haichao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangzhuo","family":"Qu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5256-210X","authenticated-orcid":false,"given":"Hamido","family":"Fujita","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jia","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changqin","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,3,19]]},"reference":[{"key":"3124_CR1","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.knosys.2017.06.001","volume":"131","author":"S Alyammahi","year":"2017","unstructured":"Alyammahi S, Bhaskar H, Ruta D, Al-Mualla M (2017) People detection and articulated pose estimation framework for crowded scenes. Knowl Based Syst 131:83\u2013104","journal-title":"Knowl Based Syst"},{"key":"3124_CR2","doi-asserted-by":"publisher","unstructured":"Andriluka M, Pishchulin L, Gehler P, Schiele B (2014) 2D human pose estimation: New benchmark and state of the art analysis. In: Proceedings of the IEEE Conference on computer Vision and Pattern Recognition, pp 3686\u20133693. https:\/\/doi.org\/10.1109\/CVPR.2014.471","DOI":"10.1109\/CVPR.2014.471"},{"key":"3124_CR3","doi-asserted-by":"publisher","unstructured":"Belagiannis V, Zisserman A (2017) Recurrent human pose estimation. In: 2017 12th IEEE International Conference on Automatic Face & Gesture Recognition (FG 2017). IEEE, pp 468\u2013475. https:\/\/doi.org\/10.1109\/FG.2017.64","DOI":"10.1109\/FG.2017.64"},{"key":"3124_CR4","doi-asserted-by":"crossref","unstructured":"Cai Y, Wang Z, Luo Z, Yin B, Du A, Wang H, Zhou X, Zhou E, Zhang X, Sun J (2020) Learning delicate local representations for multi-person pose estimation. arXiv:200304030","DOI":"10.1007\/978-3-030-58580-8_27"},{"key":"3124_CR5","doi-asserted-by":"publisher","unstructured":"Cao Z, Simon T, Wei SE, Sheikh Y (2017) Realtime multi-person 2d pose estimation using part affinity fields. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 7291\u20137299. https:\/\/doi.org\/10.1109\/CVPR.2017.143","DOI":"10.1109\/CVPR.2017.143"},{"issue":"4","key":"3124_CR6","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen LC, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFS. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3124_CR7","unstructured":"Chen X, Yuille AL (2014) Articulated pose estimation by a graphical model with image dependent pairwise relations. In: Advances in neural information processing systems, pp 1736\u20131744. https:\/\/papers.nips.cc\/paper\/2014\/file\/8b6dd7db9af49e67306feb59a8bdc52c-Paper.pdf"},{"key":"3124_CR8","doi-asserted-by":"crossref","unstructured":"Chen Y, Wang Z, Peng Y, Zhang Z, Yu G, Sun J (2018) Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7103\u20137112","DOI":"10.1109\/CVPR.2018.00742"},{"key":"3124_CR9","doi-asserted-by":"crossref","unstructured":"Cheng B, Xiao B, Wang J, Shi H, Huang TS, Zhang L (2019) Higherhrnet: Scale-aware representation learning for bottom-up human pose estimation. arXiv:190810357","DOI":"10.1109\/CVPR42600.2020.00543"},{"issue":"4","key":"3124_CR10","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1109\/LSP.2014.2362553","volume":"22","author":"E Cho","year":"2014","unstructured":"Cho E, Kim D (2014) Accurate human pose estimation by aggregating multiple pose hypotheses using modified kernel density approximation. IEEE Signal Process Lett 22(4):445\u2013449","journal-title":"IEEE Signal Process Lett"},{"key":"3124_CR11","doi-asserted-by":"publisher","first-page":"65347","DOI":"10.1109\/ACCESS.2019.2917952","volume":"7","author":"R Dong","year":"2019","unstructured":"Dong R, Pan X, Li F (2019) Denseu-net-based semantic segmentation of small objects in urban remote sensing images. IEEE Access 7:65347\u201365356","journal-title":"IEEE Access"},{"key":"3124_CR12","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1016\/j.jvcir.2019.05.010","volume":"63","author":"P Duan","year":"2019","unstructured":"Duan P, Wang T, Cui M, Sang H, Sun Q (2019) Multi-person pose estimation based on a deep convolutional neural network. J Vis Commun Image Represent 63:245\u2013252","journal-title":"J Vis Commun Image Represent"},{"key":"3124_CR13","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.optlaseng.2017.03.012","volume":"95","author":"M Ghaneizad","year":"2017","unstructured":"Ghaneizad M, Kavehvash Z, Mehrany K, Hosseini SAT (2017) A fast bottom-up approach toward three-dimensional human pose estimation using an array of cameras. Opt Lasers Eng 95:69\u201377","journal-title":"Opt Lasers Eng"},{"key":"3124_CR14","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"3124_CR15","doi-asserted-by":"publisher","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 2961\u20132969. https:\/\/doi.org\/10.1109\/TPAMI.2018.2844175","DOI":"10.1109\/TPAMI.2018.2844175"},{"key":"3124_CR16","doi-asserted-by":"publisher","unstructured":"Insafutdinov E, Pishchulin L, Andres B, Andriluka M, Schiele B (2016) Deepercut: a deeper, stronger, and faster multi-person pose estimation model. In: European Conference on Computer Vision. Springer, pp 34\u201350. https:\/\/doi.org\/10.1007\/978-3-319-46466-4_3","DOI":"10.1007\/978-3-319-46466-4_3"},{"key":"3124_CR17","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167"},{"key":"3124_CR18","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/j.imavis.2016.12.002","volume":"59","author":"N Jammalamadaka","year":"2017","unstructured":"Jammalamadaka N, Zisserman A, Jawahar C (2017) Human pose search using deep networks. Image Vis Comput 59:31\u201343","journal-title":"Image Vis Comput"},{"issue":"7","key":"3124_CR19","doi-asserted-by":"publisher","first-page":"3542","DOI":"10.1109\/TIP.2019.2905081","volume":"28","author":"B Kang","year":"2019","unstructured":"Kang B, Nguyen TQ (2019) Random forest with learned representations for semantic segmentation. IEEE Trans Image Process 28(7):3542\u20133555","journal-title":"IEEE Trans Image Process"},{"issue":"2","key":"3124_CR20","doi-asserted-by":"publisher","first-page":"242","DOI":"10.1016\/j.cviu.2010.09.001","volume":"115","author":"P Kuo","year":"2011","unstructured":"Kuo P, Makris D, Nebel JC (2011) Integration of bottom-up\/top-down approaches for 2d pose estimation using probabilistic Gaussian modelling. Comput Vis Image Underst 115(2):242\u2013255","journal-title":"Comput Vis Image Underst"},{"key":"3124_CR21","doi-asserted-by":"crossref","unstructured":"Li J, Wang C, Zhu H, Mao Y, Fang HS, Lu C (2019a) Crowdpose: efficient crowded scenes pose estimation and a new benchmark. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 10863\u201310872","DOI":"10.1109\/CVPR.2019.01112"},{"key":"3124_CR22","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.patcog.2019.04.026","volume":"93","author":"R Li","year":"2019","unstructured":"Li R, Liu Z, Tan J (2019b) A survey on 3d hand pose estimation: Cameras, methods, and datasets. Pattern Recogn 93:251\u2013272","journal-title":"Pattern Recogn"},{"key":"3124_CR23","doi-asserted-by":"publisher","unstructured":"Li R, Zou K, Wang W (2020) Application of human body gesture recognition algorithm based on deep learning in non-contact human body measurement. J Ambient Intell Humani Comput. https:\/\/doi.org\/10.1007\/s12652-020-01993-1","DOI":"10.1007\/s12652-020-01993-1"},{"issue":"7","key":"3124_CR24","doi-asserted-by":"publisher","first-page":"1080","DOI":"10.1109\/TSMC.2016.2639788","volume":"48","author":"G Liang","year":"2017","unstructured":"Liang G, Lan X, Wang J, Wang J, Zheng N (2017) A limb-based graphical model for human pose estimation. IEEE Trans Syst Man Cybern Syst 48(7):1080\u20131092","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"3124_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cviu.2018.10.006","volume":"176","author":"S Liang","year":"2018","unstructured":"Liang S, Sun X, Wei Y (2018) Compositional human pose regression. Comput Vis Image Underst 176:1\u20138","journal-title":"Comput Vis Image Underst"},{"key":"3124_CR26","doi-asserted-by":"publisher","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: Common objects in context. In: European Conference on Computer Vision. Springer, pp 740\u2013755. https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3124_CR27","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1016\/j.knosys.2014.04.028","volume":"66","author":"Y Liu","year":"2014","unstructured":"Liu Y, Wang Q, Jiang Y, Lei Y (2014) Supervised locality discriminant manifold learning for head pose estimation. Knowl Based Syst 66:126\u2013135","journal-title":"Knowl Based Syst"},{"key":"3124_CR28","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.jvcir.2015.06.013","volume":"32","author":"Z Liu","year":"2015","unstructured":"Liu Z, Zhu J, Bu J, Chen C (2015) A survey of human pose estimation: the body parts parsing based methods. J Vis Commun Image Represent 32:10\u201319","journal-title":"J Vis Commun Image Represent"},{"issue":"8","key":"3124_CR29","doi-asserted-by":"publisher","first-page":"1814","DOI":"10.1109\/TPAMI.2017.2737535","volume":"40","author":"Z Liu","year":"2017","unstructured":"Liu Z, Li X, Luo P, Loy CC, Tang X (2017) Deep learning Markov random field for semantic segmentation. IEEE Trans Pattern Anal Mach Intell 40(8):1814\u20131828","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"3124_CR30","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vision 60(2):91\u2013110","journal-title":"Int J Comput Vision"},{"issue":"1","key":"3124_CR31","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/TIP.2018.2865666","volume":"28","author":"Y Luo","year":"2018","unstructured":"Luo Y, Xu Z, Liu P, Du Y, Guo JM (2018) Multi-person pose estimation via multi-layer fractal network and joints kinship pattern. IEEE Trans Image Process 28(1):142\u2013155","journal-title":"IEEE Trans Image Process"},{"key":"3124_CR32","unstructured":"MSCOCO (2015) Keypoints evaluation metric. http:\/\/mscoco.org\/dataset\/keypoints-eval"},{"key":"3124_CR33","doi-asserted-by":"publisher","unstructured":"Neubeck A, Van Gool L (2006) Efficient non-maximum suppression. In: International Conference on Pattern Recognition, vol 3. IEEE, pp 850\u2013855. https:\/\/doi.org\/10.1109\/ICPR.2006.479","DOI":"10.1109\/ICPR.2006.479"},{"key":"3124_CR34","doi-asserted-by":"publisher","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. In: European conference on computer vision. Springer, pp 483\u2013499. https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"3124_CR35","doi-asserted-by":"publisher","unstructured":"Papandreou G, Zhu T, Kanazawa N, Toshev A, Tompson J, Bregler C, Murphy K (2017) Towards accurate multi-person pose estimation in the wild. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4903\u20134911. https:\/\/doi.org\/10.1109\/CVPR.2017.395","DOI":"10.1109\/CVPR.2017.395"},{"key":"3124_CR36","doi-asserted-by":"publisher","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, pp 91\u201399. https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","DOI":"10.1109\/TPAMI.2016.2577031"},{"issue":"3","key":"3124_CR37","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M et al (2015) Imagenet large scale visual recognition challenge. Int J Comput Vision 115(3):211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int J Comput Vision"},{"key":"3124_CR38","doi-asserted-by":"publisher","unstructured":"Shamsafar F, Ebrahimnezhad H (2020) Uniting holistic and part-based attitudes for accurate and robust deep human pose estimation. J Ambient Intell Human Comput. https:\/\/doi.org\/10.1007\/s12652-020-02347-7","DOI":"10.1007\/s12652-020-02347-7"},{"key":"3124_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cag.2019.08.013","volume":"85","author":"LJS Silva","year":"2019","unstructured":"Silva LJS, da Silva DLS, Raposo A, Velho L, Lopes H (2019) Tensorpose: real-time pose estimation for interactive applications. Comput Gr 85:1\u201314","journal-title":"Comput Gr"},{"key":"3124_CR40","doi-asserted-by":"crossref","unstructured":"Sun K, Xiao B, Liu D, Wang J (2019) Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2019.00584"},{"key":"3124_CR41","doi-asserted-by":"publisher","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139. https:\/\/doi.org\/10.1109\/CVPR.2015.7298594","DOI":"10.1109\/CVPR.2015.7298594"},{"issue":"12","key":"3124_CR42","doi-asserted-by":"publisher","first-page":"3045","DOI":"10.1109\/TPAMI.2017.2771779","volume":"40","author":"Y Tang","year":"2017","unstructured":"Tang Y, Wang J, Wang X, Gao B, Dellandr\u00e9a E, Gaizauskas R, Chen L (2017) Visual and semantic knowledge transfer for large scale semi-supervised object detection. IEEE Trans Pattern Anal Mach Intell 40(12):3045\u20133058","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3124_CR43","doi-asserted-by":"publisher","unstructured":"Xiao B, Wu H, Wei Y (2018) Simple baselines for human pose estimation and tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 466\u2013481. https:\/\/doi.org\/10.1007\/978-3-030-01231-1_29","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"3124_CR44","doi-asserted-by":"crossref","unstructured":"Yu F, Wang D, Shelhamer E, Darrell T (2018) Deep layer aggregation. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition. pp 2403\u20132412","DOI":"10.1109\/CVPR.2018.00255"},{"key":"3124_CR45","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1016\/j.jvcir.2019.01.034","volume":"59","author":"Q Zhang","year":"2019","unstructured":"Zhang Q, Lin J, Zhuge J, Yuan W (2019) Multi-level and multi-scale deep saliency network for salient object detection. J Vis Commun Image Represent 59:415\u2013424","journal-title":"J Vis Commun Image Represent"},{"issue":"2","key":"3124_CR46","doi-asserted-by":"publisher","first-page":"1183","DOI":"10.1109\/TII.2018.2849348","volume":"15","author":"X Zhang","year":"2018","unstructured":"Zhang X, Chen Z, Wu QJ, Cai L, Lu D, Li X (2018) Fast semantic segmentation for scene perception. IEEE Trans Ind Inf 15(2):1183\u20131192","journal-title":"IEEE Trans Ind Inf"}],"container-title":["Journal of Ambient Intelligence and Humanized Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03124-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12652-021-03124-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03124-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,11]],"date-time":"2022-05-11T10:49:28Z","timestamp":1652266168000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12652-021-03124-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,19]]},"references-count":46,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["3124"],"URL":"https:\/\/doi.org\/10.1007\/s12652-021-03124-w","relation":{},"ISSN":["1868-5137","1868-5145"],"issn-type":[{"type":"print","value":"1868-5137"},{"type":"electronic","value":"1868-5145"}],"subject":[],"published":{"date-parts":[[2021,3,19]]},"assertion":[{"value":"8 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 March 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 March 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}