{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T23:24:44Z","timestamp":1769124284794,"version":"3.49.0"},"reference-count":68,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T00:00:00Z","timestamp":1658188800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T00:00:00Z","timestamp":1658188800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62071472"],"award-info":[{"award-number":["62071472"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61902404"],"award-info":[{"award-number":["61902404"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62001475"],"award-info":[{"award-number":["62001475"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Innovative Research Team in China University of Mining and Technology","award":["2020ZY002"],"award-info":[{"award-number":["2020ZY002"]}]},{"name":"Special Fund for cultivating major projects in China University of Mining and Technology","award":["2020-10991"],"award-info":[{"award-number":["2020-10991"]}]},{"DOI":"10.13039\/501100012154","name":"Graduate Research and Innovation Projects of Jiangsu Province","doi-asserted-by":"publisher","award":["KYCX21 2241"],"award-info":[{"award-number":["KYCX21 2241"]}],"id":[{"id":"10.13039\/501100012154","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s10489-022-03909-2","type":"journal-article","created":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T06:02:25Z","timestamp":1658210545000},"page":"8097-8113","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Combining detailed appearance and multi-scale representation: a structure-context complementary network for human pose estimation"],"prefix":"10.1007","volume":"53","author":[{"given":"Kaiwen","family":"Dong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1389-3958","authenticated-orcid":false,"given":"Yanjing","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaozhou","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaolin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,7,19]]},"reference":[{"key":"3909_CR1","doi-asserted-by":"crossref","unstructured":"Andriluka M, Pishchulin L, Gehler P, Schiele B (2014) 2d human pose estimation: New benchmark and state of the art analysis. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp 3686\u20133693","DOI":"10.1109\/CVPR.2014.471"},{"key":"3909_CR2","doi-asserted-by":"crossref","unstructured":"Artacho B, Savakis A (2020) Unipose: Unified human pose estimation in single images and videos. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 7035\u20137044","DOI":"10.1109\/CVPR42600.2020.00706"},{"key":"3909_CR3","doi-asserted-by":"crossref","unstructured":"Artacho B, Savakis A (2021) Unipose+: A unified framework for 2d and 3d human pose estimation in images and videos IEEE Transactions on Pattern Analysis and Machine Intelligence","DOI":"10.1109\/TPAMI.2021.3124736"},{"key":"3909_CR4","doi-asserted-by":"crossref","unstructured":"Bello I, Zoph B, Vaswani A, Shlens J, Le QV (2019) Attention augmented convolutional networks. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3286\u20133295","DOI":"10.1109\/ICCV.2019.00338"},{"issue":"6","key":"3909_CR5","doi-asserted-by":"publisher","first-page":"3450","DOI":"10.1007\/s10489-020-01961-4","volume":"51","author":"HB Bi","year":"2021","unstructured":"Bi HB, Lu D, Zhu HH, Yang LN, Guan HP (2021) STA-net: spatial-temporal attention network for video salient object detection. Appl Intell 51(6):3450\u20133459. https:\/\/doi.org\/10.1007\/s10489-020-01961-4https:\/\/doi.org\/10.1007\/s10489-020-01961-4","journal-title":"Appl Intell"},{"issue":"01","key":"3909_CR6","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2019.2929257","volume":"43","author":"Z Cao","year":"2021","unstructured":"Cao Z, Hidalgo G, Simon T, Wei SE, Sheikh Y (2021) Openpose: Realtime multi-person 2d pose estimation using part affinity fields. IEEE Trans Pattern Anal Mach Intell 43(01):172\u2013186","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3909_CR7","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision. Springer, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"3909_CR8","unstructured":"Chen K, Wang J, Pang J, Cao Y, Xiong Y, Li X, Sun S, Feng W, Liu Z, Xu J et al (2019) Mmdetection: Open mmlab detection toolbox and benchmark. arXiv:1906.07155"},{"issue":"4","key":"3909_CR9","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2018","unstructured":"Chen LC, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2018) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848. https:\/\/doi.org\/10.1109\/TPAMI.2017.2699184","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3909_CR10","unstructured":"Chen X, Yuille A (2014) Articulated pose estimation by a graphical model with image dependent pairwise relations. In: Proceedings of the 27th International conference on neural information processing systems-volume 1, pp 1736\u20131744"},{"key":"3909_CR11","unstructured":"Chen Y, Kalantidis Y, Li J, Yan S, Feng J (2018) A 2-nets: double attention networks. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, pp 350\u2013359"},{"key":"3909_CR12","doi-asserted-by":"crossref","unstructured":"Chen Y, Wang Z, Peng Y, Zhang Z, Yu G, Sun J (2018) Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7103\u20137112","DOI":"10.1109\/CVPR.2018.00742"},{"key":"3909_CR13","first-page":"316","volume":"29","author":"X Chu","year":"2016","unstructured":"Chu X, Ouyang W, Wang X, et al. (2016) Crf-cnn: Modeling structured information in human pose estimation. Adv Neural Inf Process Syst 29:316\u2013324","journal-title":"Adv Neural Inf Process Syst"},{"key":"3909_CR14","doi-asserted-by":"crossref","unstructured":"Chu X, Yang W, Ouyang W, Ma C, Yuille AL, Wang X (2017) Multi-context attention for human pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1831\u20131840","DOI":"10.1109\/CVPR.2017.601"},{"key":"3909_CR15","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on computer vision and pattern recognition. IEEE, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"3909_CR16","doi-asserted-by":"crossref","unstructured":"Ding H, Jiang X, Shuai B, Liu AQ, Wang G (2018) Context contrasted feature and gated multi-scale aggregation for scene segmentation. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp 2393\u20132402","DOI":"10.1109\/CVPR.2018.00254"},{"key":"3909_CR17","doi-asserted-by":"crossref","unstructured":"Dong L, Chen X, Wang R, Zhang Q, Izquierdo E (2017) Adore: An adaptive holons representation framework for human pose estimation. IEEE Transactions on Circuits and Systems for Video Technology","DOI":"10.1109\/TCSVT.2017.2707477"},{"key":"3909_CR18","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1016\/j.neucom.2021.10.073","volume":"472","author":"X Dong","year":"2022","unstructured":"Dong X, Yu J, Zhang J (2022) Joint usage of global and local attentions in hourglass network for human pose estimation. Neurocomputing 472:95\u2013102","journal-title":"Neurocomputing"},{"key":"3909_CR19","doi-asserted-by":"crossref","unstructured":"Fan H, Zhuo T, Yu X, Yang Y, Kankanhalli M (2021) Understanding atomic hand-object interaction with human intention IEEE Transactions on Circuits and Systems for Video Technology","DOI":"10.1109\/TCSVT.2021.3058688"},{"key":"3909_CR20","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"3909_CR21","doi-asserted-by":"crossref","unstructured":"He T, Zhang Z, Zhang H, Zhang Z, Xie J, Li M (2019) Bag of tricks for image classification with convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 558\u2013567","DOI":"10.1109\/CVPR.2019.00065"},{"key":"3909_CR22","doi-asserted-by":"crossref","unstructured":"Hou Q, Zhou D, Feng J (2021) Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 13713\u201313722","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"3909_CR23","first-page":"9401","volume":"31","author":"J Hu","year":"2018","unstructured":"Hu J, Shen L, Albanie S, Sun G, Vedaldi A (2018) Gather-excite: Exploiting feature context in convolutional neural networks. Adv Neural Inf Process Syst 31:9401\u20139411","journal-title":"Adv Neural Inf Process Syst"},{"key":"3909_CR24","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"issue":"4","key":"3909_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11704-019-8266-2","volume":"14","author":"G Hua","year":"2020","unstructured":"Hua G, Li L, Liu S (2020) Multipath affinage stacked\u2014hourglass networks for human pose estimation. Front Comput Sci 14(4):1\u201312","journal-title":"Front Comput Sci"},{"issue":"9","key":"3909_CR26","doi-asserted-by":"publisher","first-page":"2822","DOI":"10.1109\/TCSVT.2018.2870740","volume":"29","author":"J Huang","year":"2018","unstructured":"Huang J, Zhou W, Li H, Li W (2018) Attention-based 3d-cnns for large-vocabulary sign language recognition. IEEE Trans Circuits Syst Video Technol 29(9):2822\u20132832","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"3909_CR27","doi-asserted-by":"crossref","unstructured":"Huang Z, Ke W, Huang D (2020) Improving object detection with inverted attention. In: 2020 IEEE Winter conference on applications of computer vision (WACV). IEEE, pp 1294\u20131302","DOI":"10.1109\/WACV45572.2020.9093507"},{"key":"3909_CR28","doi-asserted-by":"crossref","unstructured":"Huang Z, Wang X, Huang L, Huang C, Wei Y, Liu W (2019) Ccnet: Criss-cross attention for semantic segmentation. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 603\u2013612","DOI":"10.1109\/ICCV.2019.00069"},{"key":"3909_CR29","doi-asserted-by":"crossref","unstructured":"Ke L, Chang MC, Qi H, Lyu S (2018) Multi-scale structure-aware network for human pose estimation. In: Proceedings of the european conference on computer vision (ECCV), pp 713\u2013728","DOI":"10.1109\/ICIP.2018.8451114"},{"key":"3909_CR30","doi-asserted-by":"crossref","unstructured":"Khirodkar R, Chari V, Agrawal A, Tyagi A (2021) Multi-instance pose networks: Rethinking top-down pose estimation. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 3122\u20133131","DOI":"10.1109\/ICCV48922.2021.00311"},{"issue":"12","key":"3909_CR31","doi-asserted-by":"publisher","first-page":"3729","DOI":"10.1109\/TCSVT.2018.2882513","volume":"29","author":"K Kong","year":"2018","unstructured":"Kong K, Shin S, Lee J, Song WJ (2018) How to estimate global motion non-iteratively from a coarsely sampled motion vector field. IEEE Trans Circuits Syst Video Technol 29(12):3729\u20133742","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"3909_CR32","doi-asserted-by":"crossref","unstructured":"Kreiss S, Bertoni L, Alahi A (2019) Pifpaf: Composite fields for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 11977\u201311986","DOI":"10.1109\/CVPR.2019.01225"},{"key":"3909_CR33","doi-asserted-by":"crossref","unstructured":"Li Y, Zhang S, Wang Z, Yang S, Yang W, Xia ST, Zhou E (2021) Tokenpose: Learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 11313\u201311322","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"3909_CR34","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: Common objects in context. In: European conference on computer vision. Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3909_CR35","unstructured":"Linsley D, Shiebler D, Eberhardt S, Serre T (2019) Learning what and where to attend. In: International conference on learning representations"},{"key":"3909_CR36","doi-asserted-by":"crossref","unstructured":"Liu JJ, Hou Q, Cheng MM, Wang C, Feng J (2020) Improving convolutional networks with self-calibrated convolutions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10096\u201310105","DOI":"10.1109\/CVPR42600.2020.01011"},{"key":"3909_CR37","doi-asserted-by":"crossref","unstructured":"Liu S, Bai X, Fang M, Li L, Hung CC (2021) Mixed graph convolution and residual transformation network for skeleton-based action recognition. Applied Intelligence p 1\u201312","DOI":"10.1007\/s10489-021-02517-w"},{"issue":"1","key":"3909_CR38","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/TIP.2018.2865666","volume":"28","author":"Y Luo","year":"2018","unstructured":"Luo Y, Xu Z, Liu P, Du Y, Guo JM (2018) Multi-person pose estimation via multi-layer fractal network and joints kinship pattern. IEEE Trans Image Process 28(1):142\u2013155","journal-title":"IEEE Trans Image Process"},{"key":"3909_CR39","doi-asserted-by":"crossref","unstructured":"Misra D, Nalamada T, Arasanipalai AU, Hou Q (2021) Rotate to attend: Convolutional triplet attention module. In: Proceedings of the IEEE\/CVF Winter conference on applications of computer vision, pp 3139\u20133148","DOI":"10.1109\/WACV48630.2021.00318"},{"key":"3909_CR40","doi-asserted-by":"crossref","unstructured":"Mo S, Cai M, Lin L, Tong R, Chen Q, Wang F, Hu H, Iwamoto Y, Han XH, Chen YW (2021) Mutual information-based graph co-attention networks for multimodal prior-guided magnetic resonance imaging segmentation IEEE Transactions on Circuits and Systems for Video Technology","DOI":"10.1109\/TCSVT.2021.3112551"},{"key":"3909_CR41","doi-asserted-by":"crossref","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. In: European conference on computer vision. Springer, pp 483\u2013499","DOI":"10.1007\/978-3-319-46484-8_29"},{"issue":"2","key":"3909_CR42","doi-asserted-by":"publisher","first-page":"924","DOI":"10.1109\/TIP.2018.2872628","volume":"28","author":"X Nie","year":"2018","unstructured":"Nie X, Feng J, Xing J, Xiao S, Yan S (2018) Hierarchical contextual refinement networks for human pose estimation. IEEE Trans Image Process 28(2):924\u2013936","journal-title":"IEEE Trans Image Process"},{"key":"3909_CR43","doi-asserted-by":"crossref","unstructured":"Nie X, Feng J, Zhang J, Yan S (2019) Single-stage multi-person pose machines. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 6951\u20136960","DOI":"10.1109\/ICCV.2019.00705"},{"key":"3909_CR44","unstructured":"Park J, Woo S, Lee JY, Kweon IS (2018) Bam: Bottleneck attention module. arXiv:1807.06514"},{"issue":"1","key":"3909_CR45","doi-asserted-by":"crossref","first-page":"123","DOI":"10.34768\/amcs-2021-0009","volume":"31","author":"G Peng","year":"2021","unstructured":"Peng G, Zheng Y, Li J, Yang J (2021) A single upper limb pose estimation method based on the improved stacked hourglass network. Int J Appl Math Comput Sci 31(1):123\u2013133","journal-title":"Int J Appl Math Comput Sci"},{"key":"3909_CR46","doi-asserted-by":"crossref","unstructured":"Ruggero Ronchi M, Perona P (2017) Benchmarking and error diagnosis in multi-instance pose estimation. In: Proceedings of the IEEE international conference on computer vision, pp 369\u2013378","DOI":"10.1109\/ICCV.2017.48"},{"key":"3909_CR47","doi-asserted-by":"crossref","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Parikh D, Batra D (2017) Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE international conference on computer vision, pp 618\u2013626","DOI":"10.1109\/ICCV.2017.74"},{"key":"3909_CR48","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556"},{"issue":"10","key":"3909_CR49","doi-asserted-by":"publisher","first-page":"2972","DOI":"10.1109\/TCSVT.2018.2875449","volume":"29","author":"K Song","year":"2018","unstructured":"Song K, Yang H, Yin Z (2018) Multi-scale attention deep neural network for fast accurate object detection. IEEE Trans Circuits Syst Video Technol 29(10):2972\u20132985","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"3909_CR50","doi-asserted-by":"crossref","unstructured":"Su K, Yu D, Xu Z, Geng X, Wang C (2019) Multi-person pose estimation with enhanced channel-wise and spatial information. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 5674\u20135682","DOI":"10.1109\/CVPR.2019.00582"},{"key":"3909_CR51","doi-asserted-by":"crossref","unstructured":"Sun K, Xiao B, Liu D, Wang J (2019) Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp 5693\u20135703","DOI":"10.1109\/CVPR.2019.00584"},{"key":"3909_CR52","first-page":"1799","volume":"27","author":"JJ Tompson","year":"2014","unstructured":"Tompson JJ, Jain A, LeCun Y, Bregler C (2014) Joint training of a convolutional network and a graphical model for human pose estimation. Adv Neural Inf Process Syst 27:1799\u20131807","journal-title":"Adv Neural Inf Process Syst"},{"issue":"7","key":"3909_CR53","doi-asserted-by":"publisher","first-page":"5146","DOI":"10.1007\/s10489-020-01966-z","volume":"51","author":"H Tong","year":"2021","unstructured":"Tong H, Fang Z, Wei Z, Cai Q, Gao Y (2021) Sat-net: a side attention network for retinal image segmentation. Appl Intell 51(7):5146\u20135156","journal-title":"Appl Intell"},{"issue":"3","key":"3909_CR54","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1017\/S0140525X00079577","volume":"13","author":"JK Tsotsos","year":"1990","unstructured":"Tsotsos JK (1990) Analyzing vision at the complexity level. Behav Brain Sci 13(3):423\u2013445","journal-title":"Behav Brain Sci"},{"key":"3909_CR55","doi-asserted-by":"crossref","unstructured":"Tsotsos JK (2011) A computational perspective on visual attention","DOI":"10.7551\/mitpress\/9780262015417.001.0001"},{"key":"3909_CR56","first-page":"551","volume":"16","author":"T Wan","year":"2022","unstructured":"Wan T, Luo Y, Zhang Z, Ou Z (2022) Tsnet: Tree structure network for human pose estimation. 2 16:551\u2013558","journal-title":"2"},{"key":"3909_CR57","doi-asserted-by":"crossref","unstructured":"Wei SE, Ramakrishna V, Kanade T, Sheikh Y (2016) Convolutional pose machines. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, pp 4724\u20134732","DOI":"10.1109\/CVPR.2016.511"},{"key":"3909_CR58","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee JY, Kweon IS (2018) Cbam: Convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"3909_CR59","doi-asserted-by":"crossref","unstructured":"Wu H, Ma X, Li Y (2021) Spatiotemporal multimodal learning with 3d cnns for video action recognition. IEEE Transactions on Circuits and Systems for Video Technology","DOI":"10.1109\/TCSVT.2021.3077512"},{"key":"3909_CR60","doi-asserted-by":"publisher","unstructured":"Xiang S, Chen X, Zhou J (2021) An efficient method for boosting human pose estimation. In: 2021 IEEE International Symposium on Broadband Multimedia Systems and Broadcasting (BMSB). https:\/\/doi.org\/10.1109\/BMSB53066.2021.9547183, pp 1\u20136","DOI":"10.1109\/BMSB53066.2021.9547183"},{"key":"3909_CR61","doi-asserted-by":"crossref","unstructured":"Xiao B, Wu H, Wei Y (2018) Simple baselines for human pose estimation and tracking. In: Proceedings of the European conference on computer vision (ECCV), pp 466\u2013481","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"3909_CR62","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"3909_CR63","unstructured":"Xu X, Zou Q, Lin X Cfenet: Content-aware feature enhancement network for multi-person pose estimation. Applied Intelligence p 1\u201322"},{"key":"3909_CR64","unstructured":"Yang S, Quan Z, Nie M, Yang W (2020) Transpose: Towards explainable human pose estimation by transformer. arXiv:2012.14214"},{"key":"3909_CR65","doi-asserted-by":"crossref","unstructured":"Yang Y, Ramanan D (2011) Articulated pose estimation with flexible mixtures-of-parts. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp 1385\u20131392","DOI":"10.1109\/CVPR.2011.5995741"},{"key":"3909_CR66","unstructured":"Zhang H, Ouyang H, Liu S, Qi X, Shen X, Yang R, Jia J (2019) Human pose estimation with spatial contextual information. arXiv:1901.01760"},{"key":"3909_CR67","doi-asserted-by":"publisher","first-page":"6785","DOI":"10.1109\/TIP.2021.3097836","volume":"30","author":"L Zhao","year":"2021","unstructured":"Zhao L, Wang N, Gong C, Yang J, Gao X (2021) Estimating human pose efficiently by parallel pyramid networks. IEEE Trans Image Process 30:6785\u20136800","journal-title":"IEEE Trans Image Process"},{"key":"3909_CR68","doi-asserted-by":"crossref","unstructured":"Zhu Z, Xu M, Bai S, Huang T, Bai X (2019) Asymmetric non-local neural networks for semantic segmentation. In: 2019 IEEE\/CVF International conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00068"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03909-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03909-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03909-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T10:38:40Z","timestamp":1727606320000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03909-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,19]]},"references-count":68,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["3909"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03909-2","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,7,19]]},"assertion":[{"value":"18 June 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 July 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}