{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T23:25:36Z","timestamp":1766013936749,"version":"3.48.0"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"17","license":[{"start":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T00:00:00Z","timestamp":1764374400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T00:00:00Z","timestamp":1764374400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["Grants No.62571130"],"award-info":[{"award-number":["Grants No.62571130"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"the National Key Research and Development Program of China","award":["Grant No. 2022YFC3003002-03"],"award-info":[{"award-number":["Grant No. 2022YFC3003002-03"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04990-z","type":"journal-article","created":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T17:13:06Z","timestamp":1764436386000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Research on 3D human pose estimation via attention-guided adjacent frame-aware network"],"prefix":"10.1007","volume":"19","author":[{"given":"Jianwei","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuanyao","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weitian","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengxi","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziyi","family":"Zhuang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,29]]},"reference":[{"key":"4990_CR1","doi-asserted-by":"crossref","unstructured":"Lin, J., Li, S., Qin, H., Wang, H., Cui, N., Jiang, Q., Jian, H., Wang, G.: Overview of 3d human pose estimation. CMES-Computer Modeling in Engineering & Sciences 134(3), (2023)","DOI":"10.32604\/cmes.2022.020857"},{"key":"4990_CR2","doi-asserted-by":"crossref","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR\u201905), vol. 1, pp. 886\u2013893 (2005). Ieee","DOI":"10.1109\/CVPR.2005.177"},{"issue":"1","key":"4990_CR3","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1023\/B:VISI.0000042934.15159.49","volume":"61","author":"PF Felzenszwalb","year":"2005","unstructured":"Felzenszwalb, P.F., Huttenlocher, D.P.: Pictorial structures for object recognition. Int. J. Comput. Vision 61(1), 55\u201379 (2005)","journal-title":"Int. J. Comput. Vision"},{"key":"4990_CR4","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P., McAllester, D., Ramanan, D.: A discriminatively trained, multiscale, deformable part model. In: 2008 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20138 (2008). Ieee","DOI":"10.1109\/CVPR.2008.4587597"},{"key":"4990_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2019.102897","volume":"192","author":"Y Chen","year":"2020","unstructured":"Chen, Y., Tian, Y., He, M.: Monocular human pose estimation: A survey of deep learning-based methods. Comput. Vis. Image Underst. 192, 102897 (2020)","journal-title":"Comput. Vis. Image Underst."},{"key":"4990_CR6","doi-asserted-by":"crossref","unstructured":"Toshev, A., Szegedy, C.: Deeppose: Human pose estimation via deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1653\u20131660 (2014)","DOI":"10.1109\/CVPR.2014.214"},{"key":"4990_CR7","doi-asserted-by":"crossref","unstructured":"Wei, S.-E., Ramakrishna, V., Kanade, T., Sheikh, Y.: Convolutional pose machines. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4724\u20134732 (2016)","DOI":"10.1109\/CVPR.2016.511"},{"key":"4990_CR8","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: European Conference on Computer Vision, pp. 483\u2013499 (2016). Springer","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"4990_CR9","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"4990_CR10","doi-asserted-by":"crossref","unstructured":"Ye, S., Zhang, Y., Hu, J., Cao, L., Zhang, S., Shen, L., Wang, J., Ding, S., Ji, R.: Distilpose: Tokenized pose regression with heatmap distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2163\u20132172 (2023)","DOI":"10.1109\/CVPR52729.2023.00215"},{"issue":"6","key":"4990_CR11","doi-asserted-by":"publisher","first-page":"663","DOI":"10.26599\/TST.2018.9010100","volume":"24","author":"Q Dang","year":"2019","unstructured":"Dang, Q., Yin, J., Wang, B., Zheng, W.: Deep learning based 2d human pose estimation: A survey. Tsinghua Science and Technology 24(6), 663\u2013676 (2019)","journal-title":"Tsinghua Science and Technology"},{"issue":"1","key":"4990_CR12","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/s10462-024-11019-3","volume":"58","author":"RB Neupane","year":"2024","unstructured":"Neupane, R.B., Li, K., Boka, T.F.: A survey on deep 3d human pose estimation. Artif. Intell. Rev. 58(1), 24 (2024)","journal-title":"Artif. Intell. Rev."},{"issue":"7","key":"4990_CR13","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2013","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: Large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE transactions on pattern analysis and machine intelligence 36(7), 1325\u20131339 (2013)","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"4990_CR14","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7122\u20137131 (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"4990_CR15","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., Black, M.J., Daniilidis, K.: Learning to reconstruct 3d human pose and shape via model-fitting in the loop. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2252\u20132261 (2019)","DOI":"10.1109\/ICCV.2019.00234"},{"key":"4990_CR16","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Zhang, J.Y., Felsen, P., Malik, J.: Learning 3d human dynamics from video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5614\u20135623 (2019)","DOI":"10.1109\/CVPR.2019.00576"},{"key":"4990_CR17","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Athanasiou, N., Black, M.J.: Vibe: Video inference for human body pose and shape estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5253\u20135263 (2020)","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"4990_CR18","doi-asserted-by":"crossref","unstructured":"Choi, H., Moon, G., Chang, J.Y., Lee, K.M.: Beyond static features for temporally consistent 3d human pose and shape from a video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1964\u20131973 (2021)","DOI":"10.1109\/CVPR46437.2021.00200"},{"key":"4990_CR19","doi-asserted-by":"crossref","unstructured":"He, R., Xiang, S., Tao, P., Yu, Y.: Monocular 3d human pose estimation based on global temporal-attentive and joints-attention in video. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2023). IEEE","DOI":"10.1109\/ICASSP49357.2023.10096111"},{"issue":"3\u20134","key":"4990_CR20","doi-asserted-by":"publisher","first-page":"2187","DOI":"10.1002\/cav.2187","volume":"34","author":"L Sun","year":"2023","unstructured":"Sun, L., Tang, T., Qu, Y., Qin, W.: Bidirectional temporal feature for 3d human pose and shape estimation from a video. Computer Animation and Virtual Worlds 34(3\u20134), 2187 (2023)","journal-title":"Computer Animation and Virtual Worlds"},{"key":"4990_CR21","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.J.: Amass: Archive of motion capture as surface shapes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5442\u20135451 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"4990_CR22","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"issue":"5","key":"4990_CR23","first-page":"5436","volume":"45","author":"M-H Guo","year":"2022","unstructured":"Guo, M.-H., Liu, Z.-N., Mu, T.-J., Hu, S.-M.: Beyond self-attention: External attention using two linear layers for visual tasks. IEEE Trans. Pattern Anal. Mach. Intell. 45(5), 5436\u20135447 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"21","key":"4990_CR24","doi-asserted-by":"publisher","first-page":"3523","DOI":"10.3390\/electronics11213523","volume":"11","author":"Q Yu","year":"2022","unstructured":"Yu, Q., Yu, H., Wang, Y., Pham, T.D.: Sum-gan-gea: Video summarization using gan with gaussian distribution and external attention. Electronics 11(21), 3523 (2022)","journal-title":"Electronics"},{"key":"4990_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.120605","volume":"669","author":"S Huang","year":"2024","unstructured":"Huang, S., Liu, Y., Cui, H., Zhang, F., Li, J., Zhang, X., Zhang, M., Zhang, C.: Meaformer: An all-mlp transformer with temporal external attention for long-term time series forecasting. Inf. Sci. 669, 120605 (2024)","journal-title":"Inf. Sci."},{"key":"4990_CR26","unstructured":"Goodfellow, I.J., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. Advances in neural information processing systems 27 (2014)"},{"key":"4990_CR27","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4990_CR28","doi-asserted-by":"crossref","unstructured":"Zhu, L., Xu, Z., Yang, Y.: Bidirectional multirate reconstruction for temporal modeling in videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2653\u20132662 (2017)","DOI":"10.1109\/CVPR.2017.147"},{"key":"4990_CR29","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: Smpl: A skinned multi-person linear model. In: Seminal Graphics Papers: Pushing the Boundaries, Volume 2, pp. 851\u2013866 (2023)","DOI":"10.1145\/3596711.3596800"},{"key":"4990_CR30","doi-asserted-by":"crossref","unstructured":"Von\u00a0Marcard, T., Henschel, R., Black, M.J., Rosenhahn, B., Pons-Moll, G.: Recovering accurate 3d human pose in the wild using imus and a moving camera. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 601\u2013617 (2018)","DOI":"10.1007\/978-3-030-01249-6_37"},{"key":"4990_CR31","doi-asserted-by":"crossref","unstructured":"Mehta, D., Rhodin, H., Casas, D., Fua, P., Sotnychenko, O., Xu, W., Theobalt, C.: Monocular 3d human pose estimation in the wild using improved cnn supervision. In: 2017 International Conference on 3D Vision (3DV), pp. 506\u2013516 (2017). IEEE","DOI":"10.1109\/3DV.2017.00064"},{"key":"4990_CR32","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"4990_CR33","doi-asserted-by":"crossref","unstructured":"Luo, Z., Golestaneh, S.A., Kitani, K.M.: 3d human motion estimation via motion compression and refinement. In: Proceedings of the Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69541-5_20"},{"issue":"3","key":"4990_CR34","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1007\/s11554-022-01214-2","volume":"19","author":"W Sun","year":"2022","unstructured":"Sun, W., Ma, S., He, X., Ma, Q.: Simplemeshnet: end to end recovery of 3d body mesh with one fully connected layer. J. Real-Time Image Proc. 19(3), 703\u2013713 (2022)","journal-title":"J. Real-Time Image Proc."},{"key":"4990_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2022.103539","volume":"224","author":"W Sun","year":"2022","unstructured":"Sun, W., Wang, L., Ma, S., Ma, Q.: Estimating 3d body mesh without smpl annotations via alternating successive convex approximation. Comput. Vis. Image Underst. 224, 103539 (2022)","journal-title":"Comput. Vis. Image Underst."},{"key":"4990_CR36","doi-asserted-by":"crossref","unstructured":"Tang, T., You, Y., Wang, T., Liu, H.: An efficient graph transformer network for video-based human mesh reconstruction. In: CAAI International Conference on Artificial Intelligence, pp. 207\u2013219 (2023). Springer","DOI":"10.1007\/978-981-99-8850-1_17"},{"key":"4990_CR37","doi-asserted-by":"crossref","unstructured":"Shen, X., Yang, Z., Wang, X., Ma, J., Zhou, C., Yang, Y.: Global-to-local modeling for video-based 3d human pose and shape estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8887\u20138896 (2023)","DOI":"10.1109\/CVPR52729.2023.00858"},{"key":"4990_CR38","doi-asserted-by":"crossref","unstructured":"Wei, W.-L., Lin, J.-C.: Multi-candidate motion modeling for 3d human pose and shape estimation from monocular video. In: 2024 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136 (2024). IEEE","DOI":"10.1109\/ICME57554.2024.10687476"},{"key":"4990_CR39","doi-asserted-by":"crossref","unstructured":"Zhao, N., Lv, N.: A diffusion-based framework for 3d human pose and shape estimation from monocular videos. In: 2024 International Conference on Virtual Reality and Visualization (ICVRV), pp. 256\u2013261 (2024). IEEE","DOI":"10.1109\/ICVRV62410.2024.00053"},{"key":"4990_CR40","doi-asserted-by":"crossref","unstructured":"Zheng, C., Liu, X., Peng, Q., Wu, T., Wang, P., Chen, C.: Diffmesh: A motion-aware diffusion framework for human mesh recovery from videos. In: 2025 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 4891\u20134901 (2025). IEEE","DOI":"10.1109\/WACV61041.2025.00479"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04990-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04990-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04990-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T23:20:48Z","timestamp":1766013648000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04990-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,29]]},"references-count":40,"journal-issue":{"issue":"17","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4990"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04990-z","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"type":"print","value":"1863-1703"},{"type":"electronic","value":"1863-1711"}],"subject":[],"published":{"date-parts":[[2025,11,29]]},"assertion":[{"value":"25 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 November 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 November 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"1390"}}