{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T19:05:48Z","timestamp":1761419148185,"version":"build-2065373602"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"14","license":[{"start":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:00:00Z","timestamp":1758844800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:00:00Z","timestamp":1758844800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100015401","name":"Key Research and Development Projects of Shaanxi Province","doi-asserted-by":"publisher","award":["2021ZDLGY07-08","2021ZDLGY07-08"],"award-info":[{"award-number":["2021ZDLGY07-08","2021ZDLGY07-08"]}],"id":[{"id":"10.13039\/501100015401","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04818-w","type":"journal-article","created":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T11:29:40Z","timestamp":1758886180000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["LiftMamba: Mamba-based lightweight network for 3D human pose estimation"],"prefix":"10.1007","volume":"19","author":[{"given":"Ma","family":"Li","sequence":"first","affiliation":[]},{"given":"Dexiang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Xinguan","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Hangbiao","family":"Gao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,26]]},"reference":[{"issue":"1","key":"4818_CR1","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1049\/iet-cvi.2017.0052","volume":"12","author":"BK Chakraborty","year":"2018","unstructured":"Chakraborty, B.K., Sarma, D., Bhuyan, M.K., MacDorman, K.F.: Review of constraints on vision-based gesture recognition for human-computer interaction. IET Comput. Vision 12(1), 3\u201315 (2018)","journal-title":"IET Comput. Vision"},{"issue":"41","key":"4818_CR2","doi-asserted-by":"publisher","first-page":"30509","DOI":"10.1007\/s11042-020-09004-3","volume":"79","author":"DR Beddiar","year":"2020","unstructured":"Beddiar, D.R., Nini, B., Sabokrou, M., Hadid, A.: Vision-based human activity recognition: a survey. Multimed Tools and Applica 79(41), 30509\u201330555 (2020)","journal-title":"Multimed Tools and Applica"},{"key":"4818_CR3","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"4818_CR4","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 466\u2013481 (2018)","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"4818_CR5","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J., Little, J.J.: A simple yet effective baseline for 3d human pose estimation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2640\u20132649 (2017)","DOI":"10.1109\/ICCV.2017.288"},{"key":"4818_CR6","doi-asserted-by":"crossref","unstructured":"Chen, C.-H., Ramanan, D.: 3d human pose estimation= 2d pose estimation+ matching. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7035\u20137043 (2017)","DOI":"10.1109\/CVPR.2017.610"},{"key":"4818_CR7","doi-asserted-by":"crossref","unstructured":"Park, S., Hwang, J., Kwak, N.: 3d human pose estimation using convolutional neural networks with 2d pose information. In: European Conference on Computer Vision, pp. 156\u2013169 (2016). Springer","DOI":"10.1007\/978-3-319-49409-8_15"},{"key":"4818_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"4818_CR9","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"4818_CR10","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: European Conference on Computer Vision, pp. 483\u2013499 (2016). Springer","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"4818_CR11","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"4818_CR12","first-page":"572","volume":"34","author":"A Gu","year":"2021","unstructured":"Gu, A., Johnson, I., Goel, K., Saab, K., Dao, T., Rudra, A., R\u00e9, C.: Combining recurrent, convolutional, and continuous-time models with linear state space layers. Adv. Neural. Inf. Process. Syst. 34, 572\u2013585 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4818_CR13","first-page":"2846","volume":"35","author":"E Nguyen","year":"2022","unstructured":"Nguyen, E., Goel, K., Gu, A., Downs, G., Shah, P., Dao, T., Baccus, S., R\u00e9, C.: S4nd: modeling images and videos as multidimensional signals with state spaces. Adv. Neural. Inf. Process. Syst. 35, 2846\u20132861 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4818_CR14","doi-asserted-by":"crossref","unstructured":"Saon, G., Gupta, A., Cui, X.: Diagonal state space augmented transformers for speech recognition. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10096271"},{"key":"4818_CR15","unstructured":"Gu, A., Dao, T.: Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)"},{"key":"4818_CR16","unstructured":"Dao, T., Gu, A.: Transformers are ssms: Generalized models and efficient algorithms through structured state space duality. arXiv preprint arXiv:2405.21060 (2024)"},{"key":"4818_CR17","doi-asserted-by":"crossref","unstructured":"Zheng, C., Zhu, S., Mendieta, M., Yang, T., Chen, C., Ding, Z.: 3d human pose estimation with spatial and temporal transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11656\u201311665 (2021)","DOI":"10.1109\/ICCV48922.2021.01145"},{"key":"4818_CR18","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, H., Tang, H., Wang, P., Van\u00a0Gool, L.: Mhformer: Multi-hypothesis transformer for 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13147\u201313156 (2022)","DOI":"10.1109\/CVPR52688.2022.01280"},{"key":"4818_CR19","doi-asserted-by":"crossref","unstructured":"Zhu, W., Ma, X., Liu, Z., Liu, L., Wu, W., Wang, Y.: Motionbert: A unified perspective on learning human motion representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15085\u201315099 (2023)","DOI":"10.1109\/ICCV51070.2023.01385"},{"key":"4818_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, J., Tu, Z., Yang, J., Chen, Y., Yuan, J.: Mixste: Seq2seq mixed spatio-temporal encoder for 3d human pose estimation in video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13232\u201313242 (2022)","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"4818_CR21","doi-asserted-by":"crossref","unstructured":"Hossain, M.R.I., Little, J.J.: Exploiting temporal information for 3d human pose estimation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 68\u201384 (2018)","DOI":"10.1007\/978-3-030-01249-6_5"},{"key":"4818_CR22","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D., Auli, M.: 3d human pose estimation in video with temporal convolutions and semi-supervised training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7753\u20137762 (2019)","DOI":"10.1109\/CVPR.2019.00794"},{"key":"4818_CR23","doi-asserted-by":"crossref","unstructured":"Liu, R., Shen, J., Wang, H., Chen, C., Cheung, S.-c., Asari, V.: Attention mechanism exploits temporal contexts: Real-time 3d human pose reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5064\u20135073 (2020)","DOI":"10.1109\/CVPR42600.2020.00511"},{"key":"4818_CR24","doi-asserted-by":"crossref","unstructured":"Zhao, L., Peng, X., Tian, Y., Kapadia, M., Metaxas, D.N.: Semantic graph convolutional networks for 3d human pose regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3425\u20133435 (2019)","DOI":"10.1109\/CVPR.2019.00354"},{"key":"4818_CR25","doi-asserted-by":"crossref","unstructured":"Ci, H., Wang, C., Ma, X., Wang, Y.: Optimizing network structure for 3d human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2262\u20132271 (2019)","DOI":"10.1109\/ICCV.2019.00235"},{"key":"4818_CR26","doi-asserted-by":"crossref","unstructured":"Peng, K., Yin, C., Zheng, J., Liu, R., Schneider, D., Zhang, J., Yang, K., Sarfraz, M.S., Stiefelhagen, R., Roitberg, A.: Navigating open set scenarios for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, pp. 4487\u20134496 (2024)","DOI":"10.1609\/aaai.v38i5.28247"},{"key":"4818_CR27","doi-asserted-by":"crossref","unstructured":"Peng, K., Roitberg, A., Yang, K., Zhang, J., Stiefelhagen, R.: Delving deep into one-shot skeleton-based action recognition with diverse occlusions. IEEE Trans. Multimed 25, 1489\u20131504 (2023)","DOI":"10.1109\/TMM.2023.3235300"},{"key":"4818_CR28","doi-asserted-by":"crossref","unstructured":"Xie, J., Meng, Y., Zhao, Y., Nguyen, A., Yang, X., Zheng, Y.: Dynamic semantic-based spatial graph convolution network for skeleton-based human action recognition. Proc. AAAI Conf. Artif. Intell. 38, 6225\u20136233 (2024)","DOI":"10.1609\/aaai.v38i6.28440"},{"key":"4818_CR29","doi-asserted-by":"crossref","unstructured":"Xu, Y., Peng, K., Wen, D., Liu, R., Zheng, J., Chen, Y., Zhang, J., Roitberg, A., Yang, K., Stiefelhagen, R.: Skeleton-based human action recognition with noisy labels. In: 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4716\u20134723 IEEE (2024)","DOI":"10.1109\/IROS58592.2024.10801681"},{"key":"4818_CR30","doi-asserted-by":"crossref","unstructured":"Zhao, W., Wang, W., Tian, Y.: Graformer: Graph-oriented transformer for 3d pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20438\u201320447 (2022)","DOI":"10.1109\/CVPR52688.2022.01979"},{"issue":"7","key":"4818_CR31","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2013","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4818_CR32","doi-asserted-by":"crossref","unstructured":"Mehta, D., Rhodin, H., Casas, D., Fua, P., Sotnychenko, O., Xu, W., Theobalt, C.: Monocular 3d human pose estimation in the wild using improved cnn supervision. In: 2017 International Conference on 3D Vision (3DV), pp. 506\u2013516 IEEE (2017)","DOI":"10.1109\/3DV.2017.00064"},{"key":"4818_CR33","doi-asserted-by":"crossref","unstructured":"Shan, W., Liu, Z., Zhang, X., Wang, Z., Han, K., Wang, S., Ma, S., Gao, W.: Diffusion-based 3d human pose estimation with multi-hypothesis aggregation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14761\u201314771 (2023)","DOI":"10.1109\/ICCV51070.2023.01356"},{"key":"4818_CR34","doi-asserted-by":"crossref","unstructured":"Yu, B.X., Zhang, Z., Liu, Y., Zhong, S.-h., Liu, Y., Chen, C.W.: Gla-gcn: Global-local adaptive graph convolutional network for 3d human pose estimation from monocular video. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8818\u20138829 (2023)","DOI":"10.1109\/ICCV51070.2023.00810"},{"key":"4818_CR35","unstructured":"Lipton, Z.C., Berkowitz, J., Elkan, C.: A critical review of recurrent neural networks for sequence learning. arXiv preprint arXiv:1506.00019 (2015)"},{"issue":"8","key":"4818_CR36","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"10","key":"4818_CR37","doi-asserted-by":"publisher","first-page":"2222","DOI":"10.1109\/TNNLS.2016.2582924","volume":"28","author":"K Greff","year":"2016","unstructured":"Greff, K., Srivastava, R.K., Koutn\u00edk, J., Steunebrink, B.R., Schmidhuber, J.: Lstm: a search space odyssey. IEEE trans neural netw and learn syst 28(10), 2222\u20132232 (2016)","journal-title":"IEEE trans neural netw and learn syst"},{"key":"4818_CR38","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012)"},{"key":"4818_CR39","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4818_CR40","unstructured":"Bai, S., Kolter, J.Z., Koltun, V.: An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271 (2018)"},{"key":"4818_CR41","doi-asserted-by":"crossref","unstructured":"Tang, Z., Qiu, Z., Hao, Y., Hong, R., Yao, T.: 3d human pose estimation with spatio-temporal criss-cross attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4790\u20134799 (2023)","DOI":"10.1109\/CVPR52729.2023.00464"},{"key":"4818_CR42","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, M., Liu, H., Wang, P., Cai, J., Sebe, N.: Hourglass tokenizer for efficient transformer-based 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 604\u2013613 (2024)","DOI":"10.1109\/CVPR52733.2024.00064"},{"key":"4818_CR43","doi-asserted-by":"crossref","unstructured":"Lin, H., Cheng, X., Wu, X., Shen, D.: Cat: Cross attention in vision transformer. In: 2022 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136 IEEE (2022)","DOI":"10.1109\/ICME52920.2022.9859720"},{"key":"4818_CR44","doi-asserted-by":"crossref","unstructured":"Wang, J., Yan, S., Xiong, Y., Lin, D.: Motion guided 3d pose estimation from videos. In: European Conference on Computer Vision, pp. 764\u2013780 Springer (2020)","DOI":"10.1007\/978-3-030-58601-0_45"},{"key":"4818_CR45","doi-asserted-by":"crossref","unstructured":"Gong, J., Foo, L.G., Fan, Z., Ke, Q., Rahmani, H., Liu, J.: Diffpose: Toward more reliable 3d pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13041\u201313051 (2023)","DOI":"10.1109\/CVPR52729.2023.01253"},{"key":"4818_CR46","doi-asserted-by":"crossref","unstructured":"Peng, J., Zhou, Y., Mok, P.: Ktpformer: Kinematics and trajectory prior knowledge-enhanced transformer for 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1123\u20131132 (2024)","DOI":"10.1109\/CVPR52733.2024.00113"},{"key":"4818_CR47","doi-asserted-by":"crossref","unstructured":"Xu, J., Guo, Y., Peng, Y.: Finepose: Fine-grained prompt-driven 3d human pose estimation via diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 561\u2013570 (2024)","DOI":"10.1109\/CVPR52733.2024.00060"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04818-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04818-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04818-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T18:58:35Z","timestamp":1761418715000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04818-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,26]]},"references-count":47,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4818"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04818-w","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"type":"print","value":"1863-1703"},{"type":"electronic","value":"1863-1711"}],"subject":[],"published":{"date-parts":[[2025,9,26]]},"assertion":[{"value":"11 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"1230"}}