{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T17:06:40Z","timestamp":1772644000807,"version":"3.50.1"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T00:00:00Z","timestamp":1765670400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T00:00:00Z","timestamp":1765670400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s00371-025-04212-0","type":"journal-article","created":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T08:35:01Z","timestamp":1765701301000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A spatiotemporal bidirectional mamba network with global\u2013local skeletal enhancement for 3D human pose estimation"],"prefix":"10.1007","volume":"42","author":[{"given":"Chuhan","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guixian","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiahao","family":"Hua","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,14]]},"reference":[{"key":"4212_CR1","first-page":"4487","volume":"38","author":"K Peng","year":"2024","unstructured":"Peng, K., Yin, C., Zheng, J., Liu, R., Schneider, D., Zhang, J., Yang, K., Sarfraz, M.S., Stiefelhagen, R., Roitberg, A.: Navigating open set scenarios for skeleton-based action recognition. Proc. AAAI Conf. Artif. Intell. 38, 4487\u20134496 (2024)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"4212_CR2","doi-asserted-by":"crossref","unstructured":"Dittakavi, B., Bavikadi, D., Desai, S.V., Chakraborty, S., Reddy, N., Balasubramanian, V.N., Callepalli, B., Sharma, A.: Pose tutor: an explainable system for pose correction in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3540\u20133549 (2022)","DOI":"10.1109\/CVPRW56347.2022.00398"},{"issue":"4","key":"4212_CR3","first-page":"66","volume":"42","author":"Y Yuan","year":"2023","unstructured":"Yuan, Y., Makoviychuk, V., Guo, Y., Fidler, S., Peng, X., Fatahalian, K.: Learning physically simulated tennis skills from broadcast videos. ACM Trans. Graph 42(4), 66 (2023)","journal-title":"ACM Trans. Graph"},{"key":"4212_CR4","doi-asserted-by":"crossref","unstructured":"Liu, R., Shen, J., Wang, H., Chen, C., Cheung, S.-C., Asari, V.: Attention mechanism exploits temporal contexts: Real-time 3d human pose reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5064\u20135073 (2020)","DOI":"10.1109\/CVPR42600.2020.00511"},{"issue":"1","key":"4212_CR5","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1109\/TCSVT.2021.3057267","volume":"32","author":"T Chen","year":"2021","unstructured":"Chen, T., Fang, C., Shen, X., Zhu, Y., Chen, Z., Luo, J.: Anatomy-aware 3d human pose estimation with bone-based pose decomposition. IEEE Trans. Circuits Syst. Video Technol. 32(1), 198\u2013209 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"4212_CR6","doi-asserted-by":"crossref","unstructured":"Zeng, A., Sun, X., Huang, F., Liu, M., Xu, Q., Lin, S.: Srnet: Improving generalization in 3d human pose estimation with a split-and-recombine approach. In: European Conference on Computer Vision, pp. 507\u2013523 (2020). Springer","DOI":"10.1007\/978-3-030-58568-6_30"},{"key":"4212_CR7","doi-asserted-by":"crossref","unstructured":"Wang, J., Yan, S., Xiong, Y., Lin, D.: Motion guided 3d pose estimation from videos. In: European Conference on Computer Vision, pp. 764\u2013780 (2020). Springer","DOI":"10.1007\/978-3-030-58601-0_45"},{"key":"4212_CR8","first-page":"66","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141, Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30, 66 (2017)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"4212_CR9","doi-asserted-by":"crossref","unstructured":"Zheng, C., Zhu, S., Mendieta, M., Yang, T., Chen, C., Ding, Z.: 3d human pose estimation with spatial and temporal transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11656\u201311665 (2021)","DOI":"10.1109\/ICCV48922.2021.01145"},{"key":"4212_CR10","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, H., Tang, H., Wang, P., Van\u00a0Gool, L.: Mhformer: Multi-hypothesis transformer for 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13147\u201313156 (2022)","DOI":"10.1109\/CVPR52688.2022.01280"},{"key":"4212_CR11","doi-asserted-by":"crossref","unstructured":"Zhang, J., Tu, Z., Yang, J., Chen, Y., Yuan, J.: Mixste: Seq2seq mixed spatio-temporal encoder for 3d human pose estimation in video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13232\u201313242 (2022)","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"4212_CR12","doi-asserted-by":"crossref","unstructured":"Ma, H., Wang, Z., Chen, Y., Kong, D., Chen, L., Liu, X., Yan, X., Tang, H., Xie, X.: Ppt: token-pruned pose transformer for monocular and multi-view human pose estimation. In: European Conference on Computer Vision, pp. 424\u2013442 (2022). Springer","DOI":"10.1007\/978-3-031-20065-6_25"},{"key":"4212_CR13","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, M., Liu, H., Wang, P., Cai, J., Sebe, N.: Hourglass tokenizer for efficient transformer-based 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 604\u2013613 (2024)","DOI":"10.1109\/CVPR52733.2024.00064"},{"key":"4212_CR14","unstructured":"Gu, A., Dao, T.: Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)"},{"key":"4212_CR15","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhu, W., Wang, P., Yu, X., Liu, L., Omar, M., Hamid, R.: Selective structured state-spaces for long-form video understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6387\u20136397 (2023)","DOI":"10.1109\/CVPR52729.2023.00618"},{"key":"4212_CR16","doi-asserted-by":"crossref","unstructured":"Islam, M.M., Bertasius, G.: Long movie clip classification with state-space video models. In: European Conference on Computer Vision, pp. 87\u2013104 (2022). Springer","DOI":"10.1007\/978-3-031-19833-5_6"},{"key":"4212_CR17","unstructured":"Gu, A., Goel, K., R\u00e9, C.: Efficiently modeling long sequences with structured state spaces. arXiv preprint arXiv:2111.00396 (2021)"},{"key":"4212_CR18","doi-asserted-by":"crossref","unstructured":"Zhu, W., Ma, X., Liu, Z., Liu, L., Wu, W., Wang, Y.: Motionbert: A unified perspective on learning human motion representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 15085\u201315099 (2023)","DOI":"10.1109\/ICCV51070.2023.01385"},{"key":"4212_CR19","doi-asserted-by":"crossref","unstructured":"Qiu, H., Wang, C., Wang, J., Wang, N., Zeng, W.: Cross view fusion for 3d human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4342\u20134351 (2019)","DOI":"10.1109\/ICCV.2019.00444"},{"key":"4212_CR20","first-page":"7205","volume":"38","author":"L Zhang","year":"2024","unstructured":"Zhang, L., Zhou, K., Lu, F., Zhou, X.-D., Shi, Y.: Deep semantic graph transformer for multi-view 3d human pose estimation. Proc. AAAI Conf. Artif. Intell. 38, 7205\u20137214 (2024)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"4212_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, X., Cui, Q., Bao, Q., Yang, W., Liao, Q.: Geometry-guided diffusion model with masked transformer for robust multi-view 3d human pose estimation. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 681\u2013690 (2024)","DOI":"10.1145\/3664647.3681265"},{"key":"4212_CR22","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: European Conference on Computer Vision, pp. 483\u2013499 (2016). Springer","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"4212_CR23","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"4212_CR24","doi-asserted-by":"crossref","unstructured":"Mondal, A., Alletto, S., Tome, D.: Hummuss: Human motion understanding using state space models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2318\u20132330 (2024)","DOI":"10.1109\/CVPR52733.2024.00225"},{"issue":"10","key":"4212_CR25","doi-asserted-by":"publisher","first-page":"1369","DOI":"10.1109\/TVCG.2010.241","volume":"17","author":"N Hagbi","year":"2010","unstructured":"Hagbi, N., Bergig, O., El-Sana, J., Billinghurst, M.: Shape recognition and pose estimation for mobile augmented reality. IEEE Trans. Visual Comput. Graphics 17(10), 1369\u20131379 (2010)","journal-title":"IEEE Trans. Visual Comput. Graphics"},{"key":"4212_CR26","doi-asserted-by":"crossref","unstructured":"Hossain, M.R.I., Little, J.J.: Exploiting temporal information for 3d human pose estimation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 68\u201384 (2018)","DOI":"10.1007\/978-3-030-01249-6_5"},{"key":"4212_CR27","doi-asserted-by":"crossref","unstructured":"Hu, W., Zhang, C., Zhan, F., Zhang, L., Wong, T.-T.: Conditional directed graph convolution for 3d human pose estimation. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 602\u2013611 (2021)","DOI":"10.1145\/3474085.3475219"},{"issue":"7","key":"4212_CR28","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2013","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4212_CR29","doi-asserted-by":"publisher","DOI":"10.1007\/0-387-27890-7","volume-title":"Real-time Vision for Human-computer Interaction","author":"B Kisacanin","year":"2005","unstructured":"Kisacanin, B., Pavlovic, V., Huang, T.S.: Real-time Vision for Human-computer Interaction. Springer, Berlin (2005)"},{"key":"4212_CR30","first-page":"1296","volume":"37","author":"H Li","year":"2023","unstructured":"Li, H., Shi, B., Dai, W., Zheng, H., Wang, B., Sun, Y., Guo, M., Li, C., Zou, J., Xiong, H.: Pose-oriented transformer with uncertainty-guided refinement for 2d-to-3d human pose estimation. Proc. AAAI Conf. Artif. Intell. 37, 1296\u20131304 (2023)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"4212_CR31","doi-asserted-by":"crossref","unstructured":"Wang, Y., Li, M., Cai, H., Chen, W.-M., Han, S.: Litepose: Efficient architecture design for 2d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13126\u201313136 (2022)","DOI":"10.1109\/CVPR52688.2022.01278"},{"key":"4212_CR32","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J., Little, J.J.: A simple yet effective baseline for 3d human pose estimation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2640\u20132649 (2017)","DOI":"10.1109\/ICCV.2017.288"},{"key":"4212_CR33","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D., Auli, M.: 3d human pose estimation in video with temporal convolutions and semi-supervised training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7753\u20137762 (2019)","DOI":"10.1109\/CVPR.2019.00794"},{"key":"4212_CR34","doi-asserted-by":"crossref","unstructured":"Yu, B.X., Zhang, Z., Liu, Y., Zhong, S.-h., Liu, Y., Chen, C.W.: Gla-gcn: Global-local adaptive graph convolutional network for 3d human pose estimation from monocular video. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8818\u20138829 (2023)","DOI":"10.1109\/ICCV51070.2023.00810"},{"key":"4212_CR35","first-page":"3842","volume":"39","author":"Y Huang","year":"2025","unstructured":"Huang, Y., Liu, J., Xian, K., Qiu, R.C.: Posemamba: monocular 3d human pose estimation with bidirectional global-local spatio-temporal state space model. Proc. AAAI Conf. Artif. Intell. 39, 3842\u20133850 (2025)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"4212_CR36","first-page":"10248","volume":"39","author":"X Zhang","year":"2025","unstructured":"Zhang, X., Bao, Q., Cui, Q., Yang, W., Liao, Q.: Pose magic: efficient and temporally consistent human pose estimation with a hybrid mamba-gcn network. Proc. AAAI Conf. Artif. Intell. 39, 10248\u201310256 (2025)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"4212_CR37","doi-asserted-by":"crossref","unstructured":"Author(s): Mambapose: A human pose estimation based on gated feedforward network and mamba. Sensors 24(x), (2024)","DOI":"10.3390\/s24248158"},{"key":"4212_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, J., Chen, Y., Tu, Z.: Uncertainty-aware 3d human pose estimation from monocular video. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 5102\u20135113 (2022)","DOI":"10.1145\/3503161.3547773"},{"key":"4212_CR39","doi-asserted-by":"crossref","unstructured":"Xu, J., Guo, Y., Peng, Y.: Finepose: Fine-grained prompt-driven 3d human pose estimation via diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 561\u2013570 (2024)","DOI":"10.1109\/CVPR52733.2024.00060"},{"key":"4212_CR40","first-page":"35971","volume":"35","author":"A Gu","year":"2022","unstructured":"Gu, A., Goel, K., Gupta, A., R\u00e9, C.: On the parameterization and initialization of diagonal state space models. Adv. Neural. Inf. Process. Syst. 35, 35971\u201335983 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4212_CR41","first-page":"22982","volume":"35","author":"A Gupta","year":"2022","unstructured":"Gupta, A., Gu, A., Berant, J.: Diagonal state spaces are as effective as structured state spaces. Adv. Neural. Inf. Process. Syst. 35, 22982\u201322994 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4212_CR42","unstructured":"Smith, J.T., Warrington, A., Linderman, S.W.: Simplified state space layers for sequence modeling. arXiv preprint arXiv:2208.04933 (2022)"},{"key":"4212_CR43","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., Wang, X.: Vision mamba: Efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv:2401.09417 (2024)"},{"key":"4212_CR44","doi-asserted-by":"crossref","unstructured":"Wang, J., Yan, J.N., Gu, A., Rush, A.M.: Pretraining without attention. arXiv preprint arXiv:2212.10544 (2022)","DOI":"10.18653\/v1\/2023.findings-emnlp.5"},{"key":"4212_CR45","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"4212_CR46","unstructured":"Huang, Z., Ben, Y., Luo, G., Cheng, P., Yu, G., Fu, B.: Shuffle transformer: Rethinking spatial shuffle for vision transformer. arXiv preprint arXiv:2106.03650 (2021)"},{"key":"4212_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., Sun, J.: Shufflenet: An extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6848\u20136856 (2018)","DOI":"10.1109\/CVPR.2018.00716"},{"key":"4212_CR48","unstructured":"Huang, Z., Chen, H., Li, J., Lan, J., Zhu, H., Wang, W., Wang, L.: Stochastic layer-wise shuffle: A good practice to improve vision mamba training. arXiv preprint arXiv:2408.17081 (2024)"},{"key":"4212_CR49","unstructured":"Zhang, B., Sennrich, R.: Root mean square layer normalization. Advances in neural information processing systems 32 (2019)"},{"key":"4212_CR50","doi-asserted-by":"crossref","unstructured":"Mehta, D., Rhodin, H., Casas, D., Fua, P., Sotnychenko, O., Xu, W., Theobalt, C.: Monocular 3d human pose estimation in the wild using improved cnn supervision. In: 2017 International Conference on 3D Vision (3DV), pp. 506\u2013516 (2017). IEEE","DOI":"10.1109\/3DV.2017.00064"},{"key":"4212_CR51","unstructured":"Adam, K.D.B.J., et al.: A method for stochastic optimization. arXiv preprint arXiv:1412.69801412(6) (2014)"},{"issue":"2","key":"4212_CR52","doi-asserted-by":"publisher","first-page":"492","DOI":"10.1007\/s11119-021-09846-3","volume":"23","author":"W Jia","year":"2022","unstructured":"Jia, W., Zhang, Z., Shao, W., Ji, Z., Hou, S.: Rs-net: Robust segmentation of green overlapped apples. Precision Agric. 23(2), 492\u2013513 (2022)","journal-title":"Precision Agric."},{"key":"4212_CR53","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/j.patrec.2025.01.016","volume":"189","author":"J Peng","year":"2025","unstructured":"Peng, J., Zhou, Y., Mok, P.: A cross-feature interaction network for 3d human pose estimation. Pattern Recogn. Lett. 189, 175\u2013181 (2025)","journal-title":"Pattern Recogn. Lett."},{"key":"4212_CR54","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110925","volume":"158","author":"W Li","year":"2025","unstructured":"Li, W., Liu, M., Liu, H., Guo, T., Wang, T., Tang, H., Sebe, N.: Graphmlp: a graph mlp-like architecture for 3d human pose estimation. Pattern Recogn. 158, 110925 (2025)","journal-title":"Pattern Recogn."},{"key":"4212_CR55","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1109\/TMM.2022.3141231","volume":"25","author":"W Li","year":"2022","unstructured":"Li, W., Liu, H., Ding, R., Liu, M., Wang, P., Yang, W.: Exploiting temporal contexts with strided transformer for 3d human pose estimation. IEEE Trans. Multimedia 25, 1282\u20131293 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"4212_CR56","unstructured":"Qian, X., Tang, Y., Zhang, N., Han, M., Xiao, J., Huang, M.-C., Lin, R.-S.: Hstformer: Hierarchical spatial-temporal transformers for 3d human pose estimation. arXiv preprint arXiv:2301.07322 (2023)"},{"key":"4212_CR57","doi-asserted-by":"crossref","unstructured":"Chen, H., He, J.-Y., Xiang, W., Cheng, Z.-Q., Liu, W., Liu, H., Luo, B., Geng, Y., Xie, X.: Hdformer: High-order directed transformer for 3d human pose estimation. arXiv preprint arXiv:2302.01825 (2023)","DOI":"10.24963\/ijcai.2023\/65"},{"key":"4212_CR58","doi-asserted-by":"crossref","unstructured":"Tang, Z., Qiu, Z., Hao, Y., Hong, R., Yao, T.: 3d human pose estimation with spatio-temporal criss-cross attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4790\u20134799 (2023)","DOI":"10.1109\/CVPR52729.2023.00464"},{"key":"4212_CR59","doi-asserted-by":"crossref","unstructured":"Zeng, A., Sun, X., Yang, L., Zhao, N., Liu, M., Xu, Q.: Learning skeletal graph neural networks for hard 3d pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11436\u201311445 (2021)","DOI":"10.1109\/ICCV48922.2021.01124"},{"key":"4212_CR60","doi-asserted-by":"crossref","unstructured":"Lin, X., Yan, Z., Deng, X., Zheng, C., Yu, L.: Convformer: Plug-and-play cnn-style transformers for improving medical image segmentation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 642\u2013651 (2023). Springer","DOI":"10.1007\/978-3-031-43901-8_61"},{"key":"4212_CR61","doi-asserted-by":"crossref","unstructured":"Shan, W., Liu, Z., Zhang, X., Wang, S., Ma, S., Gao, W.: P-stmo: Pre-trained spatial temporal many-to-one model for 3d human pose estimation. In: European Conference on Computer Vision, pp. 461\u2013478 (2022). Springer","DOI":"10.1007\/978-3-031-20065-6_27"},{"key":"4212_CR62","doi-asserted-by":"crossref","unstructured":"Cui, H., Hayama, T.: Hgmamba: Enhancing 3d human pose estimation with a hypergcn-mamba network. arXiv preprint arXiv:2504.06638 (2025)","DOI":"10.1109\/IJCNN64981.2025.11227716"},{"key":"4212_CR63","doi-asserted-by":"crossref","unstructured":"Mehraban, S., Adeli, V., Taati, B.: Motionagformer: Enhancing 3d human pose estimation with a transformer-gcnformer network. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 6920\u20136930 (2024)","DOI":"10.1109\/WACV57701.2024.00677"},{"key":"4212_CR64","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.105142","volume":"149","author":"F Hao","year":"2024","unstructured":"Hao, F., Zhong, F., Yu, H., Hu, J., Yang, Y.: Stafformer: Spatio-temporal adaptive fusion transformer for efficient 3d human pose estimation. Image Vis. Comput. 149, 105142 (2024)","journal-title":"Image Vis. Comput."},{"key":"4212_CR65","doi-asserted-by":"crossref","unstructured":"Gong, J., Foo, L.G., Fan, Z., Ke, Q., Rahmani, H., Liu, J.: Diffpose: Toward more reliable 3d pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13041\u201313051 (2023)","DOI":"10.1109\/CVPR52729.2023.01253"},{"key":"4212_CR66","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Zheng, C., Liu, M., Wang, P., Chen, C.: Poseformerv2: Exploring frequency domain for efficient and robust 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8877\u20138886 (2023)","DOI":"10.1109\/CVPR52729.2023.00857"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04212-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-04212-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04212-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T13:02:47Z","timestamp":1772629367000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-04212-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,14]]},"references-count":66,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["4212"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-04212-0","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-7477209\/v1","asserted-by":"object"}]},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,14]]},"assertion":[{"value":"28 August 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"63"}}