{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T18:57:48Z","timestamp":1769281068735,"version":"3.49.0"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T00:00:00Z","timestamp":1758758400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T00:00:00Z","timestamp":1758758400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100006407","name":"Natural Science Foundation of Henan","doi-asserted-by":"crossref","award":["242300421220"],"award-info":[{"award-number":["242300421220"]}],"id":[{"id":"10.13039\/501100006407","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Henan Province Key R&D Project","award":["241111210400"],"award-info":[{"award-number":["241111210400"]}]},{"DOI":"10.13039\/501100017700","name":"Henan Provincial Science and Technology Research Project","doi-asserted-by":"crossref","award":["242102211007, 242102211020"],"award-info":[{"award-number":["242102211007, 242102211020"]}],"id":[{"id":"10.13039\/501100017700","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Science and Technology Innovation Project of Zhengzhou University of Light Industry","award":["23XNKJTD0205"],"award-info":[{"award-number":["23XNKJTD0205"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62372384"],"award-info":[{"award-number":["62372384"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Social Science Fund of Shaanxi Province","award":["2019Q019"],"award-info":[{"award-number":["2019Q019"]}]},{"name":"China Higher Education Association Special Project","award":["2020XXHYB13"],"award-info":[{"award-number":["2020XXHYB13"]}]},{"DOI":"10.13039\/501100011710","name":"Shaanxi Provincial Department of Science and Technology","doi-asserted-by":"crossref","award":["2022GY-317"],"award-info":[{"award-number":["2022GY-317"]}],"id":[{"id":"10.13039\/501100011710","id-type":"DOI","asserted-by":"crossref"}]},{"name":"XJTLU RDF","award":["21-02-008"],"award-info":[{"award-number":["21-02-008"]}]},{"name":"Jiangsu Double-Innovation Plan","award":["JSSCBS20230474"],"award-info":[{"award-number":["JSSCBS20230474"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00530-025-01987-z","type":"journal-article","created":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T07:42:22Z","timestamp":1758786142000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient 3D human pose estimation via spatio-temporal graph transformer with token pruning"],"prefix":"10.1007","volume":"31","author":[{"given":"Zuhe","family":"Li","sequence":"first","affiliation":[]},{"given":"Hongyang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Fengqin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Gang","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Qidong","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yushan","family":"Pan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,25]]},"reference":[{"key":"1987_CR1","doi-asserted-by":"publisher","unstructured":"Zimmermann, C., Welschehold, T., Dornhege, C., Burgard, W., Brox, T.: 3d human pose estimation in rgbd images for robotic task learning. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 1986\u20131992 (2018). https:\/\/doi.org\/10.1109\/ICRA.2018.8462833","DOI":"10.1109\/ICRA.2018.8462833"},{"key":"1987_CR2","doi-asserted-by":"crossref","unstructured":"Gong, J., Fan, Z., Ke, Q., Rahmani, H., Liu, J.: Meta agent teaming active learning for pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11079\u201311089 (2022)","DOI":"10.1109\/CVPR52688.2022.01080"},{"key":"1987_CR3","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1109\/TIP.2021.3131937","volume":"31","author":"M Ye","year":"2022","unstructured":"Ye, M., Li, H., Du, B., Shen, J., Shao, L., Hoi, S.C.H.: Collaborative refining for person re-identification with label noise. IEEE Trans. Image Process. 31, 379\u2013391 (2022). https:\/\/doi.org\/10.1109\/TIP.2021.3131937","journal-title":"IEEE Trans. Image Process."},{"issue":"4\u20135","key":"1987_CR4","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1080\/10447318.2018.1543081","volume":"35","author":"A Kamel","year":"2019","unstructured":"Kamel, A., Liu, B., Li, P., Sheng, B.: An investigation of 3d human pose estimation for learning tai chi: A human factor perspective. International Journal of Human-Computer Interaction 35(4\u20135), 427\u2013439 (2019)","journal-title":"International Journal of Human-Computer Interaction"},{"issue":"3","key":"1987_CR5","doi-asserted-by":"publisher","first-page":"2266","DOI":"10.1002\/cav.2266","volume":"35","author":"W Lin","year":"2024","unstructured":"Lin, W., Zhang, J., Meng, W., Liu, X., Zhang, X.: Hide: Hierarchical iterative decoding enhancement for multi-view 3d human parameter regression. Computer Animation and Virtual Worlds 35(3), 2266 (2024)","journal-title":"Computer Animation and Virtual Worlds"},{"key":"1987_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.111091","volume":"159","author":"J Ding","year":"2025","unstructured":"Ding, J., Li, W., Yang, M., Zhao, Y., Pei, L., Tian, A.: Seatrack: Rethinking observation-centric sort for robust nearshore multiple object tracking. Pattern Recogn. 159, 111091 (2025). https:\/\/doi.org\/10.1016\/j.patcog.2024.111091","journal-title":"Pattern Recogn."},{"key":"1987_CR7","doi-asserted-by":"crossref","unstructured":"Zhang, C., Yang, T., Weng, J., Cao, M., Wang, J., Zou, Y.: Unsupervised pre-training for temporal action localization tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14031\u201314041 (2022)","DOI":"10.1109\/CVPR52688.2022.01364"},{"issue":"8","key":"1987_CR8","doi-asserted-by":"publisher","first-page":"2752","DOI":"10.1109\/TPAMI.2020.2976014","volume":"43","author":"DC Luvizon","year":"2021","unstructured":"Luvizon, D.C., Picard, D., Tabia, H.: Multi-task deep learning for real-time 3d human pose estimation and action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 43(8), 2752\u20132764 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2020.2976014","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1987_CR9","doi-asserted-by":"crossref","unstructured":"Zheng, C., Zhu, S., Mendieta, M., Yang, T., Chen, C., Ding, Z.: 3d human pose estimation with spatial and temporal transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11656\u201311665 (2021)","DOI":"10.1109\/ICCV48922.2021.01145"},{"key":"1987_CR10","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, H., Tang, H., Wang, P., Van\u00a0Gool, L.: Mhformer: Multi-hypothesis transformer for 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13147\u201313156 (2022)","DOI":"10.1109\/CVPR52688.2022.01280"},{"key":"1987_CR11","doi-asserted-by":"crossref","unstructured":"Chen, Z., Dai, J., Pan, J., Zhou, F.: Diffusion model with temporal constraint for 3d human pose estimation. Vis. Comput. pp. 1\u201317 (2024)","DOI":"10.1007\/s00371-024-03763-y"},{"key":"1987_CR12","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"1987_CR13","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"1987_CR14","doi-asserted-by":"crossref","unstructured":"Zhang, J., Tu, Z., Yang, J., Chen, Y., Yuan, J.: Mixste: Seq2seq mixed spatio-temporal encoder for 3d human pose estimation in video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13232\u201313242 (2022)","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"1987_CR15","doi-asserted-by":"crossref","unstructured":"Tang, Z., Qiu, Z., Hao, Y., Hong, R., Yao, T.: 3d human pose estimation with spatio-temporal criss-cross attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4790\u20134799 (2023)","DOI":"10.1109\/CVPR52729.2023.00464"},{"key":"1987_CR16","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"issue":"1","key":"1987_CR17","doi-asserted-by":"publisher","first-page":"2201","DOI":"10.1002\/cav.2201","volume":"35","author":"X Zhu","year":"2024","unstructured":"Zhu, X., Yao, X., Zhang, J., Zhu, M., You, L., Yang, X., Zhang, J., Zhao, H., Zeng, D.: Tmsdnet: Transformer with multi-scale dense network for single and multi-view 3d reconstruction. Comput. Animat. Virtual Worlds 35(1), 2201 (2024)","journal-title":"Comput. Animat. Virtual Worlds"},{"key":"1987_CR18","doi-asserted-by":"crossref","unstructured":"Yao, J., Chen, J., Niu, L., Sheng, B.: Scene-aware human pose generation using transformer. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 2847\u20132855 (2023)","DOI":"10.1145\/3581783.3612439"},{"key":"1987_CR19","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: International Conference on Learning Representations (2017)"},{"key":"1987_CR20","doi-asserted-by":"crossref","unstructured":"Ci, H., Wang, C., Ma, X., Wang, Y.: Optimizing network structure for 3d human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00235"},{"key":"1987_CR21","doi-asserted-by":"crossref","unstructured":"Yu, B.X., Zhang, Z., Liu, Y., Zhong, S.-h., Liu, Y., Chen, C.W.: Gla-gcn: Global-local adaptive graph convolutional network for 3d human pose estimation from monocular video. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8818\u20138829 (2023)","DOI":"10.1109\/ICCV51070.2023.00810"},{"key":"1987_CR22","first-page":"13937","volume":"34","author":"Y Rao","year":"2021","unstructured":"Rao, Y., Zhao, W., Liu, B., Lu, J., Zhou, J., Hsieh, C.-J.: Dynamicvit: Efficient vision transformers with dynamic token sparsification. Adv. Neural. Inf. Process. Syst. 34, 13937\u201313949 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1987_CR23","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Daniilidis, K.: Ordinal depth supervision for 3d human pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00763"},{"key":"1987_CR24","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Derpanis, K.G., Daniilidis, K.: Coarse-to-fine volumetric prediction for single-image 3d human pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.139"},{"key":"1987_CR25","doi-asserted-by":"crossref","unstructured":"Sun, X., Xiao, B., Wei, F., Liang, S., Wei, Y.: Integral human pose regression. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01231-1_33"},{"key":"1987_CR26","doi-asserted-by":"crossref","unstructured":"Zhou, K., Han, X., Jiang, N., Jia, K., Lu, J.: Hemlets pose: Learning part-centric heatmap triplets for accurate 3d human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00243"},{"key":"1987_CR27","doi-asserted-by":"crossref","unstructured":"Mehraban, S., Adeli, V., Taati, B.: Motionagformer: Enhancing 3d human pose estimation with a transformer-gcnformer network. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 6920\u20136930 (2024)","DOI":"10.1109\/WACV57701.2024.00677"},{"key":"1987_CR28","unstructured":"Kaiming, H., Georgia, G., Piotr, D., Ross, G.-s.: Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, vol. 2017, pp. 2961\u20132969 (2017)"},{"issue":"2","key":"1987_CR29","doi-asserted-by":"publisher","first-page":"1944","DOI":"10.1609\/aaai.v36i2.20089","volume":"36","author":"X Ma","year":"2022","unstructured":"Ma, X., Rahmani, H., Fan, Z., Yang, B., Chen, J., Liu, J.: Remote: Reinforced motion transformation network for semi-supervised 2d pose estimation in videos. Proc. AAAI Confere. Artific. Intell. 36(2), 1944\u20131952 (2022). https:\/\/doi.org\/10.1609\/aaai.v36i2.20089","journal-title":"Proc. AAAI Confere. Artific. Intell."},{"key":"1987_CR30","doi-asserted-by":"crossref","unstructured":"Xu, T., Takano, W.: Graph stacked hourglass networks for 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16105\u201316114 (2021)","DOI":"10.1109\/CVPR46437.2021.01584"},{"key":"1987_CR31","doi-asserted-by":"publisher","first-page":"1330","DOI":"10.1109\/TMM.2020.2999181","volume":"23","author":"A Kamel","year":"2020","unstructured":"Kamel, A., Sheng, B., Li, P., Kim, J., Feng, D.D.: Hybrid refinement-correction heatmaps for human pose estimation. IEEE Trans. Multimed. 23, 1330\u20131342 (2020)","journal-title":"IEEE Trans. Multimed."},{"key":"1987_CR32","doi-asserted-by":"crossref","unstructured":"Zhu, W., Ma, X., Liu, Z., Liu, L., Wu, W., Wang, Y.: Motionbert: A unified perspective on learning human motion representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 15085\u201315099 (2023)","DOI":"10.1109\/ICCV51070.2023.01385"},{"issue":"8","key":"1987_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11760-025-04145-0","volume":"19","author":"M Hassanin","year":"2025","unstructured":"Hassanin, M., Khamis, A., Bennamoun, M., Boussaid, F., Radwan, I.: Crossformer3d: cross spatio-temporal transformer for 3d human pose estimation. SIViP 19(8), 1\u201313 (2025)","journal-title":"SIViP"},{"key":"1987_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2025.111562","volume":"164","author":"H Zhang","year":"2025","unstructured":"Zhang, H., Hu, Z., Bi, S., Di, J., Sun, Z.: Hmsft: Hierarchical multi-scale spatial-frequency-temporal collaborative transformer for 3d human pose estimation. Pattern Recogn. 164, 111562 (2025)","journal-title":"Pattern Recogn."},{"key":"1987_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110925","volume":"158","author":"W Li","year":"2025","unstructured":"Li, W., Liu, M., Liu, H., Guo, T., Wang, T., Tang, H., Sebe, N.: Graphmlp: A graph mlp-like architecture for 3d human pose estimation. Pattern Recogn. 158, 110925 (2025)","journal-title":"Pattern Recogn."},{"key":"1987_CR36","unstructured":"Kang, H., Wang, Y., Liu, M., Wu, D., Liu, P., Yang, W.: Double-chain Constraints for 3D Human Pose Estimation in Images and Videos (2023). https:\/\/arxiv.org\/abs\/2308.05298"},{"key":"1987_CR37","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Feichtenhofer, C., Grangier, D., Auli, M.: 3d human pose estimation in video with temporal convolutions and semi-supervised training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00794"},{"key":"1987_CR38","doi-asserted-by":"crossref","unstructured":"Choi, S., Choi, S., Kim, C.: Mobilehumanpose: Toward real-time 3d human pose estimation in mobile devices. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 2328\u20132338 (2021)","DOI":"10.1109\/CVPRW53098.2021.00265"},{"key":"1987_CR39","doi-asserted-by":"crossref","unstructured":"Einfalt, M., Ludwig, K., Lienhart, R.: Uplift and upsample: Efficient 3d human pose estimation with uplifting transformers. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 2903\u20132913 (2023)","DOI":"10.1109\/WACV56688.2023.00292"},{"key":"1987_CR40","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1007\/978-3-031-20065-6_27","volume-title":"Computer Vision - ECCV 2022","author":"W Shan","year":"2022","unstructured":"Shan, W., Liu, Z., Zhang, X., Wang, S., Ma, S., Gao, W.: P-stmo: Pre-trained spatial temporal many-to-one model for 3d human pose estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022, pp. 461\u2013478. Springer, Cham (2022)"},{"key":"1987_CR41","doi-asserted-by":"crossref","unstructured":"Peng, J., Zhou, Y., Mok, P.: Ehfusion: an efficient heterogeneous fusion model for group-based 3d human pose estimation. Vis. Comput, pp. 1\u201323 (2024)","DOI":"10.1007\/s00371-024-03724-5"},{"key":"1987_CR42","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1109\/TMM.2022.3141231","volume":"25","author":"W Li","year":"2023","unstructured":"Li, W., Liu, H., Ding, R., Liu, M., Wang, P., Yang, W.: Exploiting temporal contexts with strided transformer for 3d human pose estimation. IEEE Trans. Multimed. 25, 1282\u20131293 (2023). https:\/\/doi.org\/10.1109\/TMM.2022.3141231","journal-title":"IEEE Trans. Multimed."},{"key":"1987_CR43","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Zheng, C., Liu, M., Wang, P., Chen, C.: Poseformerv2: Exploring frequency domain for efficient and robust 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8877\u20138886 (2023)","DOI":"10.1109\/CVPR52729.2023.00857"},{"key":"1987_CR44","doi-asserted-by":"crossref","unstructured":"Chen, H., He, J.-Y., Xiang, W., Cheng, Z.-Q., Liu, W., Liu, H., Luo, B., Geng, Y., Xie, X.: Hdformer: high-order directed transformer for 3d human pose estimation. In: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, pp. 581\u2013589 (2023)","DOI":"10.24963\/ijcai.2023\/65"},{"key":"1987_CR45","doi-asserted-by":"crossref","unstructured":"Zeng, A., Ju, X., Yang, L., Gao, R., Zhu, X., Dai, B., Xu, Q.: Deciwatch: A simple baseline for 10$$\\times $$ efficient 2d and 3d pose estimation. In: European Conference on Computer Vision, pp. 607\u2013624 (2022). Springer","DOI":"10.1007\/978-3-031-20065-6_35"},{"key":"1987_CR46","doi-asserted-by":"crossref","unstructured":"Haurum, J.B., Escalera, S., Taylor, G.W., Moeslund, T.B.: Which tokens to use? investigating token reduction in vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops, pp. 773\u2013783 (2023)","DOI":"10.1109\/ICCVW60793.2023.00085"},{"key":"1987_CR47","doi-asserted-by":"crossref","unstructured":"Chang, S., Wang, P., Lin, M., Wang, F., Zhang, D.J., Jin, R., Shou, M.Z.: Making vision transformers efficient from a token sparsification view. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6195\u20136205 (2023)","DOI":"10.1109\/CVPR52729.2023.00600"},{"key":"1987_CR48","doi-asserted-by":"crossref","unstructured":"Dou, Z., Wu, Q., Lin, C., Cao, Z., Wu, Q., Wan, W., Komura, T., Wang, W.: Tore: Token reduction for efficient human mesh recovery with transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 15143\u201315155 (2023)","DOI":"10.1109\/ICCV51070.2023.01390"},{"key":"1987_CR49","doi-asserted-by":"publisher","first-page":"620","DOI":"10.1007\/978-3-031-20083-0_37","volume-title":"Computer Vision - ECCV 2022","author":"Z Kong","year":"2022","unstructured":"Kong, Z., Dong, P., Ma, X., Meng, X., Niu, W., Sun, M., Shen, X., Yuan, G., Ren, B., Tang, H., Qin, M., Wang, Y.: Spvit: Enabling faster vision transformers via latency-aware soft token pruning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022, pp. 620\u2013640. Springer, Cham (2022)"},{"key":"1987_CR50","doi-asserted-by":"crossref","unstructured":"Long, S., Zhao, Z., Pi, J., Wang, S., Wang, J.: Beyond attentive tokens: Incorporating token importance and diversity for efficient vision transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10334\u201310343 (2023)","DOI":"10.1109\/CVPR52729.2023.00996"},{"key":"1987_CR51","doi-asserted-by":"crossref","unstructured":"Ma, H., Wang, Z., Chen, Y., Kong, D., Chen, L., Liu, X., Yan, X., Tang, H., Xie, X.: Ppt: token-pruned pose transformer for monocular and multi-view human pose estimation. In: European Conference on Computer Vision, pp. 424\u2013442 (2022). Springer","DOI":"10.1007\/978-3-031-20065-6_25"},{"key":"1987_CR52","doi-asserted-by":"crossref","unstructured":"Zeng, W., Jin, S., Liu, W., Qian, C., Luo, P., Ouyang, W., Wang, X.: Not all tokens are equal: Human-centric visual analysis via token clustering transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11101\u201311111 (2022)","DOI":"10.1109\/CVPR52688.2022.01082"},{"key":"1987_CR53","unstructured":"Liang, Y., Ge, C., Tong, Z., Song, Y., Wang, J., Xie, P.: Not all patches are what you need: Expediting vision transformers via token reorganizations. In: International Conference on Learning Representations (2022)"},{"key":"1987_CR54","doi-asserted-by":"crossref","unstructured":"Xu, Y., Peng, K., Wen, D., Liu, R., Zheng, J., Chen, Y., Zhang, J., Roitberg, A., Yang, K., Stiefelhagen, R.: Skeleton-based human action recognition with noisy labels. In: 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4716\u20134723 (2024). IEEE","DOI":"10.1109\/IROS58592.2024.10801681"},{"key":"1987_CR55","doi-asserted-by":"publisher","first-page":"1489","DOI":"10.1109\/TMM.2023.3235300","volume":"25","author":"K Peng","year":"2023","unstructured":"Peng, K., Roitberg, A., Yang, K., Zhang, J., Stiefelhagen, R.: Delving deep into one-shot skeleton-based action recognition with diverse occlusions. IEEE Trans. Multimed. 25, 1489\u20131504 (2023)","journal-title":"IEEE Trans. Multimed."},{"key":"1987_CR56","doi-asserted-by":"crossref","unstructured":"Peng, K., Yin, C., Zheng, J., Liu, R., Schneider, D., Zhang, J., Yang, K., Sarfraz, M.S., Stiefelhagen, R., Roitberg, A.: Navigating open set scenarios for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, pp. 4487\u20134496 (2024)","DOI":"10.1609\/aaai.v38i5.28247"},{"key":"1987_CR57","doi-asserted-by":"crossref","unstructured":"Wei, Y., Peng, K., Roitberg, A., Zhang, J., Zheng, J., Liu, R., Chen, Y., Yang, K., Stiefelhagen, R.: Elevating skeleton-based action recognition with efficient multi-modality self-supervision. In: ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6040\u20136044 (2024). IEEE","DOI":"10.1109\/ICASSP48485.2024.10447178"},{"key":"1987_CR58","doi-asserted-by":"crossref","unstructured":"Luo, C., Song, S., Xie, W., Shen, L., Gunes, H.: Learning multi-dimensional edge feature-based au relation graph for facial action unit recognition. In: Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI-22, pp. 1239\u20131246 (2022)","DOI":"10.24963\/ijcai.2022\/173"},{"key":"1987_CR59","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, M., Liu, H., Wang, P., Cai, J., Sebe, N.: Hourglass tokenizer for efficient transformer-based 3d human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 604\u2013613 (2024)","DOI":"10.1109\/CVPR52733.2024.00064"},{"issue":"7","key":"1987_CR60","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2014","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6m: Large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2014). https:\/\/doi.org\/10.1109\/TPAMI.2013.248","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1987_CR61","doi-asserted-by":"publisher","unstructured":"Mehta, D., Rhodin, H., Casas, D., Fua, P., Sotnychenko, O., Xu, W., Theobalt, C.: Monocular 3d human pose estimation in the wild using improved cnn supervision. In: 2017 International Conference on 3D Vision (3DV), pp. 506\u2013516 (2017). https:\/\/doi.org\/10.1109\/3DV.2017.00064","DOI":"10.1109\/3DV.2017.00064"},{"key":"1987_CR62","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2024.103992","volume":"243","author":"L Zhou","year":"2024","unstructured":"Zhou, L., Chen, Y., Wang, J.: Slowfastformer for 3d human pose estimation. Comput. Vis. Image Underst. 243, 103992 (2024). https:\/\/doi.org\/10.1016\/j.cviu.2024.103992","journal-title":"Comput. Vis. Image Underst."},{"key":"1987_CR63","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2023.104863","volume":"140","author":"X Liu","year":"2023","unstructured":"Liu, X., Tang, H.: Strformer: Spatial-temporal-retemporal transformer for 3d human pose estimation. Image Vis. Comput. 140, 104863 (2023). https:\/\/doi.org\/10.1016\/j.imavis.2023.104863","journal-title":"Image Vis. Comput."},{"key":"1987_CR64","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.105142","volume":"149","author":"F Hao","year":"2024","unstructured":"Hao, F., Zhong, F., Yu, H., Hu, J., Yang, Y.: Stafformer: Spatio-temporal adaptive fusion transformer for efficient 3d human pose estimation. Image Vis. Comput. 149, 105142 (2024). https:\/\/doi.org\/10.1016\/j.imavis.2024.105142","journal-title":"Image Vis. Comput."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01987-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01987-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01987-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T10:27:44Z","timestamp":1761388064000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01987-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,25]]},"references-count":64,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["1987"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01987-z","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,25]]},"assertion":[{"value":"26 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"398"}}