{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T08:41:25Z","timestamp":1770799285537,"version":"3.50.0"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376089, 62302153, 62302154"],"award-info":[{"award-number":["62376089, 62302153, 62302154"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the key Research and Development Program of Hubei Province, China","award":["2023BEB024"],"award-info":[{"award-number":["2023BEB024"]}]},{"name":"Young and Middle-aged Scientific and Technological Innovation Team Plan in Higher Education Institutions in Hubei Province,China","award":["T2023007"],"award-info":[{"award-number":["T2023007"]}]},{"name":"the High Level Talent Project at Hubei University of Technology","award":["XJ2022010901"],"award-info":[{"award-number":["XJ2022010901"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s00530-025-02109-5","type":"journal-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T09:15:04Z","timestamp":1765012504000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["TDZS: top semantic embedding and dynamic feature matching for zero-shot skeleton action recognition"],"prefix":"10.1007","volume":"32","author":[{"given":"Hongwei","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheng","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fangquan","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"issue":"6","key":"2109_CR1","doi-asserted-by":"publisher","first-page":"3957","DOI":"10.1007\/s00371-024-03640-8","volume":"41","author":"G Yue","year":"2025","unstructured":"Yue, G., Jiao, G., Li, C., Xiang, J.: When cnn meet with vit: decision-level feature fusion for camouflaged object detection. Vis. Comput. 41(6), 3957\u20133972 (2025)","journal-title":"Vis. Comput."},{"key":"2109_CR2","doi-asserted-by":"crossref","unstructured":"Yue, G., Jiao, G., Xiang, J.: Semi-supervised iterative learning network for camouflaged object detection. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2025). IEEE","DOI":"10.1109\/ICASSP49660.2025.10890224"},{"key":"2109_CR3","doi-asserted-by":"crossref","unstructured":"Wang, F., Jiao, G., Yue, G.: More observation leads to more clarity: Multi-view collaboration network for camouflaged object detection. Neurocomputing, 130433 (2025)","DOI":"10.1016\/j.neucom.2025.130433"},{"key":"2109_CR4","doi-asserted-by":"crossref","unstructured":"Fang, B., Wu, W., Liu, C., Zhou, Y., Song, Y., Wang, W., Shu, X., Ji, X., Wang, J.: Uatvr: Uncertainty-adaptive text-video retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13723\u201313733 (2023)","DOI":"10.1109\/ICCV51070.2023.01262"},{"key":"2109_CR5","doi-asserted-by":"crossref","unstructured":"Yin, T., Zhou, X., Krahenbuhl, P.: Center-based 3d object detection and tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11784\u201311793 (2021)","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"2109_CR6","unstructured":"Cadena, C., Dick, A.R., Reid, I.D.: Multi-modal auto-encoders as joint estimators for robotics scene understanding. In: Robotics: Science and Systems, 5 (2016)"},{"key":"2109_CR7","doi-asserted-by":"crossref","unstructured":"Wang, X., Fang, Z., Li, X., Li, X., Chen, C., Liu, M.: Skeleton-in-context: Unified skeleton sequence modeling with in-context learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2436\u20132446 (2024)","DOI":"10.1109\/CVPR52733.2024.00236"},{"key":"2109_CR8","first-page":"42748","volume":"36","author":"V Patraucean","year":"2024","unstructured":"Patraucean, V., Smaira, L., Gupta, A., Recasens, A., Markeeva, L., Banarse, D., Koppula, S., Malinowski, M., Yang, Y., Doersch, C., et al.: Perception test: A diagnostic benchmark for multimodal video models. Adv. Neural. Inf. Process. Syst. 36, 42748\u201342761 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2109_CR9","doi-asserted-by":"crossref","unstructured":"Pi, R., Yao, L., Gao, J., Zhang, J., Zhang, T.: Perceptiongpt: Effectively fusing visual perception into llm. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 27124\u201327133 (2024)","DOI":"10.1109\/CVPR52733.2024.02561"},{"key":"2109_CR10","doi-asserted-by":"crossref","unstructured":"Noh, S., Bae, K., Bae, Y., Lee, B.-D.: H$$^{\\wedge }$$ 3net: Irregular posture detection by understanding human character and core structures. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 5631\u20135641 (2024)","DOI":"10.1109\/CVPRW63382.2024.00572"},{"key":"2109_CR11","doi-asserted-by":"crossref","unstructured":"Cunico, F., Girella, F., Avogaro, A., Emporio, M., Giachetti, A., Cristani, M.: Oo-dmvmt: A deep multi-view multi-task classification framework for real-time 3d hand gesture classification and segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2745\u20132754 (2023)","DOI":"10.1109\/CVPRW59228.2023.00275"},{"key":"2109_CR12","unstructured":"Li, J., Zhang, J., Schmidt, L., Ratner, A.J.: Characterizing the impacts of semi-supervised learning for weak supervision. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"2109_CR13","unstructured":"Zhang, Z., Wang, X., Zhang, Z., Shen, G., Shen, S., Zhu, W.: Unsupervised graph neural architecture search with disentangled self-supervision. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"2109_CR14","doi-asserted-by":"publisher","first-page":"762","DOI":"10.1609\/aaai.v36i1.19957","volume":"36","author":"T Guo","year":"2022","unstructured":"Guo, T., Liu, H., Chen, Z., Liu, M., Wang, T., Ding, R.: Contrastive learning from extremely augmented skeleton sequences for self-supervised action recognition. Proceedings of the AAAI Conference on Artificial Intelligence 36, 762\u2013770 (2022)","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"2109_CR15","doi-asserted-by":"crossref","unstructured":"Hou, W., Chen, S., Chen, S., Hong, Z., Wang, Y., Feng, X., Khan, S., Khan, F.S., You, X.: Visual-augmented dynamic semantic prototype for generative zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 23627\u201323637 (2024)","DOI":"10.1109\/CVPR52733.2024.02230"},{"key":"2109_CR16","doi-asserted-by":"crossref","unstructured":"Chen, S., Hou, W., Khan, S., Khan, F.S.: Progressive semantic-guided vision transformer for zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 23964\u201323974 (2024)","DOI":"10.1109\/CVPR52733.2024.02262"},{"key":"2109_CR17","doi-asserted-by":"crossref","unstructured":"Gupta, P., Sharma, D., Sarvadevabhatla, R.K.: Syntactically guided generative embeddings for zero-shot skeleton action recognition. In: 2021 IEEE International Conference on Image Processing (ICIP), 439\u2013443 (2021). IEEE","DOI":"10.1109\/ICIP42928.2021.9506179"},{"key":"2109_CR18","doi-asserted-by":"crossref","unstructured":"Li, M.-Z., Jia, Z., Zhang, Z., Ma, Z., Wang, L.: Multi-semantic fusion model for generalized zero-shot skeleton-based action recognition. In: International Conference on Image and Graphics, 68\u201380 (2023). Springer","DOI":"10.1007\/978-3-031-46305-1_6"},{"key":"2109_CR19","doi-asserted-by":"crossref","unstructured":"Xu, H., Gao, Y., Li, J., Gao, X.: An information compensation framework for zero-shot skeleton-based action recognition. IEEE Transactions on Multimedia (2025)","DOI":"10.1109\/TMM.2025.3543004"},{"key":"2109_CR20","doi-asserted-by":"crossref","unstructured":"Zhu, A., Ke, Q., Gong, M., Bailey, J.: Part-aware unified representation of language and skeleton for zero-shot action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 18761\u201318770 (2024)","DOI":"10.1109\/CVPR52733.2024.01775"},{"key":"2109_CR21","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, 8748\u20138763 (2021). PMLR"},{"key":"2109_CR22","doi-asserted-by":"crossref","unstructured":"Reimers, N.: Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"2109_CR23","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.-E., Sheikh, Y.: Realtime multi-person 2d pose estimation using part affinity fields. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 7291\u20137299 (2017)","DOI":"10.1109\/CVPR.2017.143"},{"issue":"1","key":"2109_CR24","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1145\/2398356.2398381","volume":"56","author":"J Shotton","year":"2013","unstructured":"Shotton, J., Sharp, T., Kipman, A., Fitzgibbon, A., Finocchio, M., Blake, A., Cook, M., Moore, R.: Real-time human pose recognition in parts from single depth images. Commun. ACM 56(1), 116\u2013124 (2013)","journal-title":"Commun. ACM"},{"key":"2109_CR25","doi-asserted-by":"crossref","unstructured":"Du, Y., Wang, W., Wang, L.: Hierarchical recurrent neural network for skeleton based action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1110\u20131118 (2015)","DOI":"10.1109\/CVPR.2015.7298714"},{"key":"2109_CR26","doi-asserted-by":"crossref","unstructured":"Tang, S., Li, C., Zhang, P., Tang, R.: Swinlstm: Improving spatiotemporal prediction accuracy using swin transformer and lstm. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 13470\u201313479 (2023)","DOI":"10.1109\/ICCV51070.2023.01239"},{"key":"2109_CR27","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"2109_CR28","doi-asserted-by":"publisher","first-page":"1175","DOI":"10.1109\/TMM.2021.3139768","volume":"25","author":"R Wang","year":"2021","unstructured":"Wang, R., Liu, J., Ke, Q., Peng, D., Lei, Y.: Dear-net: learning diversities for skeleton-based early action recognition. IEEE Trans. Multimed. 25, 1175\u20131189 (2021)","journal-title":"IEEE Trans. Multimed."},{"key":"2109_CR29","doi-asserted-by":"publisher","first-page":"1061","DOI":"10.1109\/TMM.2021.3137745","volume":"25","author":"W Wang","year":"2021","unstructured":"Wang, W., Chang, F., Liu, C., Li, G., Wang, B.: Ga-net: a guidance aware network for skeleton-based early activity recognition. IEEE Trans. Multimed. 25, 1061\u20131073 (2021)","journal-title":"IEEE Trans. Multimed."},{"key":"2109_CR30","doi-asserted-by":"crossref","unstructured":"Xin, W., Miao, Q., Liu, Y., Liu, R., Pun, C.-M., Shi, C.: Skeleton mixformer: Multivariate topology representation for skeleton-based action recognition. In: Proceedings of the 31st ACM International Conference on Multimedia, 2211\u20132220 (2023)","DOI":"10.1145\/3581783.3611900"},{"key":"2109_CR31","doi-asserted-by":"crossref","unstructured":"Liu, S., Zhang, Y., Li, W., Lin, Z., Jia, J.: Video-p2p: Video editing with cross-attention control. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 8599\u20138608 (2024)","DOI":"10.1109\/CVPR52733.2024.00821"},{"key":"2109_CR32","doi-asserted-by":"crossref","unstructured":"Lin, L., Zhang, J., Liu, J.: Actionlet-dependent contrastive learning for unsupervised skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2363\u20132372 (2023)","DOI":"10.1109\/CVPR52729.2023.00234"},{"key":"2109_CR33","unstructured":"Xu, H., Gao, Y., Hui, Z., Li, J., Gao, X.: Language knowledge-assisted representation learning for skeleton-based action recognition. arXiv preprint arXiv:2305.12398 (2023)"},{"key":"2109_CR34","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Qiang, W., Rao, A., Lin, N., Su, B., Wang, J.: Zero-shot skeleton-based action recognition via mutual information estimation and maximization. In: Proceedings of the 31st ACM International Conference on Multimedia, 5302\u20135310 (2023)","DOI":"10.1145\/3581783.3611888"},{"issue":"8","key":"2109_CR35","doi-asserted-by":"publisher","first-page":"832","DOI":"10.1109\/34.709601","volume":"20","author":"TK Ho","year":"1998","unstructured":"Ho, T.K.: The random subspace method for constructing decision forests. IEEE Trans. Pattern Anal. Mach. Intell. 20(8), 832\u2013844 (1998)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2109_CR36","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2109_CR37","unstructured":"Hjelm, R.D., Fedorov, A., Lavoie-Marchildon, S., Grewal, K., Bachman, P., Trischler, A., Bengio, Y.: Learning deep representations by mutual information estimation and maximization. arXiv preprint arXiv:1808.06670 (2018)"},{"key":"2109_CR38","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.-T., Wang, G.: Ntu rgb+ d: A large scale dataset for 3d human activity analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 1010\u20131019 (2016)","DOI":"10.1109\/CVPR.2016.115"},{"issue":"10","key":"2109_CR39","doi-asserted-by":"publisher","first-page":"2684","DOI":"10.1109\/TPAMI.2019.2916873","volume":"42","author":"J Liu","year":"2019","unstructured":"Liu, J., Shahroudy, A., Perez, M., Wang, G., Duan, L.-Y., Kot, A.C.: Ntu rgb+ d 120: A large-scale benchmark for 3d human activity understanding. IEEE Trans. Pattern Anal. Mach. Intell. 42(10), 2684\u20132701 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2109_CR40","doi-asserted-by":"crossref","unstructured":"Liu, J., Song, S., Liu, C., Li, Y., Hu, Y.: A benchmark dataset and comparison study for multi-modal human action analytics. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM) 16(2), 1\u201324 (2020)","DOI":"10.1145\/3365212"},{"key":"2109_CR41","doi-asserted-by":"crossref","unstructured":"Wray, M., Larlus, D., Csurka, G., Damen, D.: Fine-grained action retrieval through multiple parts-of-speech embeddings. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 450\u2013459 (2019)","DOI":"10.1109\/ICCV.2019.00054"},{"key":"2109_CR42","doi-asserted-by":"crossref","unstructured":"Hubert\u00a0Tsai, Y.-H., Huang, L.-K., Salakhutdinov, R.: Learning robust visual-semantic embeddings. In: Proceedings of the IEEE International Conference on Computer Vision, 3571\u20133580 (2017)","DOI":"10.1109\/ICCV.2017.386"},{"key":"2109_CR43","unstructured":"Frome, A., Corrado, G.S., Shlens, J., Bengio, S., Dean, J., Ranzato, M., Mikolov, T.: Devise: A deep visual-semantic embedding model. Advances in neural information processing systems 26 (2013)"},{"key":"2109_CR44","doi-asserted-by":"crossref","unstructured":"Schonfeld, E., Ebrahimi, S., Sinha, S., Darrell, T., Akata, Z.: Generalized zero-and few-shot learning via aligned variational autoencoders. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 8247\u20138255 (2019)","DOI":"10.1109\/CVPR.2019.00844"},{"key":"2109_CR45","unstructured":"Jasani, B., Mazagonwalla, A.: Skeleton based zero shot action recognition in joint pose-language semantic space. arXiv preprint arXiv:1911.11344 (2019)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02109-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02109-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02109-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T04:20:20Z","timestamp":1770783620000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02109-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["2109"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02109-5","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"27 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"36"}}