{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:28:39Z","timestamp":1766068119306,"version":"3.40.3"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031726606"},{"type":"electronic","value":"9783031726613"}],"license":[{"start":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:00:00Z","timestamp":1732665600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:00:00Z","timestamp":1732665600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72661-3_7","type":"book-chapter","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T07:49:44Z","timestamp":1732607384000},"page":"113-131","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["CrossGLG: LLM Guides One-Shot Skeleton-Based 3D Action Recognition in\u00a0a\u00a0Cross-Level Manner"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6814-5436","authenticated-orcid":false,"given":"Tingbing","family":"Yan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8467-5862","authenticated-orcid":false,"given":"Wenzheng","family":"Zeng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7739-4146","authenticated-orcid":false,"given":"Yang","family":"Xiao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9221-2531","authenticated-orcid":false,"given":"Xingyu","family":"Tong","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4633-6026","authenticated-orcid":false,"given":"Bo","family":"Tan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9314-5262","authenticated-orcid":false,"given":"Zhiwen","family":"Fang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9223-1863","authenticated-orcid":false,"given":"Zhiguo","family":"Cao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4675-7055","authenticated-orcid":false,"given":"Joey Tianyi","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,27]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Aguilar, G., Ling, Y., Zhang, Y., Yao, B., Guo, C.: Knowledge distillation from internal representations, pp. 7350\u20137357 (2020)","DOI":"10.1609\/aaai.v34i05.6229"},{"key":"7_CR2","unstructured":"Bai, J., et al.: Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)"},{"key":"7_CR3","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1146\/annurev.psych.57.102904.190152","volume":"58","author":"R Blake","year":"2007","unstructured":"Blake, R., Shiffrar, M.: Perception of human motion. Annu. Rev. Psychol. 58, 47\u201373 (2007)","journal-title":"Annu. Rev. Psychol."},{"key":"7_CR4","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Cai, J., Jiang, N., Han, X., Jia, K., Lu, J.: JOLO-GCN: mining joint-centered light-weight information for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2735\u20132744 (2021)","DOI":"10.1109\/WACV48630.2021.00278"},{"key":"7_CR6","doi-asserted-by":"publisher","unstructured":"Chen, D., Mei, J.P., Zhang, H., Wang, C., Feng, Y., Chen, C.: Knowledge distillation with the reused teacher classifier. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11923\u201311932 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01163","DOI":"10.1109\/CVPR52688.2022.01163"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Chi, H.G., Ha, M.H., Chi, S., Lee, S.W., Huang, Q., Ramani, K.: Infogcn: representation learning for human skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20186\u201320196 (2022)","DOI":"10.1109\/CVPR52688.2022.01955"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Dai, D., Sun, Y., Dong, L., Hao, Y., Sui, Z., Wei, F.: Why can GPT learn in-context? language models secretly perform gradient descent as meta optimizers. arXiv preprint arXiv:2212.10559 (2022)","DOI":"10.18653\/v1\/2023.findings-acl.247"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Guo, M., Chou, E., Huang, D.A., Song, S., Yeung, S., Fei-Fei, L.: Neural graph matching networks for fewshot 3D action recognition. In: Proceedings of the European Conference on Computer Vision, pp. 653\u2013669 (2018)","DOI":"10.1007\/978-3-030-01246-5_40"},{"key":"7_CR10","doi-asserted-by":"publisher","first-page":"49","DOI":"10.3389\/fnint.2015.00049","volume":"9","author":"B Hadad","year":"2015","unstructured":"Hadad, B., Schwartz, S., Maurer, D., Lewis, T.L.: Motion perception: a review of developmental changes and the role of early visual experience. Front. Integr. Neurosci. 9, 49 (2015)","journal-title":"Front. Integr. Neurosci."},{"key":"7_CR11","unstructured":"He, P., Liu, X., Gao, J., Chen, W.: Deberta: decoding-enhanced bert with disentangled attention. arXiv preprint arXiv:2006.03654 (2020)"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Hou, Z., Yu, B., Tao, D.: Batchformer: learning to explore sample relationships for robust representation learning. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00711"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Jing, Y., Wang, F.: TP-VIT: a two-pathway vision transformer for video action recognition. In: 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 2185\u20132189. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9747276"},{"issue":"9","key":"7_CR14","doi-asserted-by":"publisher","first-page":"1806","DOI":"10.1109\/TSMC.2018.2850149","volume":"49","author":"A Kamel","year":"2018","unstructured":"Kamel, A., Sheng, B., Yang, P., Li, P., Shen, R., Feng, D.D.: Deep convolutional neural networks for human action recognition using depth maps and postures. IEEE Trans. Syst. Man Cybern. Syst. 49(9), 1806\u20131819 (2018)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"key":"7_CR15","unstructured":"Kay, W., et\u00a0al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"7_CR16","unstructured":"Knox, W.B., Stone, P.: Augmenting reinforcement learning with human feedback. In: ICML 2011 Workshop on New Developments in Imitation Learning, vol.\u00a0855, p.\u00a03 (2011)"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Lee, J., Lee, M., Lee, D., Lee, S.: Hierarchically decomposed graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10444\u201310453 (2023)","DOI":"10.1109\/ICCV51070.2023.00958"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Li, M., Chen, S., Chen, X., Zhang, Y., Wang, Y., Tian, Q.: Actional-structural graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3595\u20133603 (2019)","DOI":"10.1109\/CVPR.2019.00371"},{"issue":"10","key":"7_CR19","doi-asserted-by":"publisher","first-page":"2684","DOI":"10.1109\/TPAMI.2019.2916873","volume":"42","author":"J Liu","year":"2019","unstructured":"Liu, J., Shahroudy, A., Perez, M., Wang, G., Duan, L.Y., Kot, A.C.: NTU RGB+D 120: a large-scale benchmark for 3D human activity understanding. IEEE Trans. Pattern Anal. Mach. Intell. 42(10), 2684\u20132701 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"9","key":"7_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3560815","volume":"55","author":"P Liu","year":"2023","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., Neubig, G.: Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. ACM Comput. Surv. 55(9), 1\u201335 (2023)","journal-title":"ACM Comput. Surv."},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Manning, C.D., Surdeanu, M., Bauer, J., Finkel, J.R., Bethard, S., McClosky, D.: The stanford corenlp natural language processing toolkit. In: Proceedings of 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations, pp. 55\u201360 (2014)","DOI":"10.3115\/v1\/P14-5010"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Memmesheimer, R., H\u00e4ring, S., Theisen, N., Paulus, D.: Skeleton-DML: deep metric learning for skeleton-based one-shot action recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3702\u20133710 (2022)","DOI":"10.1109\/WACV51458.2022.00091"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Memmesheimer, R., Theisen, N., Paulus, D.: SL-DML: signal level deep metric learning for multimodal one-shot action recognition. In: 2020 25th International Conference on Pattern Recognition, pp. 4573\u20134580. IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9413336"},{"key":"7_CR24","unstructured":"OpenAI: Introducing chatgpt. Website (2022). https:\/\/openai.com\/index\/chatgpt"},{"key":"7_CR25","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-030-68796-0_50","volume-title":"Pattern Recognition. ICPR International Workshops and Challenges","author":"C Plizzari","year":"2021","unstructured":"Plizzari, C., Cannici, M., Matteucci, M.: Spatial temporal transformer network for skeleton-based action recognition. In: Del Bimbo, A., et al. (eds.) ICPR 2021. LNCS, vol. 12663, pp. 694\u2013701. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-68796-0_50"},{"key":"7_CR27","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I., et\u00a0al.: Improving language understanding by generative pre-training (2018)"},{"issue":"1","key":"7_CR28","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(1), 5485\u20135551 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR29","doi-asserted-by":"publisher","first-page":"3164","DOI":"10.1016\/j.matpr.2020.09.052","volume":"37","author":"SS Rani","year":"2021","unstructured":"Rani, S.S., Naidu, G.A., Shree, V.U.: Kinematic joint descriptor and depth motion descriptor with convolutional neural networks for human action recognition. Mater. Today Proc. 37, 3164\u20133173 (2021)","journal-title":"Mater. Today Proc."},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: NTU RGB+D: a large scale dataset for 3D human activity analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1010\u20131019 (2016)","DOI":"10.1109\/CVPR.2016.115"},{"key":"7_CR31","doi-asserted-by":"publisher","first-page":"3957","DOI":"10.1109\/TIP.2020.2967577","volume":"29","author":"S Song","year":"2020","unstructured":"Song, S., Liu, J., Li, Y., Guo, Z.: Modality compensation network: cross-modal adaptation for action recognition. IEEE Trans. Image Process. 29, 3957\u20133969 (2020)","journal-title":"IEEE Trans. Image Process."},{"issue":"3","key":"7_CR32","first-page":"3200","volume":"45","author":"Z Sun","year":"2022","unstructured":"Sun, Z., Ke, Q., Rahmani, H., Bennamoun, M., Wang, G., Liu, J.: Human action recognition from various data modalities: a review. IEEE Trans. Pattern Anal. Mach. Intell. 45(3), 3200\u20133225 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR33","unstructured":"Team, G., et al.: Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Thoker, F.M., Gall, J.: Cross-modal knowledge distillation for action recognition. In: 2019 IEEE International Conference on Image Processing, pp. 6\u201310. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8802909"},{"key":"7_CR35","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"7_CR36","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1007\/978-3-031-19803-8_11","volume-title":"European Conference on Computer Vision","author":"L Wang","year":"2022","unstructured":"Wang, L., Koniusz, P.: Uncertainty-DTW for time series and sequences. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13681, pp. 176\u2013195. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19803-8_11"},{"key":"7_CR37","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR38","doi-asserted-by":"crossref","unstructured":"Xiang, W., Li, C., Zhou, Y., Wang, B., Zhang, L.: Generative action description prompts for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10276\u201310285 (2023)","DOI":"10.1109\/ICCV51070.2023.00943"},{"key":"7_CR39","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"7_CR40","unstructured":"Yang, S., Liu, L., Xu, M.: Free lunch for few-shot learning: distribution calibration. In: International Conference on Learning Representations (2021)"},{"key":"7_CR41","doi-asserted-by":"crossref","unstructured":"Yang, S., Liu, J., Lu, S., Hwa, E.M., Kot, A.C.: One-shot action recognition via multi-scale spatial-temporal skeleton matching. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3363831"},{"issue":"2","key":"7_CR42","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/MMUL.2012.24","volume":"19","author":"Z Zhang","year":"2012","unstructured":"Zhang, Z.: Microsoft kinect sensor and its effect. IEEE Multimedia 19(2), 4\u201310 (2012)","journal-title":"IEEE Multimedia"},{"key":"7_CR43","doi-asserted-by":"crossref","unstructured":"Zhao, R., Ali, H., Van\u00a0der Smagt, P.: Two-stream RNN\/CNN for action recognition in 3D videos. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 4260\u20134267. IEEE (2017)","DOI":"10.1109\/IROS.2017.8206288"},{"key":"7_CR44","doi-asserted-by":"crossref","unstructured":"Zhu, A., Ke, Q., Gong, M., Bailey, J.: Adaptive local-component-aware graph convolutional network for one-shot skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 6038\u20136047 (2023)","DOI":"10.1109\/WACV56688.2023.00598"},{"key":"7_CR45","doi-asserted-by":"crossref","unstructured":"Zhu, W., Ma, X., Liu, Z., Liu, L., Wu, W., Wang, Y.: Motionbert: a unified perspective on learning human motion representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15085\u201315099 (2023)","DOI":"10.1109\/ICCV51070.2023.01385"},{"issue":"12","key":"7_CR46","first-page":"3166","volume":"22","author":"Y Zou","year":"2020","unstructured":"Zou, Y., Shi, Y., Shi, D., Wang, Y., Liang, Y., Tian, Y.: Adaptation-oriented feature projection for one-shot action recognition. IEEE Trans. Multimedia 22(12), 3166\u20133179 (2020)","journal-title":"IEEE Trans. Multimedia"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72661-3_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T08:17:04Z","timestamp":1732609024000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72661-3_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,27]]},"ISBN":["9783031726606","9783031726613"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72661-3_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,27]]},"assertion":[{"value":"27 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}