{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:23:08Z","timestamp":1777656188149,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The Hong Kong RGC General Research Fund","award":["No. 152211\/23E and 15216424\/24E"],"award-info":[{"award-number":["No. 152211\/23E and 15216424\/24E"]}]},{"name":"NVIDIA AI Technology Center"},{"name":"The National Natural Science Foundation of China","award":["No. 62102327"],"award-info":[{"award-number":["No. 62102327"]}]},{"name":"PolyU Internal Fund","award":["No. P0043932"],"award-info":[{"award-number":["No. P0043932"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681196","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"778-786","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Fine-Grained Side Information Guided Dual-Prompts for Zero-Shot Skeleton Action Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-5752-1639","authenticated-orcid":false,"given":"Yang","family":"Chen","sequence":"first","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0449-4525","authenticated-orcid":false,"given":"Jingcai","family":"Guo","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University &amp; Hong Kong Polytechnic University Shenzhen Research Institute, Hong Kong SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1404-7183","authenticated-orcid":false,"given":"Tian","family":"He","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1071-8122","authenticated-orcid":false,"given":"Xiaocheng","family":"Lu","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1235-1241","authenticated-orcid":false,"given":"Ling","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_4"},{"key":"e_1_3_2_1_3_1","volume-title":"TransZero: Cross attributeguided transformer for zero-shot learning","author":"Chen Shiming","year":"2022","unstructured":"Shiming Chen, Ziming Hong, Wenjin Hou, Guo-Sen Xie, Yibing Song, Jian Zhao, Xinge You, Shuicheng Yan, and Ling Shao. 2022. TransZero: Cross attributeguided transformer for zero-shot learning. IEEE transactions on pattern analysis and machine intelligence (2022)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19909"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01311"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00026"},{"key":"e_1_3_2_1_7_1","volume-title":"PKUMMD: A Large Scale Benchmark for Continuous Multi-Modal Human Action Understanding. arXiv preprint arXiv:1703.07475","author":"Chunhui Liu","year":"2017","unstructured":"Liu Chunhui, Hu Yueyu, Li Yanghao, Song Sijie, and Liu Jiaying. 2017. PKUMMD: A Large Scale Benchmark for Continuous Multi-Modal Human Action Understanding. arXiv preprint arXiv:1703.07475 (2017)."},{"key":"e_1_3_2_1_8_1","volume-title":"Devise: A deep visual-semantic embedding model. Advances in neural information processing systems 26","author":"Frome Andrea","year":"2013","unstructured":"Andrea Frome, Greg S Corrado, Jon Shlens, Samy Bengio, Jeff Dean, Marc-Aurelio Ranzato, and Tomas Mikolov. 2013. Devise: A deep visual-semantic embedding model. Advances in neural information processing systems 26 (2013)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP42928.2021.9506179"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the IEEE International conference on Computer Vision. 3571--3580","author":"Hubert Tsai Yao-Hung","year":"2017","unstructured":"Yao-Hung Hubert Tsai, Liang-Kang Huang, and Ruslan Salakhutdinov. 2017. Learning robust visual-semantic embeddings. In Proceedings of the IEEE International conference on Computer Vision. 3571--3580."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00454"},{"key":"e_1_3_2_1_12_1","volume-title":"Skeleton based zero shot action recognition in joint pose-language semantic space. arXiv preprint arXiv:1911.11344","author":"Jasani Bhavan","year":"2019","unstructured":"Bhavan Jasani and Afshaan Mazagonwalla. 2019. Skeleton based zero shot action recognition in joint pose-language semantic space. arXiv preprint arXiv:1911.11344 (2019)."},{"key":"e_1_3_2_1_13_1","volume-title":"Multisemantic Fusion Model For Generalized Zero-Shot Skeleton-Based Action Recognition. In International Conference on Image and Graphics. Springer, 68--80","author":"Li Ming-Zhe","year":"2023","unstructured":"Ming-Zhe Li, Zhen Jia, Zhang Zhang, Zhanyu Ma, and Liang Wang. 2023. Multisemantic Fusion Model For Generalized Zero-Shot Skeleton-Based Action Recognition. In International Conference on Image and Graphics. Springer, 68--80."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00779"},{"key":"e_1_3_2_1_15_1","volume-title":"Ntu rgb d 120: A large-scale benchmark for 3d human activity understanding","author":"Liu Jun","year":"2019","unstructured":"Jun Liu, Amir Shahroudy, Mauricio Perez, Gang Wang, Ling-Yu Duan, and Alex C Kot. 2019. Ntu rgb d 120: A large-scale benchmark for 3d human activity understanding. IEEE transactions on pattern analysis and machine intelligence 42, 10 (2019), 2684--2701."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01472"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00680"},{"key":"e_1_3_2_1_18_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_1_19_1","volume-title":"Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems 26","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean. 2013. Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems 26 (2013)."},{"key":"e_1_3_2_1_20_1","first-page":"12283","article-title":"I2dformer: Learning image to document attention for zero-shot image classification","volume":"35","author":"Naeem Muhammad Ferjad","year":"2022","unstructured":"Muhammad Ferjad Naeem, Yongqin Xian, Luc V Gool, and Federico Tombari. 2022. I2dformer: Learning image to document attention for zero-shot image classification. Advances in Neural Information Processing Systems 35 (2022), 12283--12294.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"Xi-Zhao Wang, and QM Jonathan Wu.","author":"Pourpanah Farhad","year":"2022","unstructured":"Farhad Pourpanah, Moloud Abdar, Yuxuan Luo, Xinlei Zhou, Ran Wang, Chee Peng Lim, Xi-Zhao Wang, and QM Jonathan Wu. 2022. A review of generalized zero-shot learning methods. IEEE transactions on pattern analysis and machine intelligence (2022)."},{"key":"e_1_3_2_1_22_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_23_1","volume-title":"Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00626"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00844"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.115"},{"key":"e_1_3_2_1_27_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00054"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00943"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00961"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_33"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2012.24"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603618"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611888"},{"key":"e_1_3_2_1_36_1","volume-title":"Semantic-guided multi-attention localization for zero-shot learning. Advances in Neural Information Processing Systems 32","author":"Zhu Yizhe","year":"2019","unstructured":"Yizhe Zhu, Jianwen Xie, Zhiqiang Tang, Xi Peng, and Ahmed Elgammal. 2019. Semantic-guided multi-attention localization for zero-shot learning. Advances in Neural Information Processing Systems 32 (2019)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681196","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681196","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681196"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":36,"alternative-id":["10.1145\/3664647.3681196","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681196","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}