{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:26:58Z","timestamp":1765308418265,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","funder":[{"name":"Zhejiang Provincial Natural Science Foundation of China","award":["LR23F020002"],"award-info":[{"award-number":["LR23F020002"]}]},{"name":"Hangzhou Key Research and Development Program","award":["2024SZD1A12"],"award-info":[{"award-number":["2024SZD1A12"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755514","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"544-552","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Sample-level Adaptive Knowledge Distillation for Action Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8515-7773","authenticated-orcid":false,"given":"Ping","family":"Li","sequence":"first","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6287-1992","authenticated-orcid":false,"given":"Chenhao","family":"Ping","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6399-292X","authenticated-orcid":false,"given":"Wenxiao","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2621-6048","authenticated-orcid":false,"given":"Mingli","family":"Song","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_2_1","first-page":"5627","article-title":"Fast greedy map inference for determinantal point process to improve recommendation diversity","author":"Chen Laming","year":"2018","unstructured":"Laming Chen, Guoxin Zhang, and Eric Zhou. 2018. Fast greedy map inference for determinantal point process to improve recommendation diversity. In Advances in Neural Information Processing Systems (NeurIPS). 5627-5638.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00807"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An image is worth 16x16 words: Transformers for image recognition at scale. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"e_1_3_2_1_7_1","first-page":"1135","article-title":"Learning both weights and connections for efficient neural network","author":"Han Song","year":"2015","unstructured":"Song Han, Jeff Pool, John Tran, and William Dally. 2015. Learning both weights and connections for efficient neural network. In Advances in Neural Information Processing Systems (NeurIPS). 1135-1143.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_9_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeffrey Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint, Vol. arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"e_1_3_2_1_11_1","volume-title":"The kinetics human action video dataset. arXiv preprint arXiv:1705.06950","author":"Kay Will","year":"2017","unstructured":"Will Kay, Joao Carreira, Karen Simonyan, Brian Zhang, Chloe Hillier, Sudheendra Vijayanarasimhan, Fabio Viola, Tim Green, Trevor Back, Paul Natsev, Mustafa Suleyman, and Andrew Zisserman. 2017. The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)."},{"key":"e_1_3_2_1_12_1","unstructured":"Alex Krizhevsky and Geoffrey Hinton. 2009. Learning multiple layers of features from tiny images. Technical Report. Canadian Institute for Advanced Research."},{"key":"e_1_3_2_1_13_1","volume-title":"ImageNet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems (NeurIPS)","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton. 2012. ImageNet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems (NeurIPS) (2012), 1106-1114."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1561\/2200000044"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01826"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.120714"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25236"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Liu Xiaolong","year":"2023","unstructured":"Xiaolong Liu, Lujun Li, Chao Li, and Anbang Yao. 2023. NORM: knowledge distillation via n-to-one representation matching. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_17"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00227"},{"key":"e_1_3_2_1_22_1","volume-title":"Two-stream convolutional networks for action recognition in videos. Advances in Neural Information Processing Systems (NeurIPS)","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos. Advances in Neural Information Processing Systems (NeurIPS) (2014), 568-576."},{"key":"e_1_3_2_1_23_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093274"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01489"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00145"},{"key":"e_1_3_2_1_28_1","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing Systems (NeurIPS). 5998-6008.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i14.29473"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01563"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3343609"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19803-8_5"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01261"},{"key":"e_1_3_2_1_35_1","article-title":"Selecting and composing learning rate policies for deep neural networks","volume":"14","author":"Wu Yanzhao","year":"2023","unstructured":"Yanzhao Wu and Ling Liu. 2023. Selecting and composing learning rate policies for deep neural networks. ACM Transactions on Intelligent Systems and Technology (TIST), Vol. 14, 2 (2023), 22:1-22:25.","journal-title":"ACM Transactions on Intelligent Systems and Technology (TIST)"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01536"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00067"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01576"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.87"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Zagoruyko Sergey","year":"2017","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2017. Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"e_1_3_2_1_42_1","first-page":"32011","article-title":"Teach less, learn more: On the undistillable classes in knowledge distillation","author":"Zhu Yichen","year":"2022","unstructured":"Yichen Zhu, Ning Liu, Zhiyuan Xu, Xin Liu, Weibin Meng, Louis Wang, Zhicai Ou, and Jian Tang. 2022. Teach less, learn more: On the undistillable classes in knowledge distillation. In Advances in Neural Information Processing Systems (NeurIPS). 32011-32024.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Zong Martin","year":"2023","unstructured":"Martin Zong, Zengyu Qiu, Xinzhu Ma, Kunlin Yang, Chunya Liu, Jun Hou, Shuai Yi, and Wanli Ouyang. 2023. Better teacher better student: dynamic prior knowledge for knowledge distillation. In Proceedings of the International Conference on Learning Representations (ICLR)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755514","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:23:26Z","timestamp":1765308206000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755514"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":43,"alternative-id":["10.1145\/3746027.3755514","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755514","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}