{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T17:54:57Z","timestamp":1773510897733,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":367,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Defense Advanced Research Projects Agency","award":["HR001120C0124"],"award-info":[{"award-number":["HR001120C0124"]}]},{"name":"Horizon Robotics"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612162","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:26:54Z","timestamp":1698391614000},"page":"2704-2713","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Exploring the Knowledge Transferred by Response-Based Teacher-Student Distillation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8366-5088","authenticated-orcid":false,"given":"Liangchen","family":"Song","sequence":"first","affiliation":[{"name":"University at Buffalo, Buffalo, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8303-633X","authenticated-orcid":false,"given":"Xuan","family":"Gong","sequence":"additional","affiliation":[{"name":"University at Buffalo, Buffalo, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8427-8366","authenticated-orcid":false,"given":"Helong","family":"Zhou","sequence":"additional","affiliation":[{"name":"Horizon Robotics, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2005-0200","authenticated-orcid":false,"given":"Jiajie","family":"Chen","sequence":"additional","affiliation":[{"name":"Horizon Robotics, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4123-8979","authenticated-orcid":false,"given":"Qian","family":"Zhang","sequence":"additional","affiliation":[{"name":"Horizon Robotics, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1639-4561","authenticated-orcid":false,"given":"David","family":"Doermann","sequence":"additional","affiliation":[{"name":"University at Buffalo, Buffalo, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7901-8793","authenticated-orcid":false,"given":"Junsong","family":"Yuan","sequence":"additional","affiliation":[{"name":"University at Buffalo, Buffalo, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i8.16865"},{"key":"e_1_3_2_1_2_1","unstructured":"Guobin Chen Wongun Choi Xiang Yu Tony X. Han and Manmohan Chandraker. 2017. Learning Efficient Object Detection Models with Knowledge Distillation. In Advances in Neural Information Processing Systems Isabelle Guyon Ulrike von Luxburg Samy Bengio Hanna M. Wallach Rob Fergus S. V. N. Vishwanathan and Roman Garnett (Eds.). 742--751."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01064"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01064"},{"key":"e_1_3_2_1_8_1","volume-title":"Res2net: A new multi-scale backbone architecture","author":"Gao Shanghua","year":"2019","unstructured":"Shanghua Gao, Ming-Ming Cheng, Kai Zhao, Xin-Yu Zhang, Ming-Hsuan Yang, and Philip HS Torr. 2019. Res2net: A new multi-scale backbone architecture. IEEE transactions on pattern analysis and machine intelligence (2019)."},{"key":"e_1_3_2_1_9_1","volume-title":"Explaining and Harnessing Adversarial Examples. In International Conference on Learning Representations, Yoshua Bengio and Yann LeCun (Eds.).","author":"Goodfellow Ian J.","year":"2015","unstructured":"Ian J. Goodfellow, Jonathon Shlens, and Christian Szegedy. 2015. Explaining and Harnessing Adversarial Examples. In International Conference on Learning Representations, Yoshua Bengio and Yann LeCun (Eds.)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00165"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00065"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013779"},{"key":"e_1_3_2_1_14_1","volume-title":"Distilling the Knowledge in a Neural Network. CoRR","author":"Hinton Geoffrey E.","year":"2015","unstructured":"Geoffrey E. Hinton, Oriol Vinyals, and Jeffrey Dean. 2015. Distilling the Knowledge in a Neural Network. CoRR, Vol. abs\/1503.02531 (2015)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00140"},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Learning Representations. OpenReview.net.","author":"Hsu Daniel","year":"2021","unstructured":"Daniel Hsu, Ziwei Ji, Matus Telgarsky, and Lan Wang. 2021. Generalization bounds via distillation. In International Conference on Learning Representations. OpenReview.net."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_21"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00060"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00726"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3001940"},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Learning Representations, Yoshua Bengio and Yann LeCun (Eds.).","author":"Lopez-Paz David","year":"2016","unstructured":"David Lopez-Paz, L\u00e9on Bottou, Bernhard Sch\u00f6lkopf, and Vladimir Vapnik. 2016. Unifying distillation and privileged information. In International Conference on Learning Representations, Yoshua Bengio and Yann LeCun (Eds.)."},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Machine Learning. PMLR, 7632--7642","author":"Menon Aditya K","year":"2021","unstructured":"Aditya K Menon, Ankit Singh Rawat, Sashank Reddi, Seungyeon Kim, and Sanjiv Kumar. 2021. A statistical perspective on distillation. In International Conference on Machine Learning. PMLR, 7632--7642."},{"key":"e_1_3_2_1_23_1","volume-title":"Bartlett","author":"Mobahi Hossein","year":"2020","unstructured":"Hossein Mobahi, Mehrdad Farajtabar, and Peter L. Bartlett. 2020. Self-Distillation Amplifies Regularization in Hilbert Space. In Advances in Neural Information Processing Systems, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.)."},{"key":"e_1_3_2_1_24_1","unstructured":"Rafael M\u00fcller Simon Kornblith and Geoffrey E Hinton. 2019. When does label smoothing help?. In Advances in Neural Information Processing Systems. 4694--4703."},{"key":"e_1_3_2_1_25_1","volume-title":"Dynamic Kernel Distillation for Efficient Pose Estimation in Videos. In IEEE\/CVF International Conference on Computer Vision. IEEE, 6941--6949","author":"Nie Xuecheng","year":"2019","unstructured":"Xuecheng Nie, Yuncheng Li, Linjie Luo, Ning Zhang, and Jiashi Feng. 2019. Dynamic Kernel Distillation for Efficient Pose Estimation in Videos. In IEEE\/CVF International Conference on Computer Vision. IEEE, 6941--6949."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00409"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00241"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17610"},{"key":"e_1_3_2_1_29_1","volume-title":"International Conference on Learning Representations Workshop. OpenReview.net.","author":"Pereyra Gabriel","unstructured":"Gabriel Pereyra, George Tucker, Jan Chorowski, Lukasz Kaiser, and Geoffrey E. Hinton. 2017. Regularizing Neural Networks by Penalizing Confident Output Distributions. In International Conference on Learning Representations Workshop. OpenReview.net."},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Machine Learning. 5142--5151","author":"Phuong Mary","year":"2019","unstructured":"Mary Phuong and Christoph Lampert. 2019. Towards understanding knowledge distillation. In International Conference on Machine Learning. 5142--5151."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"e_1_3_2_1_32_1","volume-title":"FitNets: Hints for Thin Deep Nets. In International Conference on Learning Representations.","author":"Romero Adriana","year":"2015","unstructured":"Adriana Romero, Nicolas Ballas, Samira Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2015. FitNets: Hints for Thin Deep Nets. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00035"},{"key":"e_1_3_2_1_35_1","volume-title":"Is Label Smoothing Truly Incompatible with Knowledge Distillation: An Empirical Study. In International Conference on Learning Representations.","author":"Shen Zhiqiang","year":"2020","unstructured":"Zhiqiang Shen, Zechun Liu, Dejia Xu, Zitian Chen, Kwang-Ting Cheng, and Marios Savvides. 2020. Is Label Smoothing Truly Incompatible with Knowledge Distillation: An Empirical Study. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_36_1","volume-title":"MEAL V2: Boosting Vanilla ResNet-50 to 80% Top-1 Accuracy on ImageNet without Tricks. arXiv preprint arXiv:2009.08453","author":"Shen Zhiqiang","year":"2020","unstructured":"Zhiqiang Shen and Marios Savvides. 2020. MEAL V2: Boosting Vanilla ResNet-50 to 80% Top-1 Accuracy on ImageNet without Tricks. arXiv preprint arXiv:2009.08453 (2020)."},{"key":"e_1_3_2_1_37_1","volume-title":"ECML PKDD (Lecture Notes in Computer Science","volume":"497","author":"Stamoulis Dimitrios","year":"2019","unstructured":"Dimitrios Stamoulis, Ruizhou Ding, Di Wang, Dimitrios Lymberopoulos, Bodhi Priyantha, Jie Liu, and Diana Marculescu. 2019. Single-Path NAS: Designing Hardware-Efficient ConvNets in Less Than 4 Hours. In ECML PKDD (Lecture Notes in Computer Science, Vol. 11907), Ulf Brefeld, \u00c9lisa Fromont, Andreas Hotho, Arno J. Knobbe, Marloes H. Maathuis, and C\u00e9line Robardet (Eds.). Springer, 481--497."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_39_1","volume-title":"Learning efficient detector with semi-supervised adaptive distillation. arXiv preprint arXiv:1901.00366","author":"Tang Shitao","year":"2019","unstructured":"Shitao Tang, Litong Feng, Wenqi Shao, Zhanghui Kuang, Wei Zhang, and Yimin Chen. 2019a. Learning efficient detector with semi-supervised adaptive distillation. arXiv preprint arXiv:1901.00366 (2019)."},{"key":"e_1_3_2_1_40_1","volume-title":"Learning Efficient Detector with Semi-supervised Adaptive Distillation. In 30th British Machine Vision Conference 2019, BMVC 2019","author":"Tang Shitao","year":"2019","unstructured":"Shitao Tang, Litong Feng, Wenqi Shao, Zhanghui Kuang, Wayne Zhang, and Zheng Lu. 2019b. Learning Efficient Detector with Semi-supervised Adaptive Distillation. In 30th British Machine Vision Conference 2019, BMVC 2019, Cardiff, UK, September 9-12, 2019. BMVA Press, 215."},{"key":"e_1_3_2_1_41_1","volume-title":"Contrastive Representation Distillation. In International Conference on Learning Representations.","author":"Tian Yonglong","year":"2020","unstructured":"Yonglong Tian, Dilip Krishnan, and Phillip Isola. 2020. Contrastive Representation Distillation. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_42_1","volume-title":"International Conference on Machine Learning","volume":"139","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herve Jegou. 2021. Training data-efficient image transformers & distillation through attention. In International Conference on Machine Learning, Vol. 139. 10347--10357."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00145"},{"key":"e_1_3_2_1_44_1","unstructured":"Jingdong Wang Ke Sun Tianheng Cheng Borui Jiang Chaorui Deng Yang Zhao Dong Liu Yadong Mu Mingkui Tan Xinggang Wang et al. 2020. Deep high-resolution representation learning for visual recognition. IEEE transactions on pattern analysis and machine intelligence (2020)."},{"key":"e_1_3_2_1_45_1","volume-title":"Preparing lessons: Improve knowledge distillation with better supervision. arXiv preprint arXiv:1911.07471","author":"Wen Tiancheng","year":"2019","unstructured":"Tiancheng Wen, Shenqi Lai, and Xueming Qian. 2019. Preparing lessons: Improve knowledge distillation with better supervision. arXiv preprint arXiv:1911.07471 (2019)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","unstructured":"Ross Wightman. 2019. PyTorch Image Models. https:\/\/github.com\/rwightman\/pytorch-image-models. https:\/\/doi.org\/10.5281\/zenodo.4414861","DOI":"10.5281\/zenodo.4414861"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.754"},{"key":"e_1_3_2_1_48_1","volume-title":"Jiashi Feng, and Shuicheng Yan.","author":"Yuan Li","year":"2021","unstructured":"Li Yuan, Yunpeng Chen, Tao Wang, Weihao Yu, Yujun Shi, Francis EH Tay, Jiashi Feng, and Shuicheng Yan. 2021. Tokens-to-token vit: Training vision transformers from scratch on imagenet. arXiv preprint arXiv:2101.11986 (2021)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00396"},{"key":"e_1_3_2_1_50_1","volume-title":"International Conference on Learning Representations.","author":"Zagoruyko Sergey","year":"2017","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2017. Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_51_1","volume-title":"Prime-Aware Adaptive Distillation. In European Conference on Computer Vision (Lecture Notes in Computer Science","volume":"674","author":"Zhang Youcai","year":"2020","unstructured":"Youcai Zhang, Zhonghao Lan, Yuchen Dai, Fangao Zeng, Yan Bai, Jie Chang, and Yichen Wei. 2020. Prime-Aware Adaptive Distillation. In European Conference on Computer Vision (Lecture Notes in Computer Science, Vol. 12364), Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm (Eds.). Springer, 658--674."},{"key":"e_1_3_2_1_52_1","volume-title":"Sabuncu","author":"Zhang Zhilu","year":"2020","unstructured":"Zhilu Zhang and Mert R. Sabuncu. 2020. Self-Distillation as Instance-Specific Label Smoothing. In Advances in Neural Information Processing Systems, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.)."},{"key":"e_1_3_2_1_53_1","volume-title":"Rethinking Soft Labels for Knowledge Distillation: A Bias-Variance Tradeoff Perspective. In International Conference on Learning Representations.","author":"Zhou Helong","year":"2020","unstructured":"Helong Zhou, Liangchen Song, Jiajie Chen, Ye Zhou, Guoli Wang, Junsong Yuan, and Qian Zhang. 2020. Rethinking Soft Labels for Knowledge Distillation: A Bias-Variance Tradeoff Perspective. In International Conference on Learning Representations."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612162","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612162","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612162","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:06:03Z","timestamp":1755821163000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612162"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":53,"alternative-id":["10.1145\/3581783.3612162","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612162","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}