{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T20:10:05Z","timestamp":1755893405702,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T00:00:00Z","timestamp":1702598400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,15]]},"DOI":"10.1145\/3627631.3627647","type":"proceedings-article","created":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T12:08:32Z","timestamp":1706702912000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Knowledge Distillation with Ensemble Calibration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6314-5536","authenticated-orcid":false,"given":"Ishan","family":"Mishra","sequence":"first","affiliation":[{"name":"Computer Science and Engineering, Indian Institute of Technology Jodhpur, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5913-6386","authenticated-orcid":false,"given":"Riyanshu","family":"Jain","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, Indian Institute of Technology, Jodhpur, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6243-1362","authenticated-orcid":false,"given":"Dhruv","family":"Viradiya","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, Indian Institute of Technology Jodhpur, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3645-9117","authenticated-orcid":false,"given":"Divyam","family":"Patel","sequence":"additional","affiliation":[{"name":"Electrical Engineering, Indian Institute of Technology Jodhpur, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4078-9400","authenticated-orcid":false,"given":"Deepak","family":"Mishra","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, Indian Institute of Technology Jodhpur, India"}]}],"member":"320","published-online":{"date-parts":[[2024,1,31]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Murat\u00a0Seckin Ayhan and Philipp Berens. 2018. Test-time Data Augmentation for Estimation of Heteroscedastic Aleatoric Uncertainty in Deep Neural Networks. In Medical Imaging with Deep Learning. https:\/\/openreview.net\/forum?id=rJZz-knjz"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150464"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5746"},{"key":"e_1_3_2_1_4_1","volume-title":"Feature-map-level Online Adversarial Knowledge Distillation. 119 (13\u201318","author":"Chung Inseop","year":"2020","unstructured":"Inseop Chung, Seonguk Park, Jangho Kim, and Nojun Kwak. 2020. Feature-map-level Online Adversarial Knowledge Distillation. 119 (13\u201318 Jul 2020), 2006\u20132015. https:\/\/proceedings.mlr.press\/v119\/chung20a.html"},{"key":"e_1_3_2_1_5_1","volume-title":"An empirical analysis of the impact of data augmentation on knowledge distillation. arXiv preprint arXiv:2006.03810","author":"Das Deepan","year":"2020","unstructured":"Deepan Das, Haley Massa, Abhimanyu Kulkarni, and Theodoros Rekatsinas. 2020. An empirical analysis of the impact of data augmentation on knowledge distillation. arXiv preprint arXiv:2006.03810 (2020)."},{"key":"e_1_3_2_1_6_1","volume-title":"Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552","author":"DeVries Terrance","year":"2017","unstructured":"Terrance DeVries and Graham\u00a0W Taylor. 2017. Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552 (2017)."},{"key":"e_1_3_2_1_7_1","volume-title":"Mohsin Ali, Jongseok Lee, Matthias Humt, Jianxiang Feng, Anna Kruspe, Rudolph Triebel","author":"Gawlikowski Jakob","year":"2021","unstructured":"Jakob Gawlikowski, Cedrique Rovile\u00a0Njieutcheu Tassi, Mohsin Ali, Jongseok Lee, Matthias Humt, Jianxiang Feng, Anna Kruspe, Rudolph Triebel, Peter Jung, Ribana Roscher, 2021. A survey of uncertainty in deep neural networks. arXiv preprint arXiv:2107.03342 (2021)."},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning. PMLR, 1321\u20131330","author":"Guo Chuan","year":"2017","unstructured":"Chuan Guo, Geoff Pleiss, Yu Sun, and Kilian\u00a0Q Weinberger. 2017. On calibration of modern neural networks. In International conference on machine learning. PMLR, 1321\u20131330."},{"key":"e_1_3_2_1_9_1","volume-title":"Online Knowledge Distillation via Collaborative Learning. (June","author":"Guo Qiushan","year":"2020","unstructured":"Qiushan Guo, Xinjiang Wang, Yichao Wu, Zhipeng Yu, Ding Liang, Xiaolin Hu, and Ping Luo. 2020. Online Knowledge Distillation via Collaborative Learning. (June 2020)."},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Hendrycks Dan","year":"2020","unstructured":"Dan Hendrycks, Norman Mu, Ekin\u00a0D. Cubuk, Barret Zoph, Justin Gilmer, and Balaji Lakshminarayanan. 2020. AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty. Proceedings of the International Conference on Learning Representations (ICLR) (2020)."},{"key":"e_1_3_2_1_11_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 2, 7","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, Jeff Dean, 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 2, 7 (2015)."},{"key":"e_1_3_2_1_12_1","unstructured":"Zehao Huang and Naiyan Wang. 2019. Like What You Like: Knowledge Distill via Neuron Selectivity Transfer. https:\/\/openreview.net\/forum?id=rJf0BjAqYX"},{"key":"e_1_3_2_1_13_1","volume-title":"Simple and scalable predictive uncertainty estimation using deep ensembles. Advances in neural information processing systems 30","author":"Lakshminarayanan Balaji","year":"2017","unstructured":"Balaji Lakshminarayanan, Alexander Pritzel, and Charles Blundell. 2017. Simple and scalable predictive uncertainty estimation using deep ensembles. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_14_1","volume-title":"SMILE: Self-Distilled MIxup for Efficient Transfer LEarning. arXiv preprint arXiv:2103.13941","author":"Li Xingjian","year":"2021","unstructured":"Xingjian Li, Haoyi Xiong, Chengzhong Xu, and Dejing Dou. 2021. SMILE: Self-Distilled MIxup for Efficient Transfer LEarning. arXiv preprint arXiv:2103.13941 (2021)."},{"key":"e_1_3_2_1_15_1","volume-title":"Self-Distillation Amplifies Regularization in Hilbert Space. 33","author":"Mobahi Hossein","year":"2020","unstructured":"Hossein Mobahi, Mehrdad Farajtabar, and Peter Bartlett. 2020. Self-Distillation Amplifies Regularization in Hilbert Space. 33 (2020), 3351\u20133361. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/2288f691b58edecadcc9a8691762b4fd-Paper.pdf"},{"key":"e_1_3_2_1_16_1","first-page":"4555","article-title":"MixACM: Mixup-Based Robustness Transfer via Distillation of Activated Channel Maps","volume":"34","author":"Muhammad Awais","year":"2021","unstructured":"Awais Muhammad, Fengwei Zhou, Chuanlong Xie, Jiawei Li, Sung-Ho Bae, and Zhenguo Li. 2021. MixACM: Mixup-Based Robustness Transfer via Distillation of Activated Channel Maps. Advances in Neural Information Processing Systems 34 (2021), 4555\u20134569.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","volume-title":"CVPR Workshops, Vol.\u00a02.","author":"Nixon Jeremy","year":"2019","unstructured":"Jeremy Nixon, Michael\u00a0W Dusenberry, Linchuan Zhang, Ghassen Jerfel, and Dustin Tran. 2019. Measuring Calibration in Deep Learning.. In CVPR Workshops, Vol.\u00a02."},{"key":"e_1_3_2_1_18_1","volume-title":"Advances in Neural Information Processing Systems, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d'Alch\u00e9-Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Vol.\u00a032. Curran Associates","author":"Ovadia Yaniv","year":"2019","unstructured":"Yaniv Ovadia, Emily Fertig, Jie Ren, Zachary Nado, D. Sculley, Sebastian Nowozin, Joshua Dillon, Balaji Lakshminarayanan, and Jasper Snoek. 2019. Can you trust your model's uncertainty? Evaluating predictive uncertainty under dataset shift. In Advances in Neural Information Processing Systems, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d'Alch\u00e9-Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Vol.\u00a032. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/8558cb408c1d76621371888657d2eb1d-Paper.pdf"},{"key":"e_1_3_2_1_19_1","volume-title":"in International Conference on Learning Representations (ICLR.","author":"Romero Adriana","year":"2015","unstructured":"Adriana Romero, Samira\u00a0Ebrahimi Kahou, Polytechnique Montr\u00e9al, Y. Bengio, Universit\u00e9\u00a0De Montr\u00e9al, Adriana Romero, Nicolas Ballas, Samira\u00a0Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2015. Fitnets: Hints for thin deep nets. In in International Conference on Learning Representations (ICLR."},{"key":"e_1_3_2_1_20_1","volume-title":"Online Distillation with Mixed Sample Augmentation. arXiv preprint arXiv:2206.12370","author":"Shen Yiqing","year":"2022","unstructured":"Yiqing Shen, Liwu Xu, Yuzhe Yang, Yaqian Li, and Yandong Guo. 2022. Online Distillation with Mixed Sample Augmentation. arXiv preprint arXiv:2206.12370 (2022)."},{"key":"e_1_3_2_1_21_1","volume-title":"Does knowledge distillation really work?Advances in Neural Information Processing Systems 34","author":"Stanton Samuel","year":"2021","unstructured":"Samuel Stanton, Pavel Izmailov, Polina Kirichenko, Alexander\u00a0A Alemi, and Andrew\u00a0G Wilson. 2021. Does knowledge distillation really work?Advances in Neural Information Processing Systems 34 (2021), 6906\u20136919."},{"key":"e_1_3_2_1_22_1","volume-title":"On mixup training: Improved calibration and predictive uncertainty for deep neural networks. Advances in Neural Information Processing Systems 32","author":"Thulasidasan Sunil","year":"2019","unstructured":"Sunil Thulasidasan, Gopinath Chennupati, Jeff\u00a0A Bilmes, Tanmoy Bhattacharya, and Sarah Michalak. 2019. On mixup training: Improved calibration and predictive uncertainty for deep neural networks. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_23_1","volume-title":"Contrastive Representation Distillation. In International Conference on Learning Representations.","author":"Tian Yonglong","year":"2020","unstructured":"Yonglong Tian, Dilip Krishnan, and Phillip Isola. 2020. Contrastive Representation Distillation. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_24_1","volume-title":"Knowledge distillation thrives on data augmentation. arXiv preprint arXiv:2012.02909","author":"Wang Huan","year":"2020","unstructured":"Huan Wang, Suhas Lohit, Michael Jones, and Yun Fu. 2020. Knowledge distillation thrives on data augmentation. arXiv preprint arXiv:2012.02909 (2020)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098135"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00612"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01389"},{"key":"e_1_3_2_1_28_1","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2017. Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer. In ICLR. https:\/\/arxiv.org\/abs\/1612.03928"},{"key":"e_1_3_2_1_29_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=r1Ddp1-Rb","author":"Zhang Hongyi","year":"2018","unstructured":"Hongyi Zhang, Moustapha Cisse, Yann\u00a0N. Dauphin, and David Lopez-Paz. 2018. mixup: Beyond Empirical Risk Minimization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=r1Ddp1-Rb"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00381"},{"key":"e_1_3_2_1_31_1","volume-title":"Similarity Transfer for Knowledge Distillation. arXiv preprint arXiv:2103.10047","author":"Zhao Haoran","year":"2021","unstructured":"Haoran Zhao, Kun Gong, Xin Sun, Junyu Dong, and Hui Yu. 2021. Similarity Transfer for Knowledge Distillation. arXiv preprint arXiv:2103.10047 (2021)."}],"event":{"name":"ICVGIP '23: Indian Conference on Computer Vision, Graphics and Image Processing","acronym":"ICVGIP '23","location":"Rupnagar India"},"container-title":["Proceedings of the Fourteenth Indian Conference on Computer Vision, Graphics and Image Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627631.3627647","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627631.3627647","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:49:54Z","timestamp":1755892194000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627631.3627647"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,15]]},"references-count":31,"alternative-id":["10.1145\/3627631.3627647","10.1145\/3627631"],"URL":"https:\/\/doi.org\/10.1145\/3627631.3627647","relation":{},"subject":[],"published":{"date-parts":[[2023,12,15]]},"assertion":[{"value":"2024-01-31","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}