{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:33:19Z","timestamp":1777656799185,"version":"3.51.4"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730238","type":"print"},{"value":"9783031730245","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73024-5_1","type":"book-chapter","created":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T16:38:50Z","timestamp":1732552730000},"page":"1-18","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["How to\u00a0Train the\u00a0Teacher Model for\u00a0Effective Knowledge Distillation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8321-7130","authenticated-orcid":false,"given":"Shayan Mohajer","family":"Hamidi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7974-848X","authenticated-orcid":false,"given":"Xizhen","family":"Deng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6448-9954","authenticated-orcid":false,"given":"Renhao","family":"Tan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2355-1773","authenticated-orcid":false,"given":"Linfeng","family":"Ye","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7836-5278","authenticated-orcid":false,"given":"Ahmed Hussein","family":"Salamah","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,24]]},"reference":[{"key":"1_CR1","doi-asserted-by":"publisher","unstructured":"Ahn, S., Hu, S., Damianou, A., Lawrence, N., Dai, Z.: Variational information distillation for knowledge transfer, pp. 9155\u20139163 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00938","DOI":"10.1109\/CVPR.2019.00938"},{"key":"1_CR2","unstructured":"Allen-Zhu, Z., Li, Y.: Towards understanding ensemble, knowledge distillation and self-distillation in deep learning. arXiv preprint arXiv:2012.09816 (2020)"},{"key":"1_CR3","unstructured":"Anil, R., Pereyra, G., Passos, A., Ormandi, R., Dahl, G.E., Hinton, G.E.: Large scale distributed neural network training through online distillation. arXiv preprint arXiv:1804.03235 (2018)"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Beyer, L., Zhai, X., Royer, A., Markeeva, L., Anil, R., Kolesnikov, A.: Knowledge distillation: a good teacher is patient and consistent. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10925\u201310934 (2022)","DOI":"10.1109\/CVPR52688.2022.01065"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Bucilu\u01ce, C., Caruana, R., Niculescu-Mizil, A.: Model compression. In: Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 535\u2013541 (2006)","DOI":"10.1145\/1150402.1150464"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Chen, D., Mei, J.P., Wang, C., Feng, Y., Chen, C.: Online knowledge distillation with diverse peers. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 3430\u20133437 (2020)","DOI":"10.1609\/aaai.v34i04.5746"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Chen, P., Liu, S., Zhao, H., Jia, J.: Distilling knowledge via knowledge review. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5008\u20135017 (2021)","DOI":"10.1109\/CVPR46437.2021.00497"},{"key":"1_CR8","first-page":"22243","volume":"33","author":"T Chen","year":"2020","unstructured":"Chen, T., Kornblith, S., Swersky, K., Norouzi, M., Hinton, G.E.: Big self-supervised models are strong semi-supervised learners. Adv. Neural. Inf. Process. Syst. 33, 22243\u201322255 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Chi, Z., Gu, L., Liu, H., Wang, Y., Yu, Y., Tang, J.: MetaFSCIL: a meta-learning approach for few-shot class incremental learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14166\u201314175 (2022)","DOI":"10.1109\/CVPR52688.2022.01377"},{"key":"1_CR10","unstructured":"Chi, Z., et al.: Adapting to distribution shift by visual domain prompt generation. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"1_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/978-3-030-58583-9_7","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Chi","year":"2020","unstructured":"Chi, Z., Mohammadi Nasiri, R., Liu, Z., Lu, J., Tang, J., Plataniotis, K.N.: All at once: temporally adaptive multi-frame interpolation with advanced motion modeling. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020, Part XXVII. LNCS, vol. 12372, pp. 107\u2013123. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58583-9_7"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Chi, Z., Wang, Y., Yu, Y., Tang, J.: Test-time fast adaptation for dynamic scene deblurring via meta-auxiliary learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9137\u20139146 (2021)","DOI":"10.1109\/CVPR46437.2021.00902"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Cho, J.H., Hariharan, B.: On the efficacy of knowledge distillation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4794\u20134802 (2019)","DOI":"10.1109\/ICCV.2019.00489"},{"key":"1_CR14","unstructured":"Dao, T., Kamath, G.M., Syrgkanis, V., Mackey, L.: Knowledge distillation as semiparametric inference. In: International Conference on Learning Representations (2020)"},{"key":"1_CR15","unstructured":"Dong, C., Liu, L., Shang, J.: Toward student-oriented teacher network training for knowledge distillation. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"1_CR16","doi-asserted-by":"publisher","first-page":"1004","DOI":"10.1109\/LSP.2024.3383948","volume":"31","author":"SM Hamidi","year":"2024","unstructured":"Hamidi, S.M.: Training neural networks on remote edge devices for unseen class classification. IEEE Signal Process. Lett. 31, 1004\u20131008 (2024). https:\/\/doi.org\/10.1109\/LSP.2024.3383948","journal-title":"IEEE Signal Process. Lett."},{"key":"1_CR17","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"1_CR18","unstructured":"Hui, L., Belkin, M.: Evaluation of neural architectures trained with square loss vs cross-entropy in classification tasks. In: International Conference on Learning Representations (2020)"},{"key":"1_CR19","first-page":"7024","volume":"35","author":"F Iliopoulos","year":"2022","unstructured":"Iliopoulos, F., Kontonis, V., Baykal, C., Menghani, G., Trinh, K., Vee, E.: Weighted distillation with unlabeled examples. Adv. Neural. Inf. Process. Syst. 35, 7024\u20137037 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Jin, X., et al.: Knowledge distillation via route constrained optimization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1345\u20131354 (2019)","DOI":"10.1109\/ICCV.2019.00143"},{"issue":"4","key":"1_CR21","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1109\/72.88169","volume":"2","author":"F Kanaya","year":"1991","unstructured":"Kanaya, F., Miyake, S.: Bayes statistical behavior and valid generalization of pattern classifying neural networks. IEEE Trans. Neural Netw. 2(4), 471\u2013475 (1991)","journal-title":"IEEE Trans. Neural Netw."},{"key":"1_CR22","unstructured":"Krizhevsky, A., Hinton, G., et\u00a0al.: Learning multiple layers of features from tiny images (2009)"},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Li, Z., Huang, Y., Chen, D., Luo, T., Cai, N., Pan, Z.: Online knowledge distillation via multi-branch diversity enhancement. In: Proceedings of the Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69538-5_20"},{"key":"1_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1007\/978-3-031-20053-3_9","volume-title":"Computer Vision \u2013 ECCV 2022","author":"H Liu","year":"2022","unstructured":"Liu, H., et al.: Few-shot class-incremental learning via entropy-regularized data-free replay. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13684, pp. 146\u2013162. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20053-3_9"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Knowledge distillation via instance relationship graph. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7096\u20137104 (2019)","DOI":"10.1109\/CVPR.2019.00726"},{"key":"1_CR26","unstructured":"Menon, A.K., Rawat, A.S., Reddi, S., Kim, S., Kumar, S.: A statistical perspective on distillation. In: International Conference on Machine Learning, pp. 7632\u20137642. PMLR (2021)"},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Mirzadeh, S.I., Farajtabar, M., Li, A., Levine, N., Matsukawa, A., Ghasemzadeh, H.: Improved knowledge distillation via teacher assistant. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 5191\u20135198 (2020)","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"1_CR28","first-page":"3351","volume":"33","author":"H Mobahi","year":"2020","unstructured":"Mobahi, H., Farajtabar, M., Bartlett, P.: Self-distillation amplifies regularization in Hilbert space. Adv. Neural. Inf. Process. Syst. 33, 3351\u20133361 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR29","unstructured":"M\u00fcller, R., Kornblith, S., Hinton, G.: Subclass distillation. arXiv preprint arXiv:2002.03936 (2020)"},{"key":"1_CR30","doi-asserted-by":"crossref","unstructured":"Park, W., Kim, D., Lu, Y., Cho, M.: Relational knowledge distillation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3967\u20133976 (2019)","DOI":"10.1109\/CVPR.2019.00409"},{"key":"1_CR31","doi-asserted-by":"crossref","unstructured":"Park, W., Kim, D., Lu, Y., Cho, M.: Relational knowledge distillation, pp. 3967\u20133976 (2019)","DOI":"10.1109\/CVPR.2019.00409"},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Passalis, N., Tefas, A.: Learning deep representations with probabilistic knowledge transfer (2018)","DOI":"10.1007\/978-3-030-01252-6_17"},{"key":"1_CR33","doi-asserted-by":"publisher","unstructured":"Peng, B., et al.: Correlation congruence for knowledge distillation, pp. 5006\u20135015 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00511","DOI":"10.1109\/ICCV.2019.00511"},{"key":"1_CR34","unstructured":"Phuong, M., Lampert, C.: Towards understanding knowledge distillation. In: International Conference on Machine Learning, pp. 5142\u20135151. PMLR (2019)"},{"key":"1_CR35","unstructured":"Ren, Y., Guo, S., Sutherland, D.J.: Better supervisory signals by observing learning paths. In: International Conference on Learning Representations (2021)"},{"key":"1_CR36","unstructured":"Romero, A., Ballas, N., Kahou, S.E., Chassang, A., Gatta, C., Bengio, Y.: FitNets: hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)"},{"key":"1_CR37","doi-asserted-by":"crossref","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","DOI":"10.1007\/s11263-015-0816-y"},{"key":"1_CR38","unstructured":"Sajedi, A., Plataniotis, K.N.: On the efficiency of subclass knowledge distillation in classification tasks. arXiv preprint arXiv:2109.05587 (2021)"},{"key":"1_CR39","first-page":"6906","volume":"34","author":"S Stanton","year":"2021","unstructured":"Stanton, S., Izmailov, P., Kirichenko, P., Alemi, A.A., Wilson, A.G.: Does knowledge distillation really work? Adv. Neural. Inf. Process. Syst. 34, 6906\u20136919 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR40","unstructured":"Tan, C., Liu, J.: Improving knowledge distillation with a customized teacher. IEEE Trans. Neural Netw. Learn. Syst. (2022)"},{"key":"1_CR41","unstructured":"Tian, Y., Krishnan, D., Isola, P.: Contrastive representation distillation (2020)"},{"key":"1_CR42","doi-asserted-by":"crossref","unstructured":"Tung, F., Mori, G.: Similarity-preserving knowledge distillation. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1365\u20131374 (2019). https:\/\/api.semanticscholar.org\/CorpusID:198179476","DOI":"10.1109\/ICCV.2019.00145"},{"key":"1_CR43","doi-asserted-by":"crossref","unstructured":"Tzelepi, M., Passalis, N., Tefas, A.: Efficient online subclass knowledge distillation for image classification. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 1007\u20131014 (2021). 10.1109\/ICPR48806.2021.9411995","DOI":"10.1109\/ICPR48806.2021.9411995"},{"key":"1_CR44","doi-asserted-by":"publisher","first-page":"115132","DOI":"10.1016\/j.eswa.2021.115132","volume":"181","author":"M Tzelepi","year":"2021","unstructured":"Tzelepi, M., Passalis, N., Tefas, A.: Online subclass knowledge distillation. Expert Syst. Appl. 181, 115132 (2021)","journal-title":"Expert Syst. Appl."},{"key":"1_CR45","first-page":"607","volume":"35","author":"C Wang","year":"2022","unstructured":"Wang, C., Yang, Q., Huang, R., Song, S., Huang, G.: Efficient knowledge distillation from model checkpoints. Adv. Neural. Inf. Process. Syst. 35, 607\u2013619 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR46","doi-asserted-by":"crossref","unstructured":"Wu, Y., Chi, Z., Wang, Y., Feng, S.: MetaGCD: learning to continually learn in generalized category discovery. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1655\u20131665 (2023)","DOI":"10.1109\/ICCV51070.2023.00159"},{"key":"1_CR47","doi-asserted-by":"crossref","unstructured":"Wu, Y., Chi, Z., Wang, Y., Plataniotis, K.N., Feng, S.: Test-time domain adaptation by learning domain-aware batch normalization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 15961\u201315969 (2024)","DOI":"10.1609\/aaai.v38i14.29527"},{"key":"1_CR48","doi-asserted-by":"crossref","unstructured":"Yang, C., Xie, L., Qiao, S., Yuille, A.L.: Training deep neural networks in generations: a more tolerant teacher educates better students. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 5628\u20135635 (2019)","DOI":"10.1609\/aaai.v33i01.33015628"},{"key":"1_CR49","doi-asserted-by":"crossref","unstructured":"Yang, C., An, Z., Cai, L., Xu, Y.: Hierarchical self-supervised augmented knowledge distillation. arXiv preprint arXiv:2107.13715 (2021)","DOI":"10.24963\/ijcai.2021\/168"},{"key":"1_CR50","doi-asserted-by":"crossref","unstructured":"Yang, C., Zhou, H., An, Z., Jiang, X., Xu, Y., Zhang, Q.: Cross-image relational knowledge distillation for semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12319\u201312328 (2022)","DOI":"10.1109\/CVPR52688.2022.01200"},{"key":"1_CR51","unstructured":"Yang, J., Martinez, B., Bulat, A., Tzimiropoulos, G.: Knowledge distillation via softmax regression representation learning. In: International Conference on Learning Representations (2020)"},{"key":"1_CR52","unstructured":"Ye, L., Hamidi, S.M., Tan, R., Yang, E.H.: Bayes conditional distribution estimation for knowledge distillation based on conditional mutual information. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=yV6wwEbtkR"},{"key":"1_CR53","doi-asserted-by":"crossref","unstructured":"Yim, J., Joo, D., Bae, J., Kim, J.: A gift from knowledge distillation: fast optimization, network minimization and transfer learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4133\u20134141 (2017)","DOI":"10.1109\/CVPR.2017.754"},{"key":"1_CR54","unstructured":"Zagoruyko, S., Komodakis, N.: Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer (2017). https:\/\/arxiv.org\/abs\/1612.03928"},{"key":"1_CR55","doi-asserted-by":"crossref","unstructured":"Zhao, B., Cui, Q., Song, R., Qiu, Y., Liang, J.: Decoupled knowledge distillation. arXiv preprint arXiv:2203.08679 (2022)","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"1_CR56","first-page":"22243","volume":"35","author":"T Zhong","year":"2022","unstructured":"Zhong, T., Chi, Z., Gu, L., Wang, Y., Yu, Y., Tang, J.: Meta-DMoE: adapting to domain shift by meta-distillation from mixture-of-experts. Adv. Neural. Inf. Process. Syst. 35, 22243\u201322257 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR57","unstructured":"Zhu, X., Gong, S., et\u00a0al.: Knowledge distillation by on-the-fly native ensemble. Adv. Neural. Inf. Process. Syst. 31 (2018)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73024-5_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T17:03:04Z","timestamp":1732554184000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73024-5_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,24]]},"ISBN":["9783031730238","9783031730245"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73024-5_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,24]]},"assertion":[{"value":"24 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}