{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T18:59:36Z","timestamp":1769021976506,"version":"3.49.0"},"reference-count":77,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T00:00:00Z","timestamp":1768953600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T00:00:00Z","timestamp":1768953600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s11704-025-41434-w","type":"journal-article","created":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T02:00:11Z","timestamp":1768960811000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploring dark knowledge under various teacher capacities and addressing capacity mismatch"],"prefix":"10.1007","volume":"20","author":[{"given":"Wen-Shu","family":"Fan","sequence":"first","affiliation":[]},{"given":"Xin-Chun","family":"Li","sequence":"additional","affiliation":[]},{"given":"De-Chuan","family":"Zhan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,21]]},"reference":[{"key":"41434_CR1","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1145\/1150402.1150464","volume-title":"Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"C Bucilu\u0103","year":"2006","unstructured":"Bucilu\u0103 C, Caruana R, Niculescu-Mizil A. Model compression. In: Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2006, 535\u2013541"},{"key":"41434_CR2","unstructured":"Howard A G, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H. MobileNets: efficient convolutional neural networks for mobile vision applications. 2017, arXiv preprint arXiv: 1704.04861"},{"key":"41434_CR3","doi-asserted-by":"publisher","first-page":"4510","DOI":"10.1109\/CVPR.2018.00474","volume-title":"Proceedings of 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"M Sandler","year":"2018","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L C. MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2018, 4510\u20134520"},{"key":"41434_CR4","doi-asserted-by":"publisher","first-page":"6848","DOI":"10.1109\/CVPR.2018.00716","volume-title":"Proceedings of 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"X Zhang","year":"2018","unstructured":"Zhang X, Zhou X, Lin M, Sun J. ShuffleNet: an extremely efficient convolutional neural network for mobile devices. In: Proceedings of 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2018, 6848\u20136856"},{"key":"41434_CR5","first-page":"122","volume-title":"Proceedings of the 15th European Conference on Computer Vision (ECCV)","author":"N Ma","year":"2018","unstructured":"Ma N, Zhang X, Zheng H T, Sun J. ShuffleNet V2: practical guidelines for efficient CNN architecture design. In: Proceedings of the 15th European Conference on Computer Vision (ECCV). 2018, 122\u2013138"},{"issue":"1","key":"41434_CR6","first-page":"3","volume":"16","author":"Z H Zhou","year":"2003","unstructured":"Zhou Z H, Jiang Y, Chen S F. Extracting symbolic rules from trained neural network ensembles. AI Communications, 2003, 16(1): 3\u201315","journal-title":"AI Communications"},{"issue":"6","key":"41434_CR7","doi-asserted-by":"publisher","first-page":"770","DOI":"10.1109\/TKDE.2004.11","volume":"16","author":"Z H Zhou","year":"2004","unstructured":"Zhou Z H, Jiang Y. Nec4.5: neural ensemble based C4.5. IEEE Transactions on Knowledge and Data Engineering, 2004, 16(6): 770\u2013773","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"41434_CR8","first-page":"1","volume-title":"Proceedings of the 5th International Conference on Learning Representations","author":"G Urban","year":"2017","unstructured":"Urban G, Geras K J, Kahou S E, Aslan \u00d6, Wang S, Mohamed A, Philipose M, Richardson M, Caruana R. Do deep convolutional nets really need to be deep and convolutional? In: Proceedings of the 5th International Conference on Learning Representations. 2017, 1\u201313"},{"key":"41434_CR9","unstructured":"Hinton G E, Vinyals O, Dean J. Distilling the knowledge in a neural network. 2015, arXiv preprint arXiv: 1503.02531"},{"issue":"5","key":"41434_CR10","doi-asserted-by":"publisher","first-page":"6718","DOI":"10.1109\/TNNLS.2022.3212733","volume":"35","author":"J Gou","year":"2024","unstructured":"Gou J, Sun L, Yu B, Du L, Ramamohanarao K, Tao D. Collaborative knowledge distillation via multiknowledge transfer. IEEE Transactions on Neural Networks and Learning Systems, 2024, 35(5): 6718\u20136730","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"41434_CR11","first-page":"1","volume-title":"Proceedings of the 4th International Conference on Learning Representations","author":"D Lopez-Paz","year":"2016","unstructured":"Lopez-Paz D, Bottou L, Sch\u00f6lkopf B, Vapnik V. Unifying distillation and privileged information. In: Proceedings of the 4th International Conference on Learning Representations. 2016, 1\u201310"},{"key":"41434_CR12","first-page":"5142","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"M Phuong","year":"2019","unstructured":"Phuong M, Lampert C. Towards understanding knowledge distillation. In: Proceedings of the 36th International Conference on Machine Learning. 2019, 5142\u20135151"},{"key":"41434_CR13","first-page":"7632","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"A K Menon","year":"2021","unstructured":"Menon A K, Rawat A S, Reddi S J, Kim S, Kumar S. A statistical perspective on distillation. In: Proceedings of the 38th International Conference on Machine Learning. 2021, 7632\u20137642"},{"issue":"6","key":"41434_CR14","doi-asserted-by":"publisher","first-page":"1789","DOI":"10.1007\/s11263-021-01453-z","volume":"129","author":"J Gou","year":"2021","unstructured":"Gou J, Yu B, Maybank S J, Tao D. Knowledge distillation: a survey. International Journal of Computer Vision, 2021, 129(6): 1789\u20131819","journal-title":"International Journal of Computer Vision"},{"key":"41434_CR15","first-page":"1","volume-title":"Proceedings of the 3rd International Conference on Learning Representations","author":"A Romero","year":"2015","unstructured":"Romero A, Ballas N, Kahou S E, Chassang A, Gatta C, Bengio Y. FitNets: hints for thin deep nets. In: Proceedings of the 3rd International Conference on Learning Representations. 2015, 1\u201313"},{"key":"41434_CR16","first-page":"1","volume-title":"Proceedings of the 5th International Conference on Learning Representations","author":"S Zagoruyko","year":"2017","unstructured":"Zagoruyko S, Komodakis N. Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer. In: Proceedings of the 5th International Conference on Learning Representations. 2017, 1\u201313"},{"key":"41434_CR17","unstructured":"Huang Z, Wang N. Like what you like: knowledge distill via neuron selectivity transfer. 2017, arXiv preprint arXiv: 1707.01219"},{"key":"41434_CR18","first-page":"3779","volume-title":"Proceedings of the 33rd AAAI Conference on Artificial Intelligence","author":"B Heo","year":"2019","unstructured":"Heo B, Lee M, Yun S, Choi J Y. Knowledge transfer via distillation of activation boundaries formed by hidden neurons. In: Proceedings of the 33rd AAAI Conference on Artificial Intelligence. 2019, 3779\u20133787"},{"key":"41434_CR19","first-page":"283","volume-title":"Proceedings of the 15th European Conference on Computer Vision (ECCV)","author":"N Passalis","year":"2018","unstructured":"Passalis N, Tefas A. Learning deep representations with probabilistic knowledge transfer. In: Proceedings of the 15th European Conference on Computer Vision (ECCV). 2018, 283\u2013299"},{"issue":"4","key":"41434_CR20","doi-asserted-by":"publisher","first-page":"4656","DOI":"10.1109\/TNNLS.2022.3223018","volume":"35","author":"C K Joshi","year":"2024","unstructured":"Joshi C K, Liu F, Xun X, Lin J, Foo C S. On representation knowledge distillation for graph neural networks. IEEE Transactions on Neural Networks and Learning Systems, 2024, 35(4): 4656\u20134667","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"41434_CR21","first-page":"5006","volume-title":"Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision","author":"B Peng","year":"2019","unstructured":"Peng B, Jin X, Li D, Zhou S, Wu Y, Liu J, Zhang Z, Liu Y. Correlation congruence for knowledge distillation. In: Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision. 2019, 5006\u20135015"},{"key":"41434_CR22","first-page":"1","volume-title":"Proceedings of the 8th International Conference on Learning Representations","author":"Y Tian","year":"2020","unstructured":"Tian Y, Krishnan D, Isola P. Contrastive representation distillation. In: Proceedings of the 8th International Conference on Learning Representations. 2020, 1\u201319"},{"key":"41434_CR23","first-page":"7130","volume-title":"Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition","author":"J Yim","year":"2017","unstructured":"Yim J, Joo D, Bae J, Kim J. A gift from knowledge distillation: fast optimization, network minimization and transfer learning. In: Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition. 2017, 7130\u20137138"},{"key":"41434_CR24","first-page":"7089","volume-title":"Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Y Liu","year":"2019","unstructured":"Liu Y, Cao J, Li B, Yuan C, Hu W, Li Y, Duan Y. Knowledge distillation via instance relationship graph. In: Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019, 7089\u20137097"},{"key":"41434_CR25","first-page":"2902","volume-title":"Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"L Yu","year":"2019","unstructured":"Yu L, Yazici V O, Liu X, van de Weijer J, Cheng Y, Ramisa A C. In: Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019, 2902\u20132911"},{"key":"41434_CR26","first-page":"1365","volume-title":"Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision","author":"F Tung","year":"2019","unstructured":"Tung F, Mori G. Similarity-preserving knowledge distillation. In: Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision. 2019, 1365\u20131374"},{"key":"41434_CR27","first-page":"12393","volume-title":"Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"H J Ye","year":"2020","unstructured":"Ye H J, Lu S, Zhan D C. Distilling cross-task knowledge via relationship matching. In: Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020, 12393\u201312402"},{"key":"41434_CR28","first-page":"638","volume-title":"Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","author":"J Liu","year":"2019","unstructured":"Liu J, Wen D, Gao H, Tao W, Chen T W, Osa K, Kato M. Knowledge representing: efficient, sparse representation of prior knowledge for knowledge distillation. In: Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops. 2019, 638\u2013646"},{"key":"41434_CR29","first-page":"2825","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"X Li","year":"2018","unstructured":"Li X, Grandvalet Y, Davoine F. Explicit inductive bias for transfer learning with convolutional networks. In: Proceedings of the 35th International Conference on Machine Learning. 2018, 2825\u20132834"},{"issue":"11","key":"41434_CR30","doi-asserted-by":"publisher","first-page":"8743","DOI":"10.1109\/TNNLS.2022.3152732","volume":"34","author":"S Li","year":"2023","unstructured":"Li S, Lin M, Wang Y, Wu Y, Tian Y, Shao L, Ji R. Distilling a powerful student model via online knowledge distillation. IEEE Transactions on Neural Networks and Learning Systems, 2023, 34(11): 8743\u20138752","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"41434_CR31","first-page":"277","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"X C Li","year":"2022","unstructured":"Li X C, Fan W S, Song S, Li Y, Li B, Shao Y, Zhan D C. Asymmetric temperature scaling makes larger networks teach well again. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 277"},{"key":"41434_CR32","first-page":"4793","volume-title":"Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision","author":"J H Cho","year":"2019","unstructured":"Cho J H, Hariharan B. On the efficacy of knowledge distillation. In: Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision. 2019, 4793\u20134801"},{"key":"41434_CR33","first-page":"5191","volume-title":"Proceedings of the 34th AAAI Conference on Artificial Intelligence","author":"S I Mirzadeh","year":"2020","unstructured":"Mirzadeh S I, Farajtabar M, Li A, Levine N, Matsukawa A, Ghasemzadeh H. Improved knowledge distillation via teacher assistant. In: Proceedings of the 34th AAAI Conference on Artificial Intelligence. 2020, 5191\u20135198"},{"key":"41434_CR34","first-page":"5037","volume-title":"Proceedings of 2021 IEEE\/CVF International Conference on Computer Vision","author":"Y Zhu","year":"2021","unstructured":"Zhu Y, Wang Y. Student customized knowledge distillation: bridging the gap between student and teacher. In: Proceedings of 2021 IEEE\/CVF International Conference on Computer Vision. 2021, 5037\u20135046"},{"key":"41434_CR35","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1016\/j.neucom.2020.10.113","volume":"433","author":"M Gao","year":"2021","unstructured":"Gao M, Wang Y, Wan L. Residual error based knowledge distillation. Neurocomputing, 2021, 433: 154\u2013161","journal-title":"Neurocomputing"},{"key":"41434_CR36","doi-asserted-by":"publisher","first-page":"4735","DOI":"10.1109\/TIP.2021.3066051","volume":"30","author":"X Li","year":"2021","unstructured":"Li X, Li S, Omar B, Wu F, Li X. ResKD: residual-guided knowledge distillation. IEEE Transactions on Image Processing, 2021, 30: 4735\u20134746","journal-title":"IEEE Transactions on Image Processing"},{"key":"41434_CR37","series-title":"Technical Report TR-2009","volume-title":"Learning multiple layers of features from tiny images","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A. Learning multiple layers of features from tiny images. Technical Report TR-2009, Toronto: University of Toronto, 2012"},{"key":"41434_CR38","first-page":"1180","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning","author":"Y Ganin","year":"2015","unstructured":"Ganin Y, Lempitsky V. Unsupervised domain adaptation by backpropagation. In: Proceedings of the 32nd International Conference on International Conference on Machine Learning. 2015, 1180\u20131189"},{"key":"41434_CR39","unstructured":"Tang J, Shivanna R, Zhao Z, Lin D, Singh A, Chi E H, Jain S. Understanding and improving knowledge distillation. 2020, arXiv preprint arXiv: 2002.03532"},{"key":"41434_CR40","first-page":"1749","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"G Ji","year":"2020","unstructured":"Ji G, Zhu Z. Knowledge distillation in wide neural networks: risk bound, data efficiency and imperfect teacher. In: Proceedings of the 34th International Conference on Neural Information Processing Systems. 2020, 1749"},{"key":"41434_CR41","first-page":"3902","volume-title":"Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"L Yuan","year":"2020","unstructured":"Yuan L, Tay F E H, Li G, Wang T, Feng J. Revisiting knowledge distillation via label smoothing regularization. In: Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020, 3902\u20133910"},{"key":"41434_CR42","first-page":"1","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"T Dao","year":"2021","unstructured":"Dao T, Kamath G M, Syrgkanis V, Mackey L. Knowledge distillation as semiparametric inference. In: Proceedings of the 9th International Conference on Learning Representations. 2021, 1\u201320"},{"key":"41434_CR43","first-page":"1","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"H Zhou","year":"2021","unstructured":"Zhou H, Song L, Chen J, Zhou Y, Wang G, Yuan J, Zhang Q. Rethinking soft labels for knowledge distillation: a bias-variance tradeoff perspective. In: Proceedings of the 9th International Conference on Learning Representations. 2021, 1\u201315"},{"key":"41434_CR44","first-page":"1","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"D Hsu","year":"2021","unstructured":"Hsu D, Ji Z, Telgarsky M, Wang L. Generalization bounds via distillation. In: Proceedings of the 9th International Conference on Learning Representations. 2021, 1\u201325"},{"key":"41434_CR45","first-page":"12922","volume-title":"Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"X Cheng","year":"2020","unstructured":"Cheng X, Rao Z, Chen Y, Zhang Q. Explaining knowledge distillation by quantifying the knowledge. In: Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020, 12922\u201312932"},{"key":"41434_CR46","first-page":"422","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","author":"R M\u00fcller","year":"2019","unstructured":"M\u00fcller R, Kornblith S, Hinton G. When does label smoothing help? In: Proceedings of the 33rd International Conference on Neural Information Processing Systems. 2019, 422"},{"key":"41434_CR47","first-page":"1","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"Z Shen","year":"2021","unstructured":"Shen Z, Liu Z, Xu D, Chen Z, Cheng K T, Savvides M. Is label smoothing truly incompatible with knowledge distillation: an empirical study. In: Proceedings of the 9th International Conference on Learning Representations. 2021, 1\u201317"},{"issue":"6","key":"41434_CR48","doi-asserted-by":"publisher","first-page":"4190","DOI":"10.1109\/TCSVT.2023.3327113","volume":"34","author":"C Li","year":"2024","unstructured":"Li C, Cheng G, Han J. Boosting knowledge distillation via intra-class logit distribution smoothing. IEEE Transactions on Circuits and Systems for Video Technology, 2024, 34(6): 4190\u20134201","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"41434_CR49","first-page":"1607","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"T Furlanello","year":"2018","unstructured":"Furlanello T, Lipton Z, Tschannen M, Itti L, Anandkumar A. Born again neural networks. In: Proceedings of the 35th International Conference on Machine Learning. 2018, 1607\u20131616"},{"issue":"2","key":"41434_CR50","doi-asserted-by":"publisher","first-page":"2290","DOI":"10.1109\/TNNLS.2022.3189680","volume":"35","author":"C Tan","year":"2024","unstructured":"Tan C, Liu J. Improving knowledge distillation with a customized teacher. IEEE Transactions on Neural Networks and Learning Systems, 2024, 35(2): 2290\u20132299","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"41434_CR51","first-page":"2849","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"T Huang","year":"2023","unstructured":"Huang T, Zhang Y, Zheng M, You S, Wang F, Qian C, Xu C. Knowledge diffusion for distillation. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. 2023, 2849"},{"key":"41434_CR52","first-page":"2443","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"T Huang","year":"2022","unstructured":"Huang T, You S, Wang F, Qian C, Xu C. Knowledge distillation from a stronger teacher. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 2443"},{"key":"41434_CR53","first-page":"44","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"C Wang","year":"2022","unstructured":"Wang C, Yang Q, Huang R, Song S, Huang G. Efficient knowledge distillation from model checkpoints. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 44"},{"issue":"3","key":"41434_CR54","doi-asserted-by":"publisher","first-page":"1425","DOI":"10.1109\/TPAMI.2022.3160362","volume":"46","author":"H Ye","year":"2024","unstructured":"Ye H, Ming L, Zhan D, Chao W. Few-shot learning with a strong teacher. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2024, 46(3): 1425\u20131440","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"9","key":"41434_CR55","doi-asserted-by":"publisher","first-page":"6280","DOI":"10.1109\/TPAMI.2024.3379505","volume":"46","author":"Y Wang","year":"2024","unstructured":"Wang Y, Qian B, Liu H, Rui Y, Wang M. Unpacking the gap box against data-free knowledge distillation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2024, 46(9): 6280\u20136291","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"41434_CR56","first-page":"15731","volume-title":"Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"S Sun","year":"2024","unstructured":"Sun S, Ren W, Li J, Wang R, Cao X. Logit standardization in knowledge distillation. In: Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2024, 15731\u201315740"},{"key":"41434_CR57","first-page":"770","volume-title":"Proceedings of 2016 IEEE Conference on Computer Vision and Pattern Recognition","author":"K He","year":"2016","unstructured":"He K, Zhang X, Ren S, Sun J. Deep residual learning for image recognition. In: Proceedings of 2016 IEEE Conference on Computer Vision and Pattern Recognition. 2016, 770\u2013778"},{"key":"41434_CR58","first-page":"5987","volume-title":"Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition","author":"S Xie","year":"2017","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K. Aggregated residual transformations for deep neural networks. In: Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition. 2017, 5987\u20135995"},{"key":"41434_CR59","volume-title":"First Workshop on Fine-Grained Visual Categorization","author":"A Khosla","year":"2011","unstructured":"Khosla A, Jayadevaprakash N, Yao B, Li F F. Novel dataset for fine-grained image categorization. In: First Workshop on Fine-Grained Visual Categorization, CVPR. Citeseer. 2011"},{"key":"41434_CR60","unstructured":"Wah C, Branson S, Welinder P, Perona P, Belongie S. The CaltechUCSD birds-200-2011 dataset. dataset[J]. 2011"},{"key":"41434_CR61","first-page":"87.1","volume-title":"Proceedings of the British Machine Vision Conference","author":"S Zagoruyko","year":"2016","unstructured":"Zagoruyko S, Komodakis N. Wide residual networks. In: Proceedings of the British Machine Vision Conference. 2016, 87.1\u201387.12"},{"key":"41434_CR62","unstructured":"Tavanaei A. Embedded encoder-decoder in convolutional networks towards explainable AI. 2020, arXiv preprint arXiv: 2007.06712"},{"issue":"11","key":"41434_CR63","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun Y, Bottou L, Bengio Y, Haffner P. Gradient-based learning applied to document recognition. Proceedings of the IEEE, 1998, 86(11): 2278\u20132324","journal-title":"Proceedings of the IEEE"},{"key":"41434_CR64","first-page":"1","volume-title":"Proceedings of the 3rd International Conference on Learning Representations","author":"K Simonyan","year":"2015","unstructured":"Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition. In: Proceedings of the 3rd International Conference on Learning Representations. 2015, 1\u201314"},{"key":"41434_CR65","first-page":"1","volume-title":"Proceedings of the NIPS Workshop on Deep Learning and Unsupervised Feature Learning","author":"Y Netzer","year":"2011","unstructured":"Netzer Y, Wang T, Coates A, Bissacco A, Wu B, Ng A Y. Reading digits in natural images with unsupervised feature learning. In: Proceedings of the NIPS Workshop on Deep Learning and Unsupervised Feature Learning. 2011, 1\u20139"},{"key":"41434_CR66","first-page":"807","volume-title":"Proceedings of the 27th International Conference on International Conference on Machine Learning","author":"V Nair","year":"2010","unstructured":"Nair V, Hinton G E. Rectified linear units improve restricted Boltzmann machines. In: Proceedings of the 27th International Conference on International Conference on Machine Learning. 2010, 807\u2013814"},{"key":"41434_CR67","first-page":"1","volume-title":"Proceedings of the 1st International Conference on Learning Representations","author":"L van der Maaten","year":"2013","unstructured":"van der Maaten L. Barnes-hut-SNE. In: Proceedings of the 1st International Conference on Learning Representations. 2013, 1\u201311"},{"issue":"2","key":"41434_CR68","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1111\/j.1469-1809.1936.tb02137.x","volume":"7","author":"R A Fisher","year":"1936","unstructured":"Fisher R A. The use of multiple measurements in taxonomic problems. Annals of Eugenics, 1936, 7(2): 179\u2013188","journal-title":"Annals of Eugenics"},{"issue":"4","key":"41434_CR69","doi-asserted-by":"publisher","first-page":"650","DOI":"10.1109\/TPAMI.2007.1008","volume":"29","author":"J Yang","year":"2007","unstructured":"Yang J, Zhang D, Yang J Y, Niu B. Globally maximizing, locally minimizing: unsupervised discriminant projection with applications to face and palm biometrics. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2007, 29(4): 650\u2013664","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"41434_CR70","doi-asserted-by":"publisher","first-page":"995","DOI":"10.1145\/3447548.3467254","volume-title":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining","author":"X C Li","year":"2021","unstructured":"Li X C, Zhan D C. FedRS: federated learning with restricted softmax for label distribution non-II D data. In: Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining. 2021, 995\u20131005"},{"issue":"1","key":"41434_CR71","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1137\/S0895480102412856","volume":"17","author":"R Fagin","year":"2003","unstructured":"Fagin R, Kumar R, Sivakumar D. Comparing top k lists. SIAM Journal on Discrete Mathematics, 2003, 17(1): 134\u2013160","journal-title":"SIAM Journal on Discrete Mathematics"},{"key":"41434_CR72","first-page":"12133","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"K You","year":"2021","unstructured":"You K, Liu Y, Wang J, Long M. LogME: practical assessment of pre-trained models for transfer learning. In: Proceedings of the 38th International Conference on Machine Learning. 2021, 12133\u201312143"},{"key":"41434_CR73","first-page":"196","volume-title":"Proceedings of the 1st International Workshop on Feature Extraction: Modern Questions and Challenges at NIPS 2015","author":"Y Li","year":"2015","unstructured":"Li Y, Yosinski J, Clune J, Lipson H, Hopcroft J. Convergent learning: do different neural networks learn the same representations? In: Proceedings of the 1st International Workshop on Feature Extraction: Modern Questions and Challenges at NIPS 2015. 2015, 196\u2013212"},{"key":"41434_CR74","unstructured":"Ren Y, Guo Q, Jin Z, Ravfogel S, Sachan M, Sch\u00f6lkopf B, Cotterell R. All roads lead to Rome? Exploring the invariance of transformers\u2019 representations. 2023, arXiv preprint arXiv: 2305.14555"},{"key":"41434_CR75","first-page":"3446","volume-title":"Proceedings of the 28th International Joint Conference on Artificial Intelligence","author":"C Shui","year":"2019","unstructured":"Shui C, Abbasi M, Robitaille L \u00c9, Wang B, Gagn\u00e9 C. A principled approach for learning task similarity in multitask learning. In: Proceedings of the 28th International Joint Conference on Artificial Intelligence. 2019, 3446\u20133452"},{"key":"41434_CR76","first-page":"2890","volume-title":"Proceedings of the 39th International Conference on Machine Learning","author":"K Chandrasegaran","year":"2022","unstructured":"Chandrasegaran K, Tran N T, Zhao Y, Cheung N M. Revisiting label smoothing and knowledge distillation compatibility: what was missing? In: Proceedings of the 39th International Conference on Machine Learning. 2022, 2890\u20132916"},{"key":"41434_CR77","first-page":"1097","volume-title":"Proceedings of the 26th International Conference on Neural Information Processing Systems","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton G E. ImageNet classification with deep convolutional neural networks. In: Proceedings of the 26th International Conference on Neural Information Processing Systems. 2012, 1097\u20131105"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-025-41434-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-025-41434-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-025-41434-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T03:03:20Z","timestamp":1768964600000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-025-41434-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,21]]},"references-count":77,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["41434"],"URL":"https:\/\/doi.org\/10.1007\/s11704-025-41434-w","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,21]]},"assertion":[{"value":"30 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no competing interests or financial conflicts to disclose.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"2006333"}}