{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T09:07:45Z","timestamp":1779095265035,"version":"3.51.4"},"reference-count":88,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T00:00:00Z","timestamp":1779062400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T00:00:00Z","timestamp":1779062400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001839","name":"University Grants Committee","doi-asserted-by":"publisher","award":["UGC\/FDS24\/E20\/25"],"award-info":[{"award-number":["UGC\/FDS24\/E20\/25"]}],"id":[{"id":"10.13039\/501100001839","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s00607-026-01671-8","type":"journal-article","created":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T08:21:03Z","timestamp":1779092463000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A layered self-supervised knowledge distillation framework for efficient image representation learning"],"prefix":"10.1007","volume":"108","author":[{"given":"Tarique","family":"Dahri","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zulfiqar Ali","family":"Memon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenyu","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheheryar","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sadiq","family":"Ahmad","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Muhammad","family":"Asim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Saddam","family":"Aziz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rizwan","family":"Qureshi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,5,18]]},"reference":[{"key":"1671_CR1","doi-asserted-by":"crossref","unstructured":"Miko\u0142ajczyk A, Grochowski M (2018) Data augmentation for improving deep learning in image classification problem. In: International interdisciplinary PhD workshop (IIPhDW). IEEE, pp 117\u2013122","DOI":"10.1109\/IIPHDW.2018.8388338"},{"key":"1671_CR2","doi-asserted-by":"crossref","unstructured":"Yu Z, Wang P (2024) .Capan: Class-aware prototypical adversarial networks for unsupervised domain adaptation. In: 2024 IEEE international conference on multimedia and expo (ICME). IEEE, pp 1\u20136","DOI":"10.1109\/ICME57554.2024.10687425"},{"key":"1671_CR3","doi-asserted-by":"crossref","unstructured":"Wang P, Yang Y, Yu Z (2024) Multi-batch nuclear-norm adversarial network for unsupervised domain adaptation. 2024 IEEE international conference on multimedia and expo (ICME). IEEE, pp 1\u20136","DOI":"10.1109\/ICME57554.2024.10688076"},{"issue":"11","key":"1671_CR4","doi-asserted-by":"publisher","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","volume":"30","author":"Z-Q Zhao","year":"2019","unstructured":"Zhao Z-Q, Zheng P, Xu S-T, Wu X (2019) Object detection with deep learning: a review. IEEE Trans Neural Netw Learn Syst 30(11):3212\u20133232","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1671_CR5","doi-asserted-by":"crossref","unstructured":"Huang Y, Wu J, Xu X, Ding S (2022) Evaluation-oriented knowledge distillation for deep face recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 18740\u201318749","DOI":"10.1109\/CVPR52688.2022.01818"},{"key":"1671_CR6","doi-asserted-by":"crossref","unstructured":"Zhang F, Zhu X, Ye M (2019) Fast human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 3517\u20133526","DOI":"10.1109\/CVPR.2019.00363"},{"key":"1671_CR7","doi-asserted-by":"publisher","first-page":"5573","DOI":"10.1109\/TIP.2021.3086590","volume":"30","author":"Y Liu","year":"2021","unstructured":"Liu Y, Wang K, Li G, Lin L (2021) Semantics-aware adaptive knowledge distillation for sensor-to-vision action recognition. IEEE Trans Image Process 30:5573\u20135588","journal-title":"IEEE Trans Image Process"},{"key":"1671_CR8","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1016\/j.asoc.2018.05.018","volume":"70","author":"A Garcia-Garcia","year":"2018","unstructured":"Garcia-Garcia A, Orts-Escolano S, Oprea S, Villena-Martinez V, Martinez-Gonzalez P, Garcia-Rodriguez J (2018) A survey on deep learning techniques for image and video semantic segmentation. Appl Soft Comput 70:41\u201365","journal-title":"Appl Soft Comput"},{"issue":"21","key":"1671_CR9","doi-asserted-by":"publisher","first-page":"5415","DOI":"10.3390\/rs14215415","volume":"14","author":"Y Luo","year":"2022","unstructured":"Luo Y, Wang J, Yang X, Yu Z, Tan Z (2022) Pixel representation augmented through cross-attention for high-resolution remote sensing imagery segmentation. Remote Sensing 14(21):5415","journal-title":"Remote Sensing"},{"key":"1671_CR10","unstructured":"Thompson NC, Greenewald K, Lee K, Manso GF (2020) The computational limits of deep learning (arXiv preprint)"},{"issue":"3","key":"1671_CR11","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1109\/MCAS.2021.3092533","volume":"21","author":"S Yu","year":"2021","unstructured":"Yu S, Jiang H, Huang S, Peng X, Lu A (2021) Compute-in-memory chips for deep learning: recent trends and prospects. IEEE Circuits Syst Mag 21(3):31\u201356","journal-title":"IEEE Circuits Syst Mag"},{"key":"1671_CR12","doi-asserted-by":"crossref","unstructured":"Strubell E, Ganesh A, McCallum A. Energy and policy considerations for modern deep learning research. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp. 13693\u201313696 (2020)","DOI":"10.1609\/aaai.v34i09.7123"},{"key":"1671_CR13","first-page":"1","volume":"73","author":"J Ji","year":"2024","unstructured":"Ji J, Shu Z, Li H, Lai KX, Lu M, Jiang G, Wang W, Zheng Y, Jiang X (2024) Edge-computing based knowledge distillation and multi-task learning for partial discharge recognition. IEEE Trans Instrum Meas 73:1\u201311","journal-title":"IEEE Trans Instrum Meas"},{"issue":"1","key":"1671_CR14","first-page":"771","volume":"141","author":"L Yin","year":"2024","unstructured":"Yin L, Wang L, Cai Z, Lu S, Wang R, AlSanad A, AlQahtan SA, Chen X, Yin Z, Li X et al (2024) Dpal-bert: a faster and lighter question answering model. CMES-Comput Model Eng Sci 141(1):771\u2013786","journal-title":"CMES-Comput Model Eng Sci"},{"key":"1671_CR15","unstructured":"Han S, Pool J, Tran J, Dally W (2015) Learning both weights and connections for efficient neural network. Adv Neural Inf Process Syst, 28"},{"key":"1671_CR16","doi-asserted-by":"crossref","unstructured":"Wu J, Leng C, Wang Y, Hu Q, Cheng J (2016) Quantized convolutional neural networks for mobile devices. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 4820\u20134828","DOI":"10.1109\/CVPR.2016.521"},{"key":"1671_CR17","doi-asserted-by":"crossref","unstructured":"Bucilu\u01ce C, Caruana R, Niculescu-Mizil A (2006) Model compression. In: Proceedings of the 12th ACM SIGKDD international conference on knowledge discovery and data mining. pp 535\u2013541","DOI":"10.1145\/1150402.1150464"},{"key":"1671_CR18","doi-asserted-by":"crossref","unstructured":"Kim K, Ji B, Yoon D, Hwang S (2021) Self-knowledge distillation with progressive refinement of targets. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp 6567\u20136576","DOI":"10.1109\/ICCV48922.2021.00650"},{"key":"1671_CR19","doi-asserted-by":"crossref","unstructured":"Nowlan SJ, Hinton GE (2018) Simplifying neural networks by soft weight sharing. In: The mathematics of generalization, pp 373\u2013394. CRC Press","DOI":"10.1201\/9780429492525-13"},{"issue":"1","key":"1671_CR20","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"1671_CR21","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: Accelerating deep network training by reducing internal covariate shift. In: International conference on machine learning. pp 448\u2013456 (pmlr)"},{"key":"1671_CR22","unstructured":"DeVries T, Taylor GW (2017) Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552"},{"key":"1671_CR23","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network (arXiv preprint)"},{"key":"1671_CR24","unstructured":"Yu Z (2024) Improved implicit diffusion model with knowledge distillation to estimate the spatial distribution density of carbon stock in remote sensing imagery (arXiv preprint)"},{"key":"1671_CR25","unstructured":"Zagoruyko S, Komodakis N (2016) Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer (arXiv preprint)"},{"key":"1671_CR26","doi-asserted-by":"crossref","unstructured":"Yang C, An Z, Cai L, Xu Y (2021) Hierarchical self-supervised augmented knowledge distillation (arXiv preprint)","DOI":"10.24963\/ijcai.2021\/168"},{"issue":"2","key":"1671_CR27","doi-asserted-by":"publisher","first-page":"2094","DOI":"10.1109\/TNNLS.2022.3186807","volume":"35","author":"C Yang","year":"2022","unstructured":"Yang C, An Z, Cai L, Xu Y (2022) Knowledge distillation using hierarchical self-supervision augmented distribution. IEEE Trans Neural Netw Learn Syst 35(2):2094\u2013108","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1671_CR28","doi-asserted-by":"crossref","unstructured":"Sarfraz F, Arani E, Zonooz B (2021) Knowledge distillation beyond model compression. In: 2020 25th international conference on pattern recognition (ICPR). IEEE, pp 6136\u20136143","DOI":"10.1109\/ICPR48806.2021.9413016"},{"key":"1671_CR29","unstructured":"Doshi D, Kim J-E (2024) Reffakd: Resource-efficient autoencoder-based knowledge distillation (arXiv preprint)"},{"key":"1671_CR30","doi-asserted-by":"crossref","unstructured":"Zhang J, Song J, Gao L, Sebe N, Shen HT (2025) Reliable few-shot learning under dual noises. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2025.3584051"},{"key":"1671_CR31","doi-asserted-by":"crossref","unstructured":"Zhang J, Wu S, Gao L, Song J, Sebe N, Shen HT (2025) A closer look at conditional prompt tuning for vision-language models (arXiv preprint)","DOI":"10.1007\/s11263-026-02759-6"},{"key":"1671_CR32","unstructured":"Luo X, Zou D, Gao L, Xu Z, Song J (2023) Less is more: On the feature redundancy of pretrained models when transferring to few-shot tasks (arXiv preprint)"},{"key":"1671_CR33","doi-asserted-by":"publisher","first-page":"6115","DOI":"10.1109\/TIP.2023.3328478","volume":"32","author":"J Zhang","year":"2023","unstructured":"Zhang J, Gao L, Hao B, Huang H, Song J, Shen H (2023) From global to local: multi-scale out-of-distribution detection. IEEE Trans Image Process 32:6115\u20136128","journal-title":"IEEE Trans Image Process"},{"key":"1671_CR34","unstructured":"Lopez-Paz D, Bottou L, Sch\u00f6lkopf B, Vapnik V (2015) Unifying distillation and privileged information (arXiv preprint)"},{"key":"1671_CR35","first-page":"3430","volume":"34","author":"D Chen","year":"2020","unstructured":"Chen D, Mei J-P, Wang C, Feng Y, Chen C (2020) Online knowledge distillation with diverse peers. Proc AAAI Conf Artif Intell 34:3430\u20133437","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"1671_CR36","doi-asserted-by":"crossref","unstructured":"Son W, Na J, Choi J, Hwang W (2021) Densely guided knowledge distillation using multiple teacher assistants. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp 9395\u20139404","DOI":"10.1109\/ICCV48922.2021.00926"},{"key":"1671_CR37","doi-asserted-by":"crossref","unstructured":"Li S, Su T, Zhang X, Wang Z (2024) Continual learning with knowledge distillation: a survey. Authorea Preprints","DOI":"10.36227\/techrxiv.170422196.66991957\/v1"},{"key":"1671_CR38","first-page":"1","volume":"61","author":"Y Wen","year":"2023","unstructured":"Wen Y, Gao T, Zhang J, Li Z, Chen T (2023) Encoder-free multiaxis physics-aware fusion network for remote sensing image dehazing. IEEE Trans Geosci Remote Sens 61:1\u201315","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"1671_CR39","doi-asserted-by":"publisher","first-page":"8383","DOI":"10.1109\/TMM.2023.3236837","volume":"25","author":"J Rao","year":"2023","unstructured":"Rao J, Ding L, Qi S, Fang M, Liu Y, Shen L, Tao D (2023) Dynamic contrastive distillation for image-text retrieval. IEEE Trans Multimedia 25:8383\u20138395","journal-title":"IEEE Trans Multimedia"},{"issue":"6","key":"1671_CR40","doi-asserted-by":"publisher","first-page":"1789","DOI":"10.1007\/s11263-021-01453-z","volume":"129","author":"J Gou","year":"2021","unstructured":"Gou J, Yu B, Maybank SJ, Tao D (2021) Knowledge distillation: a survey. Int J Comput Vision 129(6):1789\u20131819","journal-title":"Int J Comput Vision"},{"key":"1671_CR41","doi-asserted-by":"crossref","unstructured":"Hadi MU, Qureshi R, Shah A, Irfan M, Zafar A, Shaikh MB, Akhtar N, Wu J, Mirjalili S (2023) A survey on large language models: Applications, challenges, limitations, and practical usage. Authorea Preprints","DOI":"10.36227\/techrxiv.23589741.v1"},{"key":"1671_CR42","unstructured":"Xu X, Li M, Tao C, Shen T, Cheng R, Li J, Xu C, Tao D, Zhou T (2024) A survey on knowledge distillation of large language models. arXiv preprint arXiv:2402.13116"},{"key":"1671_CR43","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman FL, Almeida D, Altenschmidt J, Altman S, Anadkat S (2023) Gpt-4 technical report (arXiv preprint)"},{"key":"1671_CR44","unstructured":"Touvron H, Martin L, Stone K, Albert P, Almahairi A, Babaei Y, Bashlykov N, Batra S, Bhargava P, Bhosale S (2023) Llama 2: Open foundation and fine-tuned chat models (arXiv preprint)"},{"key":"1671_CR45","volume-title":"Mistral\u2013a journey towards reproducible language model training","author":"S Karamcheti","year":"2021","unstructured":"Karamcheti S, Orr L, Bolton J, Zhang T, Goel K, Narayan A, Bommasani R, Narayanan D, Hashimoto T, Jurafsky D et al (2021) Mistral\u2013a journey towards reproducible language model training. Stanford Center for Research on Foundation Models, Palo Alto"},{"key":"1671_CR46","doi-asserted-by":"crossref","unstructured":"Cho JH, Hariharan B (2019) On the efficacy of knowledge distillation. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp 4794\u20134802","DOI":"10.1109\/ICCV.2019.00489"},{"key":"1671_CR47","unstructured":"Kim Y, Nam W, Kim H, Kim J-H, Kim G . Curiosity-bottleneck: Exploration by distilling task-specific novelty. In: International conference on machine learning, pp 3379\u20133388 (2019). PMLR"},{"key":"1671_CR48","first-page":"9164","volume":"35","author":"Z Hao","year":"2022","unstructured":"Hao Z, Guo J, Jia D, Han K, Tang Y, Zhang C, Hu H, Wang Y (2022) Learning efficient vision transformers via fine-grained manifold distillation. Adv Neural Inf Process Syst 35:9164\u20139175","journal-title":"Adv Neural Inf Process Syst"},{"key":"1671_CR49","unstructured":"Chen T, Zhang Z, Liu S, Chang S, Wang Z (2020) Robust overfitting may be mitigated by properly learned smoothening. In: International conference on learning representations"},{"key":"1671_CR50","doi-asserted-by":"crossref","unstructured":"Ahn S, Hu SX, Damianou A, Lawrence ND, Dai Z (2019) Variational information distillation for knowledge transfer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 9163\u20139171","DOI":"10.1109\/CVPR.2019.00938"},{"key":"1671_CR51","doi-asserted-by":"crossref","unstructured":"Park W, Kim D, Lu Y, Cho M (2019) Relational knowledge distillation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 3967\u20133976","DOI":"10.1109\/CVPR.2019.00409"},{"key":"1671_CR52","doi-asserted-by":"crossref","unstructured":"Tung F, Mori G (2019) Similarity-preserving knowledge distillation. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp 1365\u20131374","DOI":"10.1109\/ICCV.2019.00145"},{"issue":"3","key":"1671_CR53","first-page":"1","volume":"2","author":"R Adriana","year":"2015","unstructured":"Adriana R, Nicolas B, Ebrahimi KS, Antoine C, Carlo G, Yoshua B (2015) Fitnets: Hints for thin deep nets. Proc ICLR 2(3):1","journal-title":"Proc ICLR"},{"key":"1671_CR54","doi-asserted-by":"crossref","unstructured":"Yim J, Joo D, Bae J, Kim J (2017) A gift from knowledge distillation: Fast optimization, network minimization and transfer learning. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 4133\u20134141","DOI":"10.1109\/CVPR.2017.754"},{"key":"1671_CR55","doi-asserted-by":"crossref","unstructured":"Heo B, Lee M, Yun S, Choi JY (2019) Knowledge transfer via distillation of activation boundaries formed by hidden neurons. In: Proceedings of the AAAI conference on artificial intelligence, vol 33. pp 3779\u20133787","DOI":"10.1609\/aaai.v33i01.33013779"},{"key":"1671_CR56","unstructured":"Lee C-Y, Xie S, Gallagher P, Zhang Z, Tu Z (2015) Deeply-supervised nets. Artificial intelligence and statistics 562\u2013570 (Pmlr)"},{"key":"1671_CR57","doi-asserted-by":"crossref","unstructured":"Chen D, Mei J-P, Zhang H, Wang C, Feng Y, Chen C (2022) Knowledge distillation with the reused teacher classifier. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11933\u201311942","DOI":"10.1109\/CVPR52688.2022.01163"},{"key":"1671_CR58","doi-asserted-by":"crossref","unstructured":"Luo S, Chen D, Wang C (2023) Knowledge distillation with deep supervision. In: 2023 international joint conference on neural networks (IJCNN). IEEE, pp 1\u20138","DOI":"10.1109\/IJCNN54540.2023.10191309"},{"key":"1671_CR59","unstructured":"Tian Y, Krishnan D, Isola P (2019) Contrastive representation distillation (arXiv preprint)"},{"key":"1671_CR60","doi-asserted-by":"crossref","unstructured":"Liu W, Nie S, Yin J, Wang R, Gao D, Jin L (2021) Sskd: Self-supervised knowledge distillation for cross domain adaptive person re-identification. In: 2021 7th IEEE international conference on network intelligence and digital content (IC-NIDC). IEEE, pp 81\u201385","DOI":"10.1109\/IC-NIDC54101.2021.9660538"},{"key":"1671_CR61","unstructured":"Chen T, Kornblith S, Norouzi M, Hinton G (2020) A simple framework for contrastive learning of visual representations. In: International conference on machine learning. pp 1597\u20131607 (PMLR)"},{"issue":"1","key":"1671_CR62","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1109\/TII.2024.3421600","volume":"21","author":"M Yao","year":"2024","unstructured":"Yao M, Tao D, Gao R, Qi P (2024) Anomaly detection for mec enabled hierarchical industrial iot with transformer enhanced variational auto encoder. IEEE Trans Industr Inf 21(1):40\u201348","journal-title":"IEEE Trans Industr Inf"},{"issue":"2","key":"1671_CR63","doi-asserted-by":"publisher","first-page":"1012","DOI":"10.1109\/TSC.2025.3536306","volume":"18","author":"M Yao","year":"2025","unstructured":"Yao M, Tao D, Qi P, Gao R (2025) Scalable large model for unlabeled anomaly detection with trio-attention u-transformer and manifold-learning siamese discriminator. IEEE Trans Serv Comput 18(2):1012\u20131025","journal-title":"IEEE Trans Serv Comput"},{"key":"1671_CR64","doi-asserted-by":"crossref","unstructured":"Yao M (2026) Spectra: Spatial-temporal parallel memory with agent attention fusion and embedding alignment for time-series anomaly detection","DOI":"10.31224\/5910"},{"key":"1671_CR65","doi-asserted-by":"publisher","first-page":"8579","DOI":"10.1109\/TASE.2024.3486688","volume":"22","author":"M Yao","year":"2024","unstructured":"Yao M, Tao D, Qi P, Gao R (2024) Rethinking discrepancy analysis: anomaly detection via meta-learning powered dual-source representation differentiation. IEEE Trans Autom Sci Eng 22:8579\u20138592","journal-title":"IEEE Trans Autom Sci Eng"},{"key":"1671_CR66","doi-asserted-by":"crossref","unstructured":"Yuan L, Tay FE, Li G, Wang T, Feng J (2020) Revisiting knowledge distillation via label smoothing regularization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 3903\u20133911","DOI":"10.1109\/CVPR42600.2020.00396"},{"key":"1671_CR67","doi-asserted-by":"crossref","unstructured":"Yun S, Park J, Lee K, Shin J (2020) Regularizing class-wise predictions via self-knowledge distillation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 13876\u201313885","DOI":"10.1109\/CVPR42600.2020.01389"},{"issue":"6","key":"1671_CR68","doi-asserted-by":"publisher","first-page":"6181","DOI":"10.1007\/s10489-021-02721-8","volume":"52","author":"J-Y Niu","year":"2022","unstructured":"Niu J-Y, Xie Z-H, Li Y, Cheng S-J, Fan J-W (2022) Scale fusion light cnn for hyperspectral face recognition with knowledge distillation and attention mechanism. Appl Intell 52(6):6181\u20136195","journal-title":"Appl Intell"},{"key":"1671_CR69","doi-asserted-by":"crossref","unstructured":"Xu G, Liu Z, Li X, Loy CC (2020) Knowledge distillation meets self-supervision. European Conference on Computer Vision. Springer, pp 588\u2013604","DOI":"10.1007\/978-3-030-58545-7_34"},{"key":"1671_CR70","unstructured":"Kim JH, Ngo BH, Park JH, Kwon JE, Lee HS, Cho SI (2022) Distilling and refining domain-specific knowledge for semi-supervised domain adaptation. In: Bmvc, p 606"},{"key":"1671_CR71","doi-asserted-by":"crossref","unstructured":"You S, Xu C, Xu C, Tao D (2017) Learning from multiple teacher networks. In: Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining, pp 1285\u20131294","DOI":"10.1145\/3097983.3098135"},{"key":"1671_CR72","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Identity mappings in deep residual networks. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part IV 14, pp 630\u2013645. Springer","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"1671_CR73","unstructured":"Krizhevsky A, Hinton G (2009) Learning multiple layers of features from tiny images"},{"key":"1671_CR74","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems. p 25"},{"key":"1671_CR75","unstructured":"Le Y, Yang X (2015) Tiny imagenet visual recognition challenge. CS 231N 7(7), 3"},{"key":"1671_CR76","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1671_CR77","doi-asserted-by":"crossref","unstructured":"Zagoruyko S, Komodakis N (2016) Wide residual networks (arXiv preprint)","DOI":"10.5244\/C.30.87"},{"key":"1671_CR78","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"1671_CR79","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition (arXiv preprint)"},{"key":"1671_CR80","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Proceedings of the IEEE conference on computer vision and pattern recognition. pp 4510\u20134520 (Mobilenetv 2: Inverted residuals and linear bottlenecks)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"1671_CR81","doi-asserted-by":"crossref","unstructured":"Ma N, Zhang X, Zheng H-T, Sun J (2018) Shufflenet v2: Practical guidelines for efficient cnn architecture design. In: Proceedings of the European conference on computer vision (ECCV). pp 116\u2013131","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"1671_CR82","doi-asserted-by":"crossref","unstructured":"Zhang X, Zhou X, Lin M, Sun J (2018) Shufflenet: An extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 6848\u20136856","DOI":"10.1109\/CVPR.2018.00716"},{"key":"1671_CR83","doi-asserted-by":"crossref","unstructured":"Peng B, Jin X, Liu J, Li D, Wu Y, Liu Y, Zhou S, Zhang Z. Correlation congruence for knowledge distillation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 5007\u20135016 (2019)","DOI":"10.1109\/ICCV.2019.00511"},{"issue":"2","key":"1671_CR84","doi-asserted-by":"publisher","first-page":"2094","DOI":"10.1109\/TNNLS.2022.3186807","volume":"35","author":"C Yang","year":"2024","unstructured":"Yang C, An Z, Cai L, Xu Y (2024) Knowledge distillation using hierarchical self-supervision augmented distribution. IEEE Trans Neural Netw Learn Syst 35(2):2094\u20132108. https:\/\/doi.org\/10.1109\/TNNLS.2022.3186807","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1671_CR85","doi-asserted-by":"crossref","unstructured":"Song K, Xie J, Zhang S, Luo Z (2023) Multi-mode online knowledge distillation for self-supervised visual representation learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 11848\u201311857","DOI":"10.1109\/CVPR52729.2023.01140"},{"key":"1671_CR86","unstructured":"Chen D, Mei J-P, Zhang Y, Wang C, Wang Z, Feng Y, Chen C. Cross-layer distillation with semantic calibration. In: AAAI conference on artificial intelligence (2020). https:\/\/api.semanticscholar.org\/CorpusID:227335337"},{"key":"1671_CR87","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"1671_CR88","doi-asserted-by":"crossref","unstructured":"Yun S, Han D, Oh SJ, Chun S, Choe J, Yoo Y (2019) Cutmix: Regularization strategy to train strong classifiers with localizable features. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp 6023\u20136032","DOI":"10.1109\/ICCV.2019.00612"}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-026-01671-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00607-026-01671-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-026-01671-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T08:22:02Z","timestamp":1779092522000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00607-026-01671-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,18]]},"references-count":88,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["1671"],"URL":"https:\/\/doi.org\/10.1007\/s00607-026-01671-8","relation":{},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"value":"0010-485X","type":"print"},{"value":"1436-5057","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5,18]]},"assertion":[{"value":"14 January 2026","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 May 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"81"}}