{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,11]],"date-time":"2026-05-11T16:11:49Z","timestamp":1778515909288,"version":"3.51.4"},"reference-count":70,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100015333","name":"Kayamori Foundation of Informational Science Advancement","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015333","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100015104","name":"Kayamori Foundation of Informational Science Advancement","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015104","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science","doi-asserted-by":"publisher","award":["JP25K15165"],"award-info":[{"award-number":["JP25K15165"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.neucom.2026.133636","type":"journal-article","created":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T17:18:41Z","timestamp":1775841521000},"page":"133636","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Top-KD: Top-scaled contrastive knowledge distillation"],"prefix":"10.1016","volume":"685","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9056-9522","authenticated-orcid":false,"given":"Qi","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5078-0522","authenticated-orcid":false,"given":"Jinjia","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133636_bib0005","author":"Hinton"},{"key":"10.1016\/j.neucom.2026.133636_bib0010","first-page":"79570","article-title":"One-for-all: bridge the gap between heterogeneous architectures in knowledge distillation","volume":"36","author":"Hao","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133636_bib0015","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"16006","article-title":"C2kd: bridging the modality gap for cross-modal knowledge distillation","author":"Huo","year":"2024"},{"key":"10.1016\/j.neucom.2026.133636_bib0020","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11953","article-title":"Decoupled knowledge distillation","author":"Zhao","year":"2022"},{"key":"10.1016\/j.neucom.2026.133636_bib0025","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"6189","article-title":"DOT: a distillation-oriented trainer","author":"Zhao","year":"2023"},{"key":"10.1016\/j.neucom.2026.133636_bib0030","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"6356","article-title":"Mimicking very efficient network for object detection","author":"Li","year":"2017"},{"key":"10.1016\/j.neucom.2026.133636_bib0035","doi-asserted-by":"crossref","first-page":"150","DOI":"10.1016\/j.neucom.2023.01.088","article-title":"Boosting R-CNN: reweighting R-CNN samples by RPN\u2019s error for underwater object detection","volume":"530","author":"Song","year":"2023","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133636_bib0040","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"10012","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.neucom.2026.133636_bib0045","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.130791","article-title":"I2ckd: intra-and inter-class knowledge distillation for semantic segmentation","author":"Karine","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133636_bib0050","article-title":"Learning both weights and connections for efficient neural network","volume":"28","author":"Han","year":"2015","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133636_bib0055","author":"Li"},{"key":"10.1016\/j.neucom.2026.133636_bib0060","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2704","article-title":"Quantization and training of neural networks for efficient integer-arithmetic-only inference","author":"Jacob","year":"2018"},{"key":"10.1016\/j.neucom.2026.133636_bib0065","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"12925","article-title":"Explaining knowledge distillation by quantifying the knowledge","author":"Cheng","year":"2020"},{"key":"10.1016\/j.neucom.2026.133636_bib0070","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"1365","article-title":"Similarity-preserving knowledge distillation","author":"Tung","year":"2019"},{"key":"10.1016\/j.neucom.2026.133636_bib0075","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"3779","article-title":"Knowledge transfer via distillation of activation boundaries formed by hidden neurons","volume":"vol. 33","author":"Heo","year":"2019"},{"key":"10.1016\/j.neucom.2026.133636_bib0080","article-title":"Paraphrasing complex network: network compression via factor transfer","volume":"31","author":"Kim","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133636_bib0085","series-title":"International Conference on Learning Representations","article-title":"Contrastive representation distillation","author":"Tian","year":"2020"},{"key":"10.1016\/j.neucom.2026.133636_bib0090","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5008","article-title":"Distilling knowledge via knowledge review","author":"Chen","year":"2021"},{"key":"10.1016\/j.neucom.2026.133636_bib0095","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11933","article-title":"Knowledge distillation with the reused teacher classifier","author":"Chen","year":"2022"},{"key":"10.1016\/j.neucom.2026.133636_bib0100","series-title":"International Conference on Learning Representations","article-title":"Function-consistent feature distillation","author":"Liu","year":"2023"},{"key":"10.1016\/j.neucom.2026.133636_bib0105","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"1504","article-title":"Curriculum temperature for knowledge distillation","volume":"vol. 37","author":"Li","year":"2023"},{"key":"10.1016\/j.neucom.2026.133636_bib0110","series-title":"International Conference on Learning Representations","article-title":"Knowledge distillation based on transformed teacher matching","author":"Zheng","year":"2024"},{"key":"10.1016\/j.neucom.2026.133636_bib0115","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3967","article-title":"Relational knowledge distillation","author":"Park","year":"2019"},{"key":"10.1016\/j.neucom.2026.133636_bib0120","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TPAMI.2025.3586284","article-title":"Dist+: knowledge distillation from a stronger adaptive teacher","author":"Huang","year":"2025","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133636_bib0125","doi-asserted-by":"crossref","first-page":"3578","DOI":"10.1109\/TIP.2025.3573474","article-title":"CKD: contrastive knowledge distillation from a sample-wise perspective","volume":"34","author":"Zhu","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2026.133636_bib0130","first-page":"65445","article-title":"Wasserstein distance rivals kullback-leibler divergence for knowledge distillation","volume":"37","author":"Lv","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133636_bib0135","series-title":"Learning Multiple Layers of Features from Tiny Images","author":"Krizhevsky","year":"2009"},{"key":"10.1016\/j.neucom.2026.133636_bib0140","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4320","article-title":"Deep mutual learning","author":"Zhang","year":"2018"},{"key":"10.1016\/j.neucom.2026.133636_bib0145","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"5191","article-title":"Improved knowledge distillation via teacher assistant","volume":"vol. 34","author":"Mirzadeh","year":"2020"},{"key":"10.1016\/j.neucom.2026.133636_bib0150","first-page":"11037","article-title":"What knowledge gets distilled in knowledge distillation?","volume":"36","author":"Ojha","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133636_bib0155","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"15731","article-title":"Logit standardization in knowledge distillation","author":"Sun","year":"2024"},{"issue":"5","key":"10.1016\/j.neucom.2026.133636_bib0160","doi-asserted-by":"crossref","first-page":"3465","DOI":"10.1109\/TCSVT.2023.3325814","article-title":"Improving knowledge distillation via head and tail categories","volume":"34","author":"Xu","year":"2024","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.neucom.2026.133636_bib0165","author":"Romero"},{"key":"10.1016\/j.neucom.2026.133636_bib0170","series-title":"International Conference on Learning Representations","article-title":"Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer","author":"Zagoruyko","year":"2017"},{"key":"10.1016\/j.neucom.2026.133636_bib0175","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1921","article-title":"A comprehensive overhaul of feature distillation","author":"Heo","year":"2019"},{"key":"10.1016\/j.neucom.2026.133636_bib0180","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11868","article-title":"Class attention transfer based knowledge distillation","author":"Guo","year":"2023"},{"key":"10.1016\/j.neucom.2026.133636_bib0185","series-title":"Proceedings of the IEEE Winter Conference on Applications of Computer Vision (WACV)","first-page":"2266","article-title":"Frequency attention for knowledge distillation","author":"Pham","year":"2024"},{"key":"10.1016\/j.neucom.2026.133636_bib0190","doi-asserted-by":"crossref","DOI":"10.1109\/TNNLS.2025.3539991","article-title":"Cascade fusion and correlation enhancement for knowledge distillation","author":"Sun","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.neucom.2026.133636_bib0195","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3733","article-title":"Unsupervised feature learning via non-parametric instance discrimination","author":"Wu","year":"2018"},{"key":"10.1016\/j.neucom.2026.133636_bib0200","author":"Oord"},{"key":"10.1016\/j.neucom.2026.133636_bib0205","series-title":"European Conference on Computer Vision","first-page":"776","article-title":"Contrastive multiview coding","author":"Tian","year":"2020"},{"key":"10.1016\/j.neucom.2026.133636_bib0210","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9729","article-title":"Momentum contrast for unsupervised visual representation learning","author":"He","year":"2020"},{"key":"10.1016\/j.neucom.2026.133636_bib0215","series-title":"International Conference on Machine Learning","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020"},{"key":"10.1016\/j.neucom.2026.133636_bib0220","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9260","article-title":"Complementary relation contrastive distillation","author":"Zhu","year":"2021"},{"key":"10.1016\/j.neucom.2026.133636_bib0225","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"issue":"3","key":"10.1016\/j.neucom.2026.133636_bib0230","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","article-title":"ImageNet large scale visual recognition challenge","volume":"115","author":"Russakovsky","year":"2015","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"10.1016\/j.neucom.2026.133636_bib0235","series-title":"Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, Vol. 15 of Proceedings of Machine Learning Research","first-page":"215","article-title":"An analysis of single-layer networks in unsupervised feature learning","author":"Coates","year":"2011"},{"key":"10.1016\/j.neucom.2026.133636_bib0240","author":"Mnmoustafa"},{"key":"10.1016\/j.neucom.2026.133636_bib0245","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.neucom.2026.133636_bib0250","author":"Zagoruyko"},{"key":"10.1016\/j.neucom.2026.133636_bib0255","author":"Simonyan"},{"key":"10.1016\/j.neucom.2026.133636_bib0260","author":"Howard"},{"key":"10.1016\/j.neucom.2026.133636_bib0265","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"6848","article-title":"Shufflenet: an extremely efficient convolutional neural network for mobile devices","author":"Zhang","year":"2018"},{"key":"10.1016\/j.neucom.2026.133636_bib0270","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"12052","article-title":"Dearkd: data-efficient early knowledge distillation for vision transformers","author":"Chen","year":"2022"},{"key":"10.1016\/j.neucom.2026.133636_bib0275","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"558","article-title":"Tokens-to-token ViT: training vision transformers from scratch on ImageNet","author":"Yuan","year":"2021"},{"key":"10.1016\/j.neucom.2026.133636_bib0280","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"11936","article-title":"Rethinking spatial dimensions of vision transformers","author":"Heo","year":"2021"},{"key":"10.1016\/j.neucom.2026.133636_bib0285","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"568","article-title":"Pyramid vision transformer: a versatile backbone for dense prediction without convolutions","author":"Wang","year":"2021"},{"key":"10.1016\/j.neucom.2026.133636_bib0290","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"24276","article-title":"Multi-level logit distillation","author":"Jin","year":"2023"},{"key":"10.1016\/j.neucom.2026.133636_bib0295","series-title":"International Conference on Machine Learning","first-page":"1139","article-title":"On the importance of initialization and momentum in deep learning","author":"Sutskever","year":"2013"},{"issue":"11","key":"10.1016\/j.neucom.2026.133636_bib0300","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2026.133636_bib0305","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"4794","article-title":"On the efficacy of knowledge distillation","author":"Cho","year":"2019"},{"issue":"6","key":"10.1016\/j.neucom.2026.133636_bib0310","doi-asserted-by":"crossref","first-page":"3048","DOI":"10.1109\/TPAMI.2021.3055564","article-title":"Knowledge distillation and student-teacher learning for visual intelligence: a review and new outlooks","volume":"44","author":"Wang","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133636_bib0315","series-title":"European Conference on Computer Vision","first-page":"110","article-title":"Locality guidance for improving vision transformers on tiny datasets","author":"Li","year":"2022"},{"key":"10.1016\/j.neucom.2026.133636_bib0320","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"17413","article-title":"Automated knowledge distillation via monte carlo tree search","author":"Li","year":"2023"},{"key":"10.1016\/j.neucom.2026.133636_bib0325","series-title":"International Conference on Machine Learning","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","author":"Touvron","year":"2021"},{"key":"10.1016\/j.neucom.2026.133636_bib0330","first-page":"24261","article-title":"Mlp-mixer: an all-mlp architecture for vision","volume":"34","author":"Tolstikhin","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133636_bib0335","series-title":"Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V","first-page":"740","article-title":"Microsoft COCO: common objects in context","author":"Lin","year":"2014"},{"issue":"6","key":"10.1016\/j.neucom.2026.133636_bib0340","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster R-CNN: towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2016","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133636_bib0345","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2117","article-title":"Feature pyramid networks for object detection","author":"Lin","year":"2017"},{"key":"10.1016\/j.neucom.2026.133636_bib0350","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4933","article-title":"Distilling object detectors with fine-grained feature imitation","author":"Wang","year":"2019"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226010337?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226010337?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,11]],"date-time":"2026-05-11T15:42:13Z","timestamp":1778514133000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226010337"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":70,"alternative-id":["S0925231226010337"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133636","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Top-KD: Top-scaled contrastive knowledge distillation","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133636","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"133636"}}