{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T23:57:42Z","timestamp":1777939062062,"version":"3.51.4"},"reference-count":103,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Baden-W&#x00FC;rttemberg through bwHPC"},{"name":"Helmholtz Association Initiative and Networking Fund on the HAICORE@KIT partition"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. 
Data Eng."],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1109\/tkde.2026.3671872","type":"journal-article","created":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T20:00:13Z","timestamp":1773086413000},"page":"3710-3721","source":"Crossref","is-referenced-by-count":0,"title":["Honey, I Shrunk the Language Model: Impact of Knowledge Distillation Methods on Performance and Explainability"],"prefix":"10.1109","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3294-827X","authenticated-orcid":false,"given":"Daniel","family":"Hendriks","sequence":"first","affiliation":[{"name":"Institute for Information Systems (WIN), Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9378-0872","authenticated-orcid":false,"given":"Philipp","family":"Spitzer","sequence":"additional","affiliation":[{"name":"Institute for Information Systems (WIN), Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Niklas","family":"K\u00fchl","sequence":"additional","affiliation":[{"name":"Information Systems (WI) Institute, University of Bayreuth, Bayreuth, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8731-654X","authenticated-orcid":false,"given":"Gerhard","family":"Satzger","sequence":"additional","affiliation":[{"name":"Institute for Information Systems (WIN), Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"33","author":"Brown","year":"2020"},{"key":"ref2","article-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.208"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.197"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.151"},{"key":"ref6","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proc. 36th Int. Conf. Neural Inf. Process. Syst.","author":"Wei","year":"2022"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0517"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICDSIS61070.2024.10594233"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.739"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.299"},{"key":"ref11","article-title":"Large language models: A survey","author":"Minaee","year":"2024"},{"key":"ref12","first-page":"1","article-title":"PaLM: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2022","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref13","article-title":"Distilling task-specific knowledge from BERT into simple neural networks","author":"Tang","year":"2019"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3438074"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3376453"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-70239-6_3"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.875"},{"key":"ref20","article-title":"Beyond imitation: Learning key reasoning steps from dual chain-of-thoughts in reasoning distillation","author":"Dai","year":"2024"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.387"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00236"},{"key":"ref23","article-title":"We\u2019re getting a better idea of AI\u2019s true carbon footprint","author":"Heikkil\u00e4","year":"2022"},{"key":"ref24","first-page":"74764","article-title":"How far can camels go? Exploring the state of instruction tuning on open resources","volume-title":"Proc. Neural Inf. Process. 
Syst.","author":"Wang","year":"2023"},{"key":"ref25","article-title":"Code Llama: Open foundation models for code","author":"Rozi\u00e8re","year":"2024"},{"key":"ref26","article-title":"Towards a rigorous science of interpretable machine learning","author":"Doshi-Velez","year":"2017"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CBI62504.2024.00018"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3710999"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_35"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00915"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3750052"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.304"},{"key":"ref33","article-title":"Constitutional AI: Harmlessness from AI feedback","author":"Bai","year":"2022"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.bigscience-1.9"},{"key":"ref35","article-title":"Distilling the knowledge in a neural network","volume-title":"Proc. NIPS 2014 Deep Learn. Representation Learn. Workshop","author":"Hinton","year":"2015"},{"key":"ref36","first-page":"13278","article-title":"MiniLLM: Knowledge distillation of large language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Gu","year":"2024"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CLNLP64123.2024.00023"},{"key":"ref38","article-title":"A survey on knowledge distillation of large language models","author":"Xu","year":"2024"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/EMC2-NIPS53020.2019.00018"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.172"},{"key":"ref41","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Hu","year":"2021"},{"key":"ref42","article-title":"LoPT: Low-rank prompt tuning for parameter efficient language models","author":"Guo","year":"2024"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICSECE61636.2024.10729518"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1312"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12228"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.385"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.151"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.830"},{"key":"ref50","first-page":"10421","article-title":"Specializing smaller language models towards multi-step reasoning","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","volume":"202","author":"Fu","year":"2023"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCC59590.2023.10507622"},{"key":"ref52","article-title":"Orca: Progressive learning from complex explanation traces of GPT-4","author":"Mukherjee","year":"2023"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1487"},{"key":"ref54","article-title":"Self-consistency improves chain of thought reasoning in language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2023"},{"key":"ref55","article-title":"Learning the difference that makes a difference with counterfactually-augmented data","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Kaushik","year":"2020"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.804"},{"key":"ref57","first-page":"4149","article-title":"CommonsenseQA: A question answering challenge targeting commonsense knowledge","volume-title":"Proc. 2019 Conf. North Amer. Chapter Assoc. Comput. Linguistics: Hum. Lang. 
Technol.","author":"Talmor","year":"2019"},{"key":"ref58","first-page":"9560","article-title":"e-SNLI: Natural language inference with natural language explanations","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Camburu","year":"2018"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00370"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.63317\/32y85i5g9gso"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i14.17490"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17590"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.372"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17708"},{"key":"ref65","article-title":"First train to generate, then generate to train: UnitedSynT5 for Few-Shot NLI","author":"Banerjee","year":"2024"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00741"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.251"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.142"},{"key":"ref69","first-page":"41618","article-title":"Self-evaluation guided beam search for reasoning","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Xie","year":"2023"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.84"},{"key":"ref71","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref72","article-title":"RLCD: Reinforcement learning from contrastive distillation for LM alignment","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yang","year":"2024"},{"issue":"1","key":"ref73","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.142"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.409"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.63317\/2kopd98p2rcz"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/ASET56582.2023.10180671"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1007\/s12083-024-01817-5"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_1"},{"key":"ref81","article-title":"Prolific quickly find research participants you can trust","year":"2023"},{"key":"ref82","article-title":"Interpretation quality score for measuring the quality of interpretability methods","author":"Xie","year":"2022"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.3390\/info14080469"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10050593"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.112"},{"key":"ref86","article-title":"Oxford learner\u2019s dictionaries |find definitions, translations, and grammar explanations at Oxford learner\u2019s dictionaries","year":"2024"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.3389\/fcomp.2023.1096257"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.3390\/electronics8080832"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.390"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1007\/s13218-020-00636-z"},{"key":"ref91","article-title":"A collection of principles for guiding and evaluating large language models","volume-title":"Proc. Soc. Respons. Lang. Modelling Res. 
Workshop, NeurIPS","author":"Hebenstreit","year":"2023"},{"key":"ref92","volume-title":"Interview Und Schriftliche Befragung: Entwicklung, Durchf\u00fchrung Und Auswertung","author":"Mayer","year":"2008"},{"key":"ref93","volume-title":"Social Science Research: Principles, Methods and Practices","author":"Bhattacherjee","year":"2012"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20719-8_2"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1007\/s10459-010-9222-y"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1177\/1536867X1501500117"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.2307\/1165329"},{"key":"ref98","article-title":"Chain-of-thought is not explainability","author":"Barez","year":"2025"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.19"},{"key":"ref100","first-page":"6906","article-title":"Does knowledge distillation really work?","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Stanton","year":"2021"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2019.12.012"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00489"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.146"}],"container-title":["IEEE Transactions on Knowledge and Data 
Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/69\/11503382\/11425825.pdf?arnumber=11425825","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T05:02:14Z","timestamp":1777698134000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11425825\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":103,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tkde.2026.3671872","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"value":"1041-4347","type":"print"},{"value":"1558-2191","type":"electronic"},{"value":"2326-3865","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,6]]}}}