{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T05:46:44Z","timestamp":1776750404238,"version":"3.51.2"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100012389","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012389","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Korean Government [Ministry of Science and Information and Communication Technology (MSIT)]","award":["RS-2025-02214322"],"award-info":[{"award-number":["RS-2025-02214322"]}]},{"name":"Institute of Information and Communications Technology Planning and Evaluation (IITP)-Information Technology Research Center"},{"name":"Korean Government","award":["IITP-2025-RS-2022-00156295"],"award-info":[{"award-number":["IITP-2025-RS-2022-00156295"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/access.2026.3677560","type":"journal-article","created":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T19:57:30Z","timestamp":1774468650000},"page":"57983-57996","source":"Crossref","is-referenced-by-count":0,"title":["SHARP: Structured Hierarchical Attention Rank Projection for Efficient Language Model Distillation"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7691-0930","authenticated-orcid":false,"given":"Jieui","family":"Kang","sequence":"first","affiliation":[{"name":"Artificial Intelligence Convergence, Ewha Womans University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2022-5460","authenticated-orcid":false,"given":"Eunjeong","family":"Yoo","sequence":"additional","affiliation":[{"name":"Artificial Intelligence Convergence, Ewha Womans University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2458-3048","authenticated-orcid":false,"given":"Soeun","family":"Choi","sequence":"additional","affiliation":[{"name":"Artificial Intelligence Convergence, Ewha Womans University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1882-5230","authenticated-orcid":false,"given":"Yeonhee","family":"Kim","sequence":"additional","affiliation":[{"name":"Artificial Intelligence Convergence, Ewha Womans University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8722-8486","authenticated-orcid":false,"given":"Jaehyeong","family":"Sim","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.71"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_28"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29947"},{"key":"ref4","volume-title":"Free dolly: Introducing the world\u2019s first truly open instruction-tuned LLM","author":"Conover","year":"2023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"ref7","first-page":"4323","article-title":"Patient knowledge distillation for BERT compression","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Sun"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-naacl.103"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1503.02531"},{"key":"ref10","first-page":"3509","article-title":"LIT: Learned intermediate representation training for model compression","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Koratana"},{"key":"ref11","article-title":"MiniLLM: Knowledge distillation of large language models","author":"Gu","year":"2023","journal-title":"arXiv:2306.08543"},{"key":"ref12","first-page":"25065","article-title":"DistiLLM: Towards streamlined distillation for large language models","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Ko"},{"key":"ref13","article-title":"Speculative knowledge distillation: Bridging the teacher\u2013student gap through interleaved sampling","author":"Xu","year":"2024","journal-title":"arXiv:2410.11325"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.247"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3568679"},{"key":"ref16","article-title":"A survey on multi-view learning","author":"Xu","year":"2013","journal-title":"arXiv:1304.5634"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.547"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00409"},{"key":"ref19","article-title":"Contrastive representation distillation","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Tian"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.107049"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2023.07.047"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688118"},{"key":"ref23","article-title":"OPT: Open pre-trained transformer language models","author":"Zhang","year":"2022","journal-title":"arXiv:2205.01068"},{"key":"ref24","article-title":"TinyLlama: An open-source small language model","author":"Zhang","year":"2024","journal-title":"arXiv:2401.02385"},{"key":"ref25","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv:2302.13971"},{"key":"ref26","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv:2307.09288"},{"key":"ref27","article-title":"DeepSeek-coder: When the large language model meets programming\u2014The rise of code intelligence","author":"Guo","year":"2024","journal-title":"arXiv:2401.14196"},{"key":"ref28","volume-title":"Vicuna: An Open-Source Chatbot Impressing GPT-4 With 90%* ChatGPT Quality","author":"Chiang","year":"2023"},{"key":"ref29","article-title":"Self-instruct: Aligning language models with self-generated instructions","author":"Wang","year":"2022","journal-title":"arXiv:2212.10560"},{"key":"ref30","volume-title":"Koala: A Dialogue Model for Academic Research","author":"Geng","year":"2023"},{"key":"ref31","article-title":"WizardLM: Empowering large language models to follow complex instructions","author":"Xu","year":"2023","journal-title":"arXiv:2304.12244"},{"key":"ref32","article-title":"Measuring massive multitask language understanding","author":"Hendrycks","year":"2020","journal-title":"arXiv:2009.03300"},{"key":"ref33","article-title":"DROP: A reading comprehension benchmark requiring discrete reasoning over paragraphs","author":"Dua","year":"2019","journal-title":"arXiv:1903.00161"},{"key":"ref34","article-title":"Challenging BIG-bench tasks and whether chain-of-thought can solve them","author":"Suzgun","year":"2022","journal-title":"arXiv:2210.09261"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d16-1139"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.494"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.605"},{"key":"ref38","article-title":"On-policy distillation of language models: Learning from self-generated mistakes","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Agarwal"},{"key":"ref39","article-title":"Visualizing the loss landscape of neural nets","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref40","article-title":"Sharpness-aware minimization for efficiently improving generalization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Foret"},{"key":"ref41","article-title":"Direct preference knowledge distillation for large language models","author":"Li","year":"2024","journal-title":"arXiv:2406.19774"},{"key":"ref42","first-page":"31044","article-title":"DistiLLM-2: A contrastive approach boosts the distillation of LLMs","volume-title":"Proc. 42nd Int. Conf. Mach. Learn.","volume":"267","author":"Ko"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/11323511\/11456007.pdf?arnumber=11456007","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T05:18:27Z","timestamp":1776748707000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11456007\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/access.2026.3677560","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}