{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T10:20:13Z","timestamp":1773138013000,"version":"3.50.1"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62472401"],"award-info":[{"award-number":["62472401"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62132019"],"award-info":[{"award-number":["62132019"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["624B2136"],"award-info":[{"award-number":["624B2136"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["WK2150250044"],"award-info":[{"award-number":["WK2150250044"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Mobile Comput."],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1109\/tmc.2025.3629756","type":"journal-article","created":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T18:55:05Z","timestamp":1762455305000},"page":"5377-5392","source":"Crossref","is-referenced-by-count":2,"title":["Lightweight and Post-Training Structured Pruning for On-Device Large Language Models"],"prefix":"10.1109","volume":"25","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2192-4580","authenticated-orcid":false,"given":"Zihuai","family":"Xu","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0839-3892","authenticated-orcid":false,"given":"Yang","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3831-4577","authenticated-orcid":false,"given":"Hongli","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5065-2600","authenticated-orcid":false,"given":"Yunming","family":"Liao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2284-3323","authenticated-orcid":false,"given":"Zhiwei","family":"Yao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0150-3152","authenticated-orcid":false,"given":"Zuan","family":"Xie","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3771090"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2180"},{"key":"ref3","article-title":"A comprehensive survey on process-oriented automatic text summarization with exploration of LLM-based methods","author":"Jin","year":"2024"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3632775.3661959"},{"key":"ref5","article-title":"Carbon emissions and large neural network training","author":"Patterson","year":"2021"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW60836.2024.00101"},{"key":"ref7","article-title":"LLM as a system service on mobile devices","author":"Yin","year":"2024"},{"key":"ref8","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown","year":"2020"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i09.7123"},{"key":"ref10","first-page":"1135","article-title":"Learning both weights and connections for efficient neural network","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Han","year":"2015"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.l803.03635"},{"key":"ref12","article-title":"Stabilizing the lottery ticket hypothesis","author":"Frankle","year":"2019"},{"key":"ref13","first-page":"10323","article-title":"SparseGPT: Massive language models can be accurately pruned in one-shot","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Frantar","year":"2023"},{"key":"ref14","first-page":"7089","article-title":"Plug-and-play: An efficient post-training pruning method for large language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Zhang","year":"2024"},{"key":"ref15","article-title":"A simple and effective pruning approach for large language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Sun","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3061394"},{"key":"ref17","first-page":"24101","article-title":"A fast post-training pruning framework for transformers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Kwon","year":"2022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.178"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.1035"},{"key":"ref20","first-page":"21702","article-title":"LLM-Pruner: On the structural pruning of large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ma","year":"2023"},{"key":"ref21","first-page":"22137","article-title":"Deja Vu: Contextual sparsity for efficient LLMS at inference time","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu","year":"2023"},{"key":"ref22","article-title":"SliceGPT: Compress large language models by deleting rows and columns","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Ashkboos","year":"2024"},{"key":"ref23","article-title":"Parameter-efficient fine-tuning for large models: A comprehensive survey","author":"Han","year":"2024","journal-title":"CoRR"},{"key":"ref24","article-title":"LoRA: Low-rank adaptation of large language models","author":"Hu","year":"2021"},{"key":"ref25","first-page":"1022","article-title":"Compacter: Efficient low-rank hypercomplex adapter layers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Mahabadi","year":"2021"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01233-2"},{"issue":"2","key":"ref27","first-page":"550","article-title":"Optimizing LLM training for financial services: Best practices for model accuracy, risk management, and compliance in AI-powered financial applications","volume":"3","author":"Paul","year":"2023","journal-title":"J. Artif. Intell. Res. Appl."},{"key":"ref28","article-title":"LLAMA 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref30","article-title":"GLM-130B: An open bilingual pre-trained model","author":"Zeng","year":"2022"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i18.34078"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/0098-3004(93)90090-R"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i10.28960"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-statistics-040522-115238"},{"key":"ref35","article-title":"Adaptive budget allocation for parameter-efficient fine-tuning","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Zhang","year":"2024"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1137\/0716029"},{"key":"ref37","article-title":"Revisiting natural gradient for deep networks","author":"Pascanu","year":"2013"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.56021\/9781421407944"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611970739"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3968"},{"key":"ref41","first-page":"8026","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Paszke","year":"2019"},{"key":"ref42","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2016"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21236\/ADA273556"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.aacl-main.88"},{"key":"ref45","first-page":"2924","article-title":"BoolQ: Exploring the surprising difficulty of natural yes\/no questions","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Computat. Linguistics: Hum. Lang. Technol., Vol.1","author":"Clark","year":"2019"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3474381"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/501"},{"key":"ref48","article-title":"Measuring massive multitask language understanding","author":"Hendrycks","year":"2020"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4413"},{"key":"ref51","article-title":"OPT: Open pre-trained transformer language models","author":"Zhang","year":"2022"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01544"},{"issue":"140","key":"ref53","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref54","first-page":"14037","article-title":"Are sixteen heads really better than one?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Michel","year":"2019"},{"key":"ref55","article-title":"Reducing transformer depth on demand with structured dropout","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fan"},{"key":"ref56","first-page":"1","article-title":"CuPy: A NumPy-compatible library for NVIDIA GPU calculations","volume-title":"Proc. 31st Conf. Neural Inf. Process. Syst.","volume":"151","author":"Nishino","year":"2017"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"}],"container-title":["IEEE Transactions on Mobile Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7755\/11423852\/11230638.pdf?arnumber=11230638","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T20:01:29Z","timestamp":1773086489000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11230638\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":57,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tmc.2025.3629756","relation":{},"ISSN":["1536-1233","1558-0660","2161-9875"],"issn-type":[{"value":"1536-1233","type":"print"},{"value":"1558-0660","type":"electronic"},{"value":"2161-9875","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4]]}}}