{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T07:55:55Z","timestamp":1781510155004,"version":"3.54.1"},"reference-count":39,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,10]]},"DOI":"10.1016\/j.neucom.2026.134183","type":"journal-article","created":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:28:42Z","timestamp":1780417722000},"page":"134183","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["PPF: A framework for real-time adaptive pruning of large language models via performance prediction"],"prefix":"10.1016","volume":"697","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7154-7786","authenticated-orcid":false,"given":"Zuxin","family":"Ma","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0880-675X","authenticated-orcid":false,"given":"Yunhe","family":"Cui","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1960-8628","authenticated-orcid":false,"given":"Yongbin","family":"Qin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.134183_bib0005","author":"Jiang"},{"key":"10.1016\/j.neucom.2026.134183_bib0010","author":"Jang"},{"key":"10.1016\/j.neucom.2026.134183_bib0015","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1016\/j.future.2023.09.025","article-title":"DNNShifter: an efficient DNN pruning system for edge computing","volume":"152","author":"Eccles","year":"2024","journal-title":"Future Gener. Comput. Syst."},{"key":"10.1016\/j.neucom.2026.134183_bib0020","series-title":"Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 \u2013 16, 2023","article-title":"LLM-pruner: on the structural pruning of large language models","author":"Ma","year":"2023"},{"key":"10.1016\/j.neucom.2026.134183_bib0025","series-title":"Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8\u201314, 2019, Vancouver, BC, Canada","first-page":"14014","article-title":"Are sixteen heads really better than one?","author":"Michel","year":"2019"},{"key":"10.1016\/j.neucom.2026.134183_bib0030","series-title":"Forty-First International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21\u201327, 2024","article-title":"Outlier weighed layerwise sparsity (OWL): a missing secret sauce for pruning LLMs to high sparsity","author":"Yin","year":"2024"},{"key":"10.1016\/j.neucom.2026.134183_bib0035","series-title":"Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 \u2013 15, 2024","article-title":"AlphaPruning: using heavy-tailed self regularization theory for improved layer-wise pruning of large language models","author":"Lu","year":"2024"},{"key":"10.1016\/j.neucom.2026.134183_bib0040","series-title":"Findings of the Association for Computational Linguistics, ACL 2025, Vienna, Austria, July 27 \u2013 August 1, 2025","first-page":"2591","article-title":"One-for-all pruning: a universal model for customized compression of large language models","author":"Ye","year":"2025"},{"key":"10.1016\/j.neucom.2026.134183_bib0045","series-title":"The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7\u201311, 2024","article-title":"A simple and effective pruning approach for large language models","author":"Sun","year":"2024"},{"key":"10.1016\/j.neucom.2026.134183_bib0050","author":"Zhong"},{"key":"10.1016\/j.neucom.2026.134183_bib0055","author":"Men"},{"key":"10.1016\/j.neucom.2026.134183_bib0060","first-page":"10865","article-title":"Fluctuation-based adaptive structured pruning for large language models","author":"An","year":"2024"},{"key":"10.1016\/j.neucom.2026.134183_bib0065","series-title":"Sparsity in LLMs (SLLM): Deep Dive Into Mixture of Experts, Quantization, Hardware, and Inference","article-title":"EvoPress: accurate dynamic model compression via evolutionary search","author":"Sieberling","year":"2025"},{"key":"10.1016\/j.neucom.2026.134183_bib0070","series-title":"AAAI-25, Sponsored by the Association for the Advancement of Artificial Intelligence, February 25 \u2013 March 4, 2025, Philadelphia, PA, USA","first-page":"17938","article-title":"Sample-aware adaptive structured pruning for large language models","author":"Kong","year":"2025"},{"key":"10.1016\/j.neucom.2026.134183_bib0075","series-title":"The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24\u201328, 2025","article-title":"Probe pruning: accelerating LLMs through dynamic pruning via model-probing","author":"Le","year":"2025"},{"key":"10.1016\/j.neucom.2026.134183_bib0080","author":"Liu"},{"key":"10.1016\/j.neucom.2026.134183_bib0085","series-title":"Findings of the Association for Computational Linguistics: ACL 2023, Toronto, Canada, July 9\u201314, 2023","first-page":"9116","article-title":"AutoMoE: heterogeneous mixture-of-experts with adaptive computation for efficient neural machine translation","author":"Jawahar","year":"2023"},{"key":"10.1016\/j.neucom.2026.134183_bib0090","series-title":"Findings of the Association for Computational Linguistics, ACL 2024, Bangkok, Thailand and Virtual Meeting, August 11\u201316, 2024","first-page":"10540","article-title":"LLM performance predictors are good initializers for architecture search","author":"Jawahar","year":"2024"},{"key":"10.1016\/j.neucom.2026.134183_bib0095","series-title":"Findings of the Association for Computational Linguistics: NAACL 2025, Albuquerque, New Mexico, USA, April 29 \u2013 May 4, 2025","first-page":"5781","article-title":"RankAdaptor: hierarchical rank allocation for efficient fine-tuning pruned LLMs via performance model","author":"Zhou","year":"2025"},{"key":"10.1016\/j.neucom.2026.134183_bib0100","series-title":"Annual Conference on Neural Information Processing Systems","article-title":"Predicting LLM inference latency: a roofline-driven ML method","author":"Imai","year":"2024"},{"key":"10.1016\/j.neucom.2026.134183_bib0105","series-title":"Computer Vision \u2013 ECCV 2018 \u2013 15th European Conference, Munich, Germany, September 8\u201314, 2018, Proceedings, Part VII, Vol. 11211 of Lecture Notes in Computer Science","first-page":"815","article-title":"AMC: AutoML for model compression and acceleration on mobile devices","author":"He","year":"2018"},{"key":"10.1016\/j.neucom.2026.134183_bib0110","series-title":"IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2019, Long Beach, CA, USA, June 16\u201320, 2019","first-page":"8612","article-title":"HAQ: hardware-aware automated quantization with mixed precision","author":"Wang","year":"2019"},{"issue":"1","key":"10.1016\/j.neucom.2026.134183_bib0115","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1109\/18.61115","article-title":"Divergence measures based on the shannon entropy","volume":"37","author":"Lin","year":"1991","journal-title":"IEEE Trans. Inf. Theory"},{"key":"10.1016\/j.neucom.2026.134183_bib0120","author":"Khanal"},{"key":"10.1016\/j.neucom.2026.134183_bib0125","series-title":"Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8\u201314, 2019, Vancouver, BC, Canada","first-page":"8024","article-title":"PyTorch: an imperative style, high-performance deep learning library","author":"Paszke","year":"2019"},{"key":"10.1016\/j.neucom.2026.134183_bib0130","author":"Wolf"},{"key":"10.1016\/j.neucom.2026.134183_bib0135","author":"Touvron"},{"key":"10.1016\/j.neucom.2026.134183_bib0140","author":"Yang"},{"key":"10.1016\/j.neucom.2026.134183_bib0145","author":"Dubey"},{"key":"10.1016\/j.neucom.2026.134183_bib0150","author":"Yang"},{"key":"10.1016\/j.neucom.2026.134183_bib0155","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2\u20137, 2019, Volume 1 (Long and Short Papers)","first-page":"2924","article-title":"BoolQ: exploring the surprising difficulty of natural Yes\/No questions","author":"Clark","year":"2019"},{"key":"10.1016\/j.neucom.2026.134183_bib0160","series-title":"7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6\u20139, 2019","article-title":"GLUE: a multi-task benchmark and analysis platform for natural language understanding","author":"Wang","year":"2019"},{"key":"10.1016\/j.neucom.2026.134183_bib0165","series-title":"Proceedings of the 57th Conference of the Association for Computational Linguistics, ACL 2019, Florence, Italy, July 28- August 2, 2019, Volume 1: Long Papers","first-page":"4791","article-title":"HellaSwag: can a machine really finish your sentence?","author":"Zellers","year":"2019"},{"issue":"9","key":"10.1016\/j.neucom.2026.134183_bib0170","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1145\/3474381","article-title":"WinoGrande: an adversarial winograd schema challenge at scale","volume":"64","author":"Sakaguchi","year":"2021","journal-title":"Commun. ACM"},{"key":"10.1016\/j.neucom.2026.134183_bib0175","author":"Bhakthavatsalam"},{"key":"10.1016\/j.neucom.2026.134183_bib0180","series-title":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Brussels, Belgium, October 31 \u2013 November 4, 2018","first-page":"2381","article-title":"Can a suit of armor conduct electricity? A new dataset for open book question answering","author":"Mihaylov","year":"2018"},{"key":"10.1016\/j.neucom.2026.134183_bib0185","series-title":"5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24\u201326, 2017, Conference Track Proceedings","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2017"},{"issue":"2","key":"10.1016\/j.neucom.2026.134183_bib0190","first-page":"313","article-title":"Building a large annotated corpus of english: the penn treebank","volume":"19","author":"Marcus","year":"1993","journal-title":"Comput. Linguist."},{"key":"10.1016\/j.neucom.2026.134183_bib0195","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2024, Miami, Florida, USA, November 12\u201316, 2024","first-page":"6401","article-title":"LaCo: large language model pruning via layer collapse","author":"Yang","year":"2024"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S092523122601581X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S092523122601581X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T07:46:58Z","timestamp":1781509618000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S092523122601581X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,10]]},"references-count":39,"alternative-id":["S092523122601581X"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.134183","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"PPF: A framework for real-time adaptive pruning of large language models via performance prediction","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.134183","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"134183"}}