{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T22:01:15Z","timestamp":1767909675848,"version":"3.49.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.23919\/date64628.2025.10992997","type":"proceedings-article","created":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T17:36:35Z","timestamp":1747848995000},"page":"1-7","source":"Crossref","is-referenced-by-count":5,"title":["SparseInfer: Training-free Prediction of Activation Sparsity for Fast LLM Inference"],"prefix":"10.23919","author":[{"given":"Jiho","family":"Shin","sequence":"first","affiliation":[{"name":"University of Seoul,Seoul,South Korea"}]},{"given":"Hoeseok","family":"Yang","sequence":"additional","affiliation":[{"name":"Santa Clara University,Santa Clara,CA,USA"}]},{"given":"Youngmin","family":"Yi","sequence":"additional","affiliation":[{"name":"Sogang University,Seoul,South Korea"}]}],"member":"263","reference":[{"key":"ref1","author":"Radford","year":"2018","journal-title":"Improving language understanding by generative pretraining"},{"key":"ref2","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref3","first-page":"21 702","article-title":"Llm-pruner: On the structural pruning of large language models","volume":"36","author":"Ma","year":"2023","journal-title":"Advances in 
neural information processing systems"},{"key":"ref4","first-page":"27 168","article-title":"Zeroquant: Efficient and affordable post-training quantization for large-scale transformers","volume":"35","author":"Yao","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref5","author":"Ramachandran","year":"2017","journal-title":"Searching for activation functions"},{"key":"ref6","author":"Hendrycks","year":"2016","journal-title":"Gaussian error linear units (gelus)"},{"key":"ref7","author":"Mirzadeh","year":"2023","journal-title":"Relu strikes back: Exploiting activation sparsity in large language models"},{"key":"ref8","author":"Song","year":"2024","journal-title":"Prosparse: Introducing and enhancing intrinsic activation sparsity within large language models"},{"key":"ref9","first-page":"22137","article-title":"Deja vu: Contextual sparsity for efficient llms at inference time","volume-title":"International Conference on Machine Learning","author":"Liu","year":"2023"},{"key":"ref10","volume-title":"llama.cpp","author":"Gerganov","year":"2023"},{"key":"ref11","first-page":"5533","article-title":"Inducing and exploiting activation sparsity for fast inference on deep neural networks","author":"Kurtz","year":"2020","journal-title":"International Conference on Machine Learning"},{"key":"ref12","article-title":"ReluLLaMa","volume-title":"ReluLLaMa"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3694715.3695964"},{"key":"ref14","author":"Song","year":"2024","journal-title":"Turbo sparse: Achieving llm sota performance with minimal activated parameters"},{"key":"ref15","author":"Liu","year":"2024","journal-title":"Training-free activation sparsity in large language models"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01147"},{"key":"ref17","article-title":"Llama: Open and efficient foundation language models","volume-title":"arXiv 
preprint","author":"Touvron","year":"2023"},{"key":"ref18","article-title":"The falcon series of language models: Towards open frontier models","author":"Almazrouei","year":"2023","journal-title":"Hugging Face repository"},{"key":"ref19","first-page":"19","volume-title":"Opt: Open pretrained transformer language models, 2022","volume":"3","author":"Zhang","year":"2023"},{"key":"ref20","article-title":"Qlora: efficient finetuning of quantized llms","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems, ser. NIPS \u201923. Red Hook","author":"Dettmers","year":"2024"},{"key":"ref21","first-page":"1","article-title":"On the frequency function of $xy$","volume-title":"Annals of Mathematical Statistics","volume":"7","author":"Craig","year":"1936"},{"key":"ref22","author":"Karl","year":"2021","journal-title":"Training verifiers to solve math word problems"},{"key":"ref23","author":"Karumbunathan","year":"2022","journal-title":"Nvidia jetson agx orin series"},{"key":"ref24","article-title":"lm-evaluation-harness","volume-title":"EleutherAI","year":"2024"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.824"}],"event":{"name":"2025 Design, Automation &amp; Test in Europe Conference (DATE)","location":"Lyon, France","start":{"date-parts":[[2025,3,31]]},"end":{"date-parts":[[2025,4,2]]}},"container-title":["2025 Design, Automation &amp; Test in Europe Conference 
(DATE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10992638\/10992588\/10992997.pdf?arnumber=10992997","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T05:32:54Z","timestamp":1747891974000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10992997\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":25,"URL":"https:\/\/doi.org\/10.23919\/date64628.2025.10992997","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]}}}