{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T00:59:05Z","timestamp":1773277145207,"version":"3.50.1"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001321","name":"National Research Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001321","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/dac63849.2025.11132899","type":"proceedings-article","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T17:35:41Z","timestamp":1757957741000},"page":"1-7","source":"Crossref","is-referenced-by-count":1,"title":["Grasp: Group-based Prediction of Activation Sparsity for Fast LLM Inference"],"prefix":"10.1109","author":[{"given":"Jiho","family":"Shin","sequence":"first","affiliation":[{"name":"University of Seoul,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hoeseok","family":"Yang","sequence":"additional","affiliation":[{"name":"Santa Clara University,Santa Clara,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Youngmin","family":"Yi","sequence":"additional","affiliation":[{"name":"Sogang University,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Improving language understanding by generative pretraining","author":"Radford","year":"2018"},{"key":"ref2","article-title":"Language models are few-shot learners","author":"Brown","year":"2020","journal-title":"arXiv preprint arXiv:2005.14165"},{"key":"ref3","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2302.13971"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1710.05941"},{"key":"ref5","article-title":"Gaussian error linear units (gelus)","author":"Hendrycks","year":"2016","journal-title":"arXiv preprint arXiv:1606.08415"},{"key":"ref6","article-title":"Relu strikes back: Exploiting activation sparsity in large language models","author":"Mirzadeh","year":"2023","journal-title":"arXiv preprint arXiv:2310.04564"},{"key":"ref7","first-page":"22 137","article-title":"Deja vu: Contextual sparsity for efficient llms at inference time","volume-title":"International Conference on Machine Learning.","author":"Liu"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3694715.3695964"},{"key":"ref9","article-title":"Prosparse: Introducing and enhancing intrinsic activation sparsity within large language models","author":"Song","year":"2024","journal-title":"arXiv preprint arXiv:2402.13516"},{"key":"ref10","article-title":"Turbo sparse: Achieving llm sota performance with minimal activated parameters","author":"Song","year":"2024","journal-title":"arXiv preprint arXiv:2406.05955"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.23919\/date64628.2025.10992997"},{"key":"ref12","article-title":"Nvidia jetson agx orin series","author":"Karumbunathan","year":"2022"},{"key":"ref13","volume-title":"llama.cpp","author":"Gerganov","year":"2023"},{"key":"ref14","article-title":"Training verifiers to solve math word problems","author":"Karl","year":"2021","journal-title":"arXiv preprint arXiv:2110.14168"},{"key":"ref15","article-title":"Training-free activation sparsity in large language models","author":"Liu","year":"2024","journal-title":"arXiv preprint arXiv:2408.14690"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1002\/1097-0142(1950)3:1<32::AID-CNCR2820030106>3.0.CO;2-3"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"ref18","article-title":"lm-evaluation-harness","year":"2024"}],"event":{"name":"2025 62nd ACM\/IEEE Design Automation Conference (DAC)","location":"San Francisco, CA, USA","start":{"date-parts":[[2025,6,22]]},"end":{"date-parts":[[2025,6,25]]}},"container-title":["2025 62nd ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11132383\/11132091\/11132899.pdf?arnumber=11132899","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:49:35Z","timestamp":1758001775000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11132899\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/dac63849.2025.11132899","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}