{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T13:14:01Z","timestamp":1780060441871,"version":"3.54.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T00:00:00Z","timestamp":1781913600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2426353"],"award-info":[{"award-number":["2426353"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,21]]},"DOI":"10.1145\/3745756.3809197","type":"proceedings-article","created":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T12:52:21Z","timestamp":1780059141000},"page":"173-184","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SpMAP: Transparent Sparsity for LLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7799-9946","authenticated-orcid":false,"given":"Wonkyo","family":"Choe","sequence":"first","affiliation":[{"name":"University of Virginia, Charlottesville, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0081-6411","authenticated-orcid":false,"given":"Felix Xiaozhu","family":"Lin","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,20]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. MLC-LLM. Retrieved Aug 6th 2025 from https:\/\/github.com\/mlc-ai\/mlc-llm"},{"key":"e_1_3_2_1_2_1","unstructured":"2025. Anonymous Memory. Retrieved Aug 6th 2025 from https:\/\/docs.kernel.org\/admin-guide\/mm\/concepts.html#id6"},{"key":"e_1_3_2_1_3_1","unstructured":"2025. AnythingLLM. Retrieved July 18th 2025 from https:\/\/anythingllm.com\/"},{"key":"e_1_3_2_1_4_1","volume-title":"eBPF. Retrieved Aug 6th","year":"2025","unstructured":"2025. eBPF. Retrieved Aug 6th, 2025 from https:\/\/ebpf.io\/"},{"key":"e_1_3_2_1_5_1","unstructured":"2025. ExecuTorch. Retrieved Nov 24th 2025 from https:\/\/github.com\/pytorch\/executorch"},{"key":"e_1_3_2_1_6_1","unstructured":"2025. LM Studieo. Retrieved July 18th 2025 from https:\/\/lmstudio.ai\/"},{"key":"e_1_3_2_1_7_1","unstructured":"2025. Ollama. Retrieved July 18th 2025 from https:\/\/ollama.com\/"},{"key":"e_1_3_2_1_8_1","unstructured":"2025. TorchScript. Retrieved Nov 24th 2025 from https:\/\/docs.pytorch.org\/docs\/stable\/jit.html"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2312.11514"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480855"},{"key":"e_1_3_2_1_11_1","volume-title":"Retrieved Aug 6th","author":"Gerganov Georgi","year":"2025","unstructured":"Georgi Gerganov. 2025. GGUF. Retrieved Aug 6th, 2025 from https:\/\/github.com\/ggml-org\/ggml\/blob\/master\/docs\/gguf.md"},{"key":"e_1_3_2_1_12_1","unstructured":"Mel Gorman. [n. d.]. Understanding the Linux Virtual Memory Manager."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","volume-title":"Retrieved July 18th","author":"Apple Inc.","year":"2025","unstructured":"Apple Inc. 2025. Apple Intelligence. Retrieved July 18th, 2025 from https:\/\/www.apple.com\/apple-intelligence\/"},{"key":"e_1_3_2_1_15_1","volume-title":"Retrieved July 18th","author":"Microsoft Inc.","year":"2025","unstructured":"Microsoft Inc. 2025. Microsoft Copilot. Retrieved July 18th, 2025 from https:\/\/copilot.microsoft.com\/"},{"key":"e_1_3_2_1_16_1","volume-title":"Retrieved Aug 6th","author":"Kernel The Linux","year":"2025","unstructured":"The Linux Kernel. 2025. Memory Management APIs. Retrieved Aug 6th, 2025 from https:\/\/www.kernel.org\/doc\/html\/next\/core-api\/mm-api.html"},{"key":"e_1_3_2_1_17_1","volume-title":"Retrieved Aug 6th","author":"Kernel The Linux","year":"2025","unstructured":"The Linux Kernel. 2025. Memory Management Concepts. Retrieved Aug 6th, 2025 from https:\/\/docs.kernel.org\/admin-guide\/mm\/concepts.html"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3139645.3139659"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731073"},{"key":"e_1_3_2_1_20_1","volume-title":"AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. In MLSys.","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. 2024. AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. In MLSys."},{"key":"e_1_3_2_1_21_1","volume-title":"Deja Vu: Contextual Sparsity for Efficient LLMs at Inference Time. arXiv:2310.17157 [cs]","author":"Liu Zichang","year":"2023","unstructured":"Zichang Liu, Jue Wang, Tri Dao, Tianyi Zhou, Binhang Yuan, Zhao Song, Anshumali Shrivastava, Ce Zhang, Yuandong Tian, Christopher Re, and Beidi Chen. 2023. Deja Vu: Contextual Sparsity for Efficient LLMs at Inference Time. arXiv:2310.17157 [cs]"},{"key":"e_1_3_2_1_22_1","volume-title":"Retrieved Aug 6th","year":"2022","unstructured":"LWN.net. 2022. Introducing the Maple Tree. Retrieved Aug 6th, 2025 from https:\/\/lwn.net\/Articles\/901714\/"},{"key":"e_1_3_2_1_23_1","volume-title":"Retrieved Aug 6th, 2025","author":"NVIDIA.","year":"2025","unstructured":"NVIDIA. 2025. Unified Memory for CUDA Beginners. Retrieved Aug 6th, 2025 from https:\/\/developer.nvidia.com\/blog\/unified-memory-cuda-beginners\/ Accessed: 2025-08-06."},{"key":"e_1_3_2_1_24_1","volume-title":"Retrieved Dec 1st","year":"2025","unstructured":"Oracle. 2025. Exadata. Retrieved Dec 1st, 2025 from https:\/\/blogs.oracle.com\/exadata\/huge-pages-or-transparent-huge-pages-in-context-of-exadata"},{"key":"e_1_3_2_1_25_1","unstructured":"Yeonhong Park Jake Hyun Hojoon Kim and Jae W Lee. [n. d.]. DecDEC: A Systems Approach to Advancing Low-Bit LLM Quantization. ([n. d.])."},{"key":"e_1_3_2_1_26_1","volume-title":"Retrieved Aug 6th","year":"2025","unstructured":"PyTorch. 2025. torch.from_file. Retrieved Aug 6th, 2025 from https:\/\/docs.pytorch.org\/docs\/stable\/generated\/torch.from_file.html"},{"key":"e_1_3_2_1_27_1","first-page":"208","article-title":"Nimble: Efficiently compiling dynamic neural networks for model inference","volume":"3","author":"Shen Haichen","year":"2021","unstructured":"Haichen Shen, Jared Roesch, Zhi Chen, Wei Chen, Yong Wu, Mu Li, Vin Sharma, Zachary Tatlock, and Yida Wang. 2021. Nimble: Efficiently compiling dynamic neural networks for model inference. Proceedings of Machine Learning and Systems 3 (2021), 208\u2013222.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 9th USENIX Conference on Operating Systems Design and Implementation","author":"Soares Livio","year":"2010","unstructured":"Livio Soares and Michael Stumm. 2010. FlexSC: flexible system call scheduling with exception-less system calls. In Proceedings of the 9th USENIX Conference on Operating Systems Design and Implementation (Vancouver, BC, Canada) (OSDI'10). USENIX Association, USA, 33\u201346."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","unstructured":"Chenyang Song Xu Han Zhengyan Zhang Shengding Hu Xiyu Shi Kuai Li Chen Chen Zhiyuan Liu Guangli Li Tao Yang and Maosong Sun. 2025. ProSparse: Introducing and Enhancing Intrinsic Activation Sparsity within Large Language Models. arXiv:2402.13516 [cs] 10.48550\/arXiv.2402.13516","DOI":"10.48550\/arXiv.2402.13516"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3694715.3695964"},{"key":"e_1_3_2_1_31_1","volume-title":"Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. arXiv:2406.05955","author":"Song Yixin","year":"2024","unstructured":"Yixin Song, Haotong Xie, Zhengyan Zhang, Bo Wen, Li Ma, Zeyu Mi, and Haibo Chen. 2024. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. arXiv:2406.05955"},{"key":"e_1_3_2_1_32_1","volume-title":"Retrieved Aug 6th","year":"2025","unstructured":"Symas. 2025. LMDB. Retrieved Aug 6th, 2025 from https:\/\/www.symas.com\/mdb"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=9BrydUVcoe","author":"Tseng Albert","year":"2024","unstructured":"Albert Tseng, Jerry Chee, Qingyao Sun, Volodymyr Kuleshov, and Christopher De Sa. 2024. QuIP$\\#$: Even Better LLM Quantization with Hadamard Incoherence and Lattice Codebooks. In Proceedings of the 41st International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=9BrydUVcoe"},{"key":"e_1_3_2_1_34_1","volume-title":"Ripple: Accelerating LLM Inference on Smartphones with Correlation-Aware Neuron Management. arXiv preprint arXiv:2410.19274","author":"Wang Tuowei","year":"2024","unstructured":"Tuowei Wang, Ruwen Fan, Minxing Huang, Zixu Hao, Kun Li, Ting Cao, Youyou Lu, Yaoxue Zhang, and Ju Ren. 2024. Ripple: Accelerating LLM Inference on Smartphones with Correlation-Aware Neuron Management. arXiv preprint arXiv:2410.19274 (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3696099"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning.","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Mickael Seznec, Hao Wu, Julien Demouth, and Song Han. 2023. SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models. In Proceedings of the 40th International Conference on Machine Learning."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707239"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","unstructured":"Zhenliang Xue Yixin Song Zeyu Mi Xinrui Zheng Yubin Xia and Haibo Chen. 2024. PowerInfer-2: Fast Large Language Model Inference on a Smartphone. arXiv:2406.06282 [cs] 10.48550\/arXiv.2406.06282","DOI":"10.48550\/arXiv.2406.06282"},{"key":"e_1_3_2_1_39_1","unstructured":"Yuwei Zhang Kumar Ayush Siyuan Qiao A. Ali Heydari Girish Narayanswamy Maxwell A. Xu Ahmed A. Metwally Shawn Xu Jake Garrison Xuhai Xu Tim Althoff Yun Liu Pushmeet Kohli Jiening Zhan Mark Malhotra Shwetak Patel Cecilia Mascolo Xin Liu Daniel McDuff and Yuzhe Yang. 2025. SensorLM: Learning the Language of Wearable Sensors. arXiv:2506.09108 [cs.LG] https:\/\/arxiv.org\/abs\/2506.09108"},{"key":"e_1_3_2_1_40_1","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Zhou Zhe","year":"2023","unstructured":"Zhe Zhou, Yanxiang Bi, Junpeng Wan, Yangfan Zhou, and Zhou Li. 2023. Userspace bypass: Accelerating syscall-intensive applications. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23). USENIX Association, 33\u201349."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437984.3458838"}],"event":{"name":"MobiSys '26: 24th Annual International Conference on Mobile Systems, Applications and Services","location":"University of Cambridge Cambridge United Kingdom","acronym":"MobiSys '26","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 24th Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"deposited":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T12:56:16Z","timestamp":1780059376000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3745756.3809197"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,20]]},"references-count":41,"alternative-id":["10.1145\/3745756.3809197","10.1145\/3745756"],"URL":"https:\/\/doi.org\/10.1145\/3745756.3809197","relation":{},"subject":[],"published":{"date-parts":[[2026,6,20]]},"assertion":[{"value":"2026-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}