{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T04:52:55Z","timestamp":1767847975521,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,13]],"date-time":"2024-10-13T00:00:00Z","timestamp":1728777600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Research Foundation Singapore","doi-asserted-by":"publisher","award":["NRF-CRP23-2019-0003"],"award-info":[{"award-number":["NRF-CRP23-2019-0003"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,14]]},"DOI":"10.1145\/3656019.3689905","type":"proceedings-article","created":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T10:34:08Z","timestamp":1728642848000},"page":"246-257","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["ZeD: A Generalized Accelerator for Variably Sparse Matrix Computations in ML"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-1339-6048","authenticated-orcid":false,"given":"Pranav","family":"Dangi","sequence":"first","affiliation":[{"name":"National University of Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1143-0762","authenticated-orcid":false,"given":"Zhenyu","family":"Bai","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6015-1084","authenticated-orcid":false,"given":"Rohan","family":"Juneja","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3181-2514","authenticated-orcid":false,"given":"Dhananjaya","family":"Wijerathne","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4136-4188","authenticated-orcid":false,"given":"Tulika","family":"Mitra","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.23919\/DATE58400.2024.10546782"},{"key":"e_1_3_2_1_2_1","unstructured":"Shivam Aggarwal Hans\u00a0Jakob Damsgaard Alessandro Pappalardo Giuseppe Franco Thomas\u00a0B. Preu\u00dfer Michaela Blott and Tulika Mitra. 2024. Shedding the Bits: Pushing the Boundaries of Quantization with Minifloats on FPGAs. arxiv:2311.12359\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2311.12359"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2019.2930057"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","unstructured":"D. Baek S. Hwang T. Heo D. Kim and J. Huh. 2021. InnerSP: A Memory Efficient Sparse Matrix Multiplication Accelerator with Locality-Aware Inner Product Processing. In 2021 30th International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE Computer Society Los Alamitos CA USA 116\u2013128. https:\/\/doi.org\/10.1109\/PACT52795.2021.00016","DOI":"10.1109\/PACT52795.2021.00016"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3085572"},{"key":"e_1_3_2_1_6_1","volume-title":"cuDNN: Efficient Primitives for Deep Learning. ArXiv abs\/1410.0759","author":"Chetlur Sharan","year":"2014","unstructured":"Sharan Chetlur, Cliff Woolley, Philippe Vandermersch, Jonathan\u00a0M. Cohen, John Tran, Bryan Catanzaro, and Evan Shelhamer. 2014. cuDNN: Efficient Primitives for Deep Learning. ArXiv abs\/1410.0759 (2014). https:\/\/api.semanticscholar.org\/CorpusID:12330432"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3322125"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00050"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Amir Gholami Sehoon Kim Zhen Dong Zhewei Yao Michael\u00a0W. Mahoney and Kurt Keutzer. 2021. A Survey of Quantization Methods for Efficient Neural Network Inference. arxiv:2103.13630\u00a0[cs.CV]","DOI":"10.1201\/9781003162810-13"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3508041"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358291"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Manas Gupta Efe Camci Vishandi\u00a0Rudy Keneta Abhishek Vaidyanathan Ritwik Kanodia Chuan-Sheng Foo Wu Min and Lin Jie. 2024. Is Complexity Required for Neural Network Pruning? A Case Study on Global Magnitude Pruning. arxiv:2209.14624\u00a0[cs.LG]","DOI":"10.1109\/CAI59869.2024.00144"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/355791.355796"},{"key":"e_1_3_2_1_14_1","volume-title":"Accelerating Attention Mechanisms in Neural Networks with Approximation. 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA)","author":"Ham Tae\u00a0Jun","year":"2020","unstructured":"Tae\u00a0Jun Ham, Sungjun Jung, Seonghak Kim, Young\u00a0H. Oh, Yeonhong Park, Yoonho Song, Jung-Hun Park, Sanghee Lee, Kyoung Park, Jae\u00a0W. Lee, and Deog-Kyoon Jeong. 2020. Accelerating Attention Mechanisms in Neural Networks with Approximation. 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA) (2020), 328\u2013341. https:\/\/api.semanticscholar.org\/CorpusID:211296403"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358275"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_18_1","volume-title":"Optimizing Sparse Matrix Vector Multiplication on SMP. In SIAM Conference on Parallel Processing for Scientific Computing. https:\/\/api.semanticscholar.org\/CorpusID:42432358","author":"Im Eun-Jin","year":"1999","unstructured":"Eun-Jin Im and Katherine\u00a0A. Yelick. 1999. Optimizing Sparse Matrix Vector Multiplication on SMP. In SIAM Conference on Parallel Processing for Scientific Computing. https:\/\/api.semanticscholar.org\/CorpusID:42432358"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358286"},{"key":"e_1_3_2_1_20_1","unstructured":"Sehoon Kim Coleman Hooper Thanakul Wattanawong Minwoo Kang Ruohan Yan Hasan Genc Grace Dinh Qijing Huang Kurt Keutzer Michael\u00a0W. Mahoney Yakun\u00a0Sophia Shao and Amir Gholami. 2023. Full Stack Optimization of Transformer Inference: a Survey. arxiv:2302.14017\u00a0[cs.CL]"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/3314872.3314894"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.279"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00049"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480125"},{"key":"e_1_3_2_1_25_1","unstructured":"Shuming Ma Hongyu Wang Lingxiao Ma Lei Wang Wenhui Wang Shaohan Huang Li Dong Ruiping Wang Jilong Xue and Furu Wei. 2024. The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits. arxiv:2402.17764\u00a0[cs.CL]"},{"key":"e_1_3_2_1_26_1","unstructured":"Asit Mishra Jorge\u00a0Albericio Latorre Jeff Pool Darko Stosic Dusan Stosic Ganesh Venkatesh Chong Yu and Paulius Micikevicius. 2021. Accelerating Sparse Deep Neural Networks. arxiv:2104.08378\u00a0[cs.LG]"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582069"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC53511.2021.00028"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00067"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304025"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480047"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00068"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"V. Sze Y.H. Chen T.J. Yang and J.S. Emer. 2020. Efficient Processing of Deep Neural Networks. Springer International Publishing. https:\/\/books.google.com.sg\/books?id=iJ05zwEACAAJ","DOI":"10.1007\/978-3-031-01766-7_2"},{"key":"e_1_3_2_1_35_1","unstructured":"Jan van Leeuwen. 1976. On the Construction of Huffman Trees. In International Colloquium on Automata Languages and Programming. https:\/\/api.semanticscholar.org\/CorpusID:37417891"},{"key":"e_1_3_2_1_36_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00088"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3623786"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS51385.2021.00043"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071027"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446702"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00030"}],"event":{"name":"PACT '24: International Conference on Parallel Architectures and Compilation Techniques","location":"Long Beach CA USA","acronym":"PACT '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3656019.3689905","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3656019.3689905","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:55:38Z","timestamp":1755892538000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3656019.3689905"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,13]]},"references-count":43,"alternative-id":["10.1145\/3656019.3689905","10.1145\/3656019"],"URL":"https:\/\/doi.org\/10.1145\/3656019.3689905","relation":{},"subject":[],"published":{"date-parts":[[2024,10,13]]},"assertion":[{"value":"2024-10-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}