{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T12:35:30Z","timestamp":1770726930869,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","funder":[{"name":"National Science Foundation","award":["CCF-2007362"],"award-info":[{"award-number":["CCF-2007362"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,2,22]]},"DOI":"10.1145\/3748173.3779187","type":"proceedings-article","created":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T21:17:35Z","timestamp":1770326255000},"page":"67-78","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Enabling Efficient SpMM for Sparse Attention on GEMM-Optimized Hardware with Block Aggregation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1976-7469","authenticated-orcid":false,"given":"Tianchu","family":"Ji","sequence":"first","affiliation":[{"name":"Stony Brook University, Stony Brook, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4187-9368","authenticated-orcid":false,"given":"Niranjan","family":"Balasubramanian","sequence":"additional","affiliation":[{"name":"Stony Brook University, Stony Brook, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5808-1040","authenticated-orcid":false,"given":"Michael","family":"Ferdman","sequence":"additional","affiliation":[{"name":"Stony Brook University, Stony Brook, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1146-3011","authenticated-orcid":false,"given":"Peter","family":"Milder","sequence":"additional","affiliation":[{"name":"Stony Brook University, Stony Brook, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,2,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3529650"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2019.2930057"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","unstructured":"M. R. Ashuthosh Santosh Krishna Vishvas Sudarshan Srinivasan Subramaniyan and Madhura Purnaprajna. 2022. MAPPARAT: A Resource Constrained FPGA-Based Accelerator for Sparse-Dense Matrix Multiplication. In 2022 35th International Conference on VLSI Design and 2022 21st International Conference on Embedded Systems (VLSID). IEEE Computer Society Bangalore India 102-107. doi:10.1109\/VLSID2022.2022.00031","DOI":"10.1109\/VLSID2022.2022.00031"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.172"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Iz Beltagy Matthew E. Peters and Arman Cohan. 2020. Longformer: the long-document transformer. doi:10.48550\/arXiv.2004.05150","DOI":"10.48550\/arXiv.2004.05150"},{"key":"e_1_3_2_1_6_1","unstructured":"Intel Corporation. 2024. Stratix\u00ae 10 HBM2 Controller Features. https:\/\/www.intel.com\/content\/www\/us\/en\/docs\/programmable\/683189\/23-4-19-6-1\/hbm2-controller-features.html"},{"key":"e_1_3_2_1_7_1","unstructured":"NVIDIA Corporation. 2020. NVIDIA A100 Tensor Core GPU Architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1223"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502368"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392751"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00017"},{"key":"e_1_3_2_1_15_1","unstructured":"Xilinx Inc. 2019. Supercharge Your AI and Database Applications with Xilinx's HBM-Enabled UltraScale Devices Featuring Samsung HBM2. https:\/\/www.xilinx.com\/support\/documentation\/white_papers\/wp508-hbm2.pdf"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.363"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Antoine Roux Arthur Mensch Blanche Savary Chris Bamford Devendra Singh Chaplot Diego de las Casas Emma Bou Hanna Florian Bressand Gianna Lengyel Guillaume Bour Guillaume Lample L\u00e9lio Renard Lavaud Lucile Saulnier Marie-Anne Lachaux Pierre Stock Sandeep Subramanian Sophia Yang Szymon Antoniak Teven Le Scao Th\u00e9ophile Gervet Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William El Sayed. 2024b. Mixtral of Experts. doi:10.48550\/arXiv.2401.04088","DOI":"10.48550\/arXiv.2401.04088"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1663"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1445"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2023.3347291"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3520197"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3431920.3439293"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527423"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575706"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2022.3208206"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3688612"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2018.00067"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2504.17768"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00067"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530585"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589057"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507738"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626202.3637557"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1032"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT59805.2023.00016"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3706628.3708867"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480095"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545053"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. doi:10.48550\/arXiv.2307.09288","DOI":"10.48550\/arXiv.2307.09288"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2023.3273992"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00093"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3657362"},{"key":"e_1_3_2_1_45_1","volume-title":"Efficient Streaming Language Models with Attention Sinks. In The Twelfth International Conference on Learning Representations, ICLR","author":"Xiao Guangxuan","year":"2024","unstructured":"Guangxuan Xiao, Yuandong Tian, Beidi Chen, Song Han, and Mike Lewis. 2024. Efficient Streaming Language Models with Attention Sinks. In The Twelfth International Conference on Learning Representations, ICLR 2024. OpenReview.net, Vienna, Austria. https:\/\/openreview.net\/forum?id=NG7sS51zVF"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3181541"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00072"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00059"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2210.02414"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626202.3637562"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3170848"}],"event":{"name":"FPGA '26:The 2026 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays","location":"Seaside CA USA","sponsor":["SIGDA ACM Special Interest Group on Design Automation"]},"container-title":["Proceedings of the 2026 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays"],"original-title":[],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T16:17:28Z","timestamp":1770653848000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3748173.3779187"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,21]]},"references-count":51,"alternative-id":["10.1145\/3748173.3779187","10.1145\/3748173"],"URL":"https:\/\/doi.org\/10.1145\/3748173.3779187","relation":{},"subject":[],"published":{"date-parts":[[2026,2,21]]},"assertion":[{"value":"2026-02-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}