{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T01:16:27Z","timestamp":1772846187628,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,7,10]],"date-time":"2022-07-10T00:00:00Z","timestamp":1657411200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,7,10]]},"DOI":"10.1145\/3489517.3530420","type":"proceedings-article","created":{"date-parts":[[2022,8,23]],"date-time":"2022-08-23T23:19:29Z","timestamp":1661296769000},"page":"211-216","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":52,"title":["Serpens"],"prefix":"10.1145","author":[{"given":"Linghao","family":"Song","sequence":"first","affiliation":[{"name":"University of California"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuze","family":"Chi","sequence":"additional","affiliation":[{"name":"University of California"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Licheng","family":"Guo","sequence":"additional","affiliation":[{"name":"University of California"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jason","family":"Cong","sequence":"additional","affiliation":[{"name":"University of California"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,8,23]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Alveo U200 and U250 Data Center Accelerator Cards Data Sheet. https:\/\/www.xilinx.com\/products\/boards-and-kits\/alveo\/u250.html."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. Alveo U280 Data Center Accelerator Card Data Sheet. https:\/\/www.xilinx.com\/products\/boards-and-kits\/alveo\/u280.html."},{"key":"e_1_3_2_1_3_1","unstructured":"[n.d.]. HIGH BANDWIDTH MEMORY (HBM) DRAM. https:\/\/www.jedec.org\/standards-documents\/docs\/jesd235a."},{"key":"e_1_3_2_1_4_1","unstructured":"Mart\u00edn Abadi etal. 2016. TensorFlow: A System for Large-Scale Machine Learning. In OSDI."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Xinyu Chen et al. 2021. ThunderGP: HLS-based Graph Processing Framework on FPGAs. In FPGA.","DOI":"10.1145\/3431920.3439290"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Yuze Chi et al. 2021. Extending High-Level Synthesis for Task-Parallel Programs. In FCCM.","DOI":"10.1145\/3431920.3439470"},{"key":"e_1_3_2_1_7_1","unstructured":"Young-kyu Choi et al. 2020. When HLS Meets FPGA HBM: Benchmarking and Bandwidth Optimization. arXiv preprint (2020)."},{"key":"e_1_3_2_1_8_1","unstructured":"Young-kyu Choi et al. 2021. HBM Connect: High-Performance HLS Interconnect for FPGA HBM. In FPGA."},{"key":"e_1_3_2_1_9_1","volume-title":"Latte: Locality Aware Transformation for High-Level Synthesis. In FCCM.","author":"Jason Cong","year":"2018","unstructured":"Jason Cong et al. 2018. Latte: Locality Aware Transformation for High-Level Synthesis. In FCCM."},{"key":"e_1_3_2_1_10_1","volume-title":"Davis and Yifan Hu","author":"Timothy A.","year":"2011","unstructured":"Timothy A. Davis and Yifan Hu. 2011. The University of Florida Sparse Matrix Collection. ACM TOMS (2011)."},{"key":"e_1_3_2_1_11_1","unstructured":"Yixiao Du et al. 2022. High-Performance Sparse Linear Algebra on HBM-Equipped FPGAs Using HLS: A Case Study on SpMV. In FPGA."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Jeremy Fowers et al. 2014. A High Memory Bandwidth FPGA Accelerator for Sparse Matrix-Vector Multiplication. In FCCM.","DOI":"10.1109\/FCCM.2014.23"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Christina Giannoula et al. 2022. SparseP: Towards Efficient Sparse Matrix Vector Multiplication on Real Processing-In-Memory Systems. In SIGMETRICS.","DOI":"10.1109\/ISVLSI54635.2022.00063"},{"key":"e_1_3_2_1_14_1","unstructured":"Licheng Guo et al. 2020. Analysis and Optimization of the Implicit Broadcasts in FPGA HLS to Improve Maximum Frequency. In DAC."},{"key":"e_1_3_2_1_15_1","unstructured":"Licheng Guo et al. 2021. AutoBridge: Coupling Coarse-Grained Floorplanning and Pipelining for High-Frequency HLS Design on Multi-Die FPGAs. In FPGA."},{"key":"e_1_3_2_1_16_1","unstructured":"Song Han et al. 2015. Learning both Weights and Connections for Efficient Neural Networks. In NIPS."},{"key":"e_1_3_2_1_17_1","unstructured":"Weihua Hu et al. 2020. Open Graph Benchmark: Datasets for Machine Learning on Graphs. arXiv preprint (2020)."},{"key":"e_1_3_2_1_18_1","unstructured":"Yuwei Hu et al. 2021. GraphLily: Accelerating Graph Linear Algebra on HBM-Equipped FPGAs. In ICCAD."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Jeremy Kepner et al. 2016. Mathematical Foundations of the GraphBLAS. In HPEC.","DOI":"10.1109\/HPEC.2016.7761646"},{"key":"e_1_3_2_1_20_1","unstructured":"Jure Leskovec and Andrej Krevl. 2014. SNAP Datasets: Stanford Large Network Dataset Collection. http:\/\/snap.stanford.edu\/data."},{"key":"e_1_3_2_1_21_1","unstructured":"Jiajia Li et al. 2018. HiCOO: Hierarchical Storage of Sparse Tensors. In SC."},{"key":"e_1_3_2_1_22_1","unstructured":"Weifeng Liu and Brian Vinter. 2015. CSR5: An Efficient Storage Format for Cross-Platform Sparse Matrix-Vector Multiplication. In ICS."},{"key":"e_1_3_2_1_23_1","volume-title":"CUSPARSE Library. In GPU Tech. Conf.","author":"Maxim","unstructured":"Maxim Naumov et al. 2010. CUSPARSE Library. In GPU Tech. Conf."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Yousef Saad. 2003. Iterative Methods for Sparse Linear Systems. SIAM.","DOI":"10.1137\/1.9780898718003"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Fazle Sadi et al. 2019. Efficient SpMV Operation for Large and Highly Sparse Matrices using Scalable Multi-way Merge Parallelization. In MICRO.","DOI":"10.1145\/3352460.3358330"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Linghao Song et al. 2018. GraphR: Accelerating Graph Processing Using ReRAM. In HPCA.","DOI":"10.1109\/HPCA.2018.00052"},{"key":"e_1_3_2_1_27_1","volume-title":"Sextans: A Streaming Accelerator for General-Purpose Sparse-Matrix Dense-Matrix Multiplication. In FPGA.","author":"Linghao Song","year":"2022","unstructured":"Linghao Song et al. 2022. Sextans: A Streaming Accelerator for General-Purpose Sparse-Matrix Dense-Matrix Multiplication. In FPGA."},{"key":"e_1_3_2_1_28_1","volume-title":"Tensaurus: A Versatile Accelerator for Mixed Sparse-Dense Tensor Computations. In HPCA.","author":"Nitish","year":"2020","unstructured":"Nitish Srivastava et al. 2020. Tensaurus: A Versatile Accelerator for Mixed Sparse-Dense Tensor Computations. In HPCA."},{"key":"e_1_3_2_1_29_1","volume-title":"Proc. IEEE","author":"Vivienne","year":"2017","unstructured":"Vivienne Sze et al. 2017. Efficient Processing of Deep Neural Networks: A Tutorial and Survey. Proc. IEEE (2017)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Jie Wang et al. 2021. AutoSA: A Polyhedral Compiler for High-Performance Systolic Arrays on FPGA. In FPGA.","DOI":"10.1145\/3431920.3439292"},{"key":"e_1_3_2_1_31_1","unstructured":"Xinfeng Xie et al. 2021. SpaceA: Sparse Matrix Vector Multiplication on Processing-in-Memory Accelerator. In HPCA."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Shijie Zhou et al. 2019. HitGraph: High-throughput Graph Processing Framework on FPGA. IEEE TPDS (2019).","DOI":"10.1109\/TPDS.2019.2910068"}],"event":{"name":"DAC '22: 59th ACM\/IEEE Design Automation Conference","location":"San Francisco California","acronym":"DAC '22","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CEDA"]},"container-title":["Proceedings of the 59th ACM\/IEEE Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3489517.3530420","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3489517.3530420","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:39Z","timestamp":1750191519000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3489517.3530420"}},"subtitle":["a high bandwidth memory based accelerator for general-purpose sparse matrix-vector multiplication"],"short-title":[],"issued":{"date-parts":[[2022,7,10]]},"references-count":32,"alternative-id":["10.1145\/3489517.3530420","10.1145\/3489517"],"URL":"https:\/\/doi.org\/10.1145\/3489517.3530420","relation":{},"subject":[],"published":{"date-parts":[[2022,7,10]]},"assertion":[{"value":"2022-08-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}