{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T16:01:28Z","timestamp":1780675288383,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","award":["000705769"],"award-info":[{"award-number":["000705769"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1145\/3626202.3637568","type":"proceedings-article","created":{"date-parts":[[2024,4,2]],"date-time":"2024-04-02T18:04:51Z","timestamp":1712081091000},"page":"67-77","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["LevelST: Stream-based Accelerator for Sparse Triangular Solver"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3482-838X","authenticated-orcid":false,"given":"Zifan","family":"He","sequence":"first","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7450-2842","authenticated-orcid":false,"given":"Linghao","family":"Song","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1303-2012","authenticated-orcid":false,"given":"Robert F.","family":"Lucas","sequence":"additional","affiliation":[{"name":"Ansys, Inc., Livermore, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2887-6963","authenticated-orcid":false,"given":"Jason","family":"Cong","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,4,2]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. Alveo U280 Data Center Accelerator Card Data Sheet. https:\/\/docs.xilinx. com\/r\/en-US\/ds963-u280"},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. NVIDIA V100. https:\/\/www.nvidia.com\/en-us\/data-center\/v100\/"},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. UltraScale Architecture Libraries Guide (UG974). https:\/\/docs.xilinx.com\/ r\/2021.1-English\/ug974-vivado-ultrascale-libraries\/RAMB36E2"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479894278952"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2007.04.019"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/362946.362974"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1137\/140968896"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2010.45"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/800195.805928"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_2_1_11_1","volume-title":"ARC 2020, Toledo, Spain, April 1--3, 2020, Proceedings 16","author":"Favaro Federico","unstructured":"Federico Favaro, Ernesto Dufrechou, Pablo Ezzatti, and Juan P. Oliver. 2020. Exploring FPGA optimizations to compute sparse numerical linear algebra kernels. In Applied Reconfigurable Computing. Architectures, Tools, and Applications: 16th International Symposium, ARC 2020, Toledo, Spain, April 1--3, 2020, Proceedings 16. Springer, 258--268."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2007.908248"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/2650280.2650344"},{"key":"e_1_3_2_1_14_1","series-title":"SIAM journal on matrix analysis and applications 13, 1","volume-title":"Sparse matrices in MATLAB: Design and implementation","author":"Gilbert John R","year":"1992","unstructured":"John R Gilbert, Cleve Moler, and Robert Schreiber. 1992. Sparse matrices in MATLAB: Design and implementation. SIAM journal on matrix analysis and applications 13, 1 (1992), 333--356."},{"key":"e_1_3_2_1_15_1","unstructured":"Licheng Guo Yuze Chi Jason Lau Linghao Song Xingyu Tian Moazin Khatti Weikang Qiao Jie Wang Ecenur Ustun Zhenman Fang et al. 2022. TAPA: a scalable task-parallel dataflow programming framework for modern FPGAs with co-optimization of HLS and physical design. arXiv preprint arXiv:2209.02663 (2022)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3431920.3439289"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2019.00027"},{"key":"e_1_3_2_1_18_1","volume-title":"Least squares data fitting with applications","author":"Hansen Per Christian","unstructured":"Per Christian Hansen, Victor Pereyra, and Godela Scherer. 2013. Least squares data fitting with applications. JHU Press."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.6028\/jres.049.044"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555775"},{"key":"e_1_3_2_1_21_1","unstructured":"Intel. [n. d.]. Intel Stratix 10 Datasheet. https:\/\/www.intel.com\/content\/www\/ us\/en\/docs\/programmable\/683181\/current\/dsp-block-specifications.html"},{"key":"e_1_3_2_1_22_1","unstructured":"Intel. 2023. Intel Agilex 9 FPGA and SoC FPGA. https:\/\/www.intel.com\/content\/ www\/us\/en\/products\/details\/fpga\/agilex\/9.html"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/IMW.2017.7939084"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/0021-9991(78)90098-0"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433816"},{"key":"e_1_3_2_1_26_1","unstructured":"Yih-Yih Lin. [n. d.]. A comprehensive study on the performance of implicit LSDYNA. https:\/\/www.dynalook.com\/conferences\/12th-international-ls-dynaconference\/ computing-technologies27-a.pdf\/view"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-43659-3_45"},{"key":"e_1_3_2_1_28_1","volume-title":"TileSpTRSV: a tiled algorithm for parallel sparse triangular solve on GPUs. CCF Transactions on High Performance Computing","author":"Lu Zhengyang","year":"2023","unstructured":"Zhengyang Lu and Weifeng Liu. 2023. TileSpTRSV: a tiled algorithm for parallel sparse triangular solve on GPUs. CCF Transactions on High Performance Computing (2023), 1--15."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404413"},{"key":"e_1_3_2_1_30_1","volume-title":"European Conference on Parallel Processing. Springer, 444--455","author":"Martineau Matt","year":"2018","unstructured":"Matt Martineau, Patrick Atkinson, and Simon McIntosh-Smith. 2018. Benchmarking the NVIDIA v100 GPU and tensor cores. In European Conference on Parallel Processing. Springer, 444--455."},{"key":"e_1_3_2_1_31_1","volume-title":"Natick, MA","author":"Matlab Starting","year":"2012","unstructured":"Starting Matlab. 2012. Matlab. The MathWorks, Natick, MA (2012)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0010-4655(97)00041-6"},{"key":"e_1_3_2_1_33_1","volume-title":"GPU Technology Conference.","author":"Naumov Maxim","year":"2010","unstructured":"Maxim Naumov, L Chien, Philippe Vandermersch, and Ujval Kapasi. 2010. Cusparse library. In GPU Technology Conference."},{"key":"e_1_3_2_1_34_1","unstructured":"Nvidia. [n. d.]. NVIDIA AMPERE GA102 GPU ARCHITECTURE. https:\/\/www.nvidia.com\/content\/PDF\/nvidia-ampere-ga-102-gpuarchitecture- whitepaper-v2.pdf"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1080\/00029890.1991.11995755"},{"key":"e_1_3_2_1_36_1","first-page":"422","article-title":"Methods of multivariate analysis","volume":"12","author":"Rencher Alvin C","year":"1997","unstructured":"Alvin C Rencher and MG Schimek. 1997. Methods of multivariate analysis. Computational Statistics 12, 4 (1997), 422--422.","journal-title":"Computational Statistics"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530420"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502357"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543622.3573182"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404400"},{"key":"e_1_3_2_1_41_1","unstructured":"Livermore Software Technology. 2022. LS-DYNA. https:\/\/www.lstc.com\/ products\/ls-dyna"},{"key":"e_1_3_2_1_42_1","unstructured":"Andreas Wachter. 2002. An interior point algorithm for large-scale nonlinear optimization with applications in process engineering. Ph.D. Dissertation. Carnegie Mellon University."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178513"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"7163","author":"Yu Yue","year":"2019","unstructured":"Yue Yu, Jie Chen, Tian Gao, and Mo Yu. 2019. DAG-GNN: DAG Structure Learning with Graph Neural Networks. In Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 97), Kamalika Chaudhuri and Ruslan Salakhutdinov (Eds.). PMLR, 7154-- 7163. https:\/\/proceedings.mlr.press\/v97\/yu19a.html"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"}],"event":{"name":"FPGA '24: The 2024 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays","location":"Monterey CA USA","acronym":"FPGA '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation"]},"container-title":["Proceedings of the 2024 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626202.3637568","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626202.3637568","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T22:04:05Z","timestamp":1755900245000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626202.3637568"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4]]},"references-count":46,"alternative-id":["10.1145\/3626202.3637568","10.1145\/3626202"],"URL":"https:\/\/doi.org\/10.1145\/3626202.3637568","relation":{},"subject":[],"published":{"date-parts":[[2024,4]]},"assertion":[{"value":"2024-04-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}