{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:42Z","timestamp":1766219982028,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","funder":[{"name":"Strategic Priority Research Program of the Chinese Academy of Sciences","award":["XDB0500102"],"award-info":[{"award-number":["XDB0500102"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754647","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"188-198","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Fast Sparse Triangular Solve for Structured-grid Problems on Heterogeneous Processors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8500-6173","authenticated-orcid":false,"given":"Zhengding","family":"Hu","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7179-6593","authenticated-orcid":false,"given":"Yi","family":"Zong","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5098-1503","authenticated-orcid":false,"given":"Jingwei","family":"Sun","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9740-6581","authenticated-orcid":false,"given":"Wei","family":"Xue","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China and Qinghai University and Intelligent Computing and Application Laboratory of Qinghai Province, Xining, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0794-7681","authenticated-orcid":false,"given":"Guangzhong","family":"Sun","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Mark Adams. 2014. HPGMG 1.0: A benchmark for ranking high performance computing systems. (2014).","DOI":"10.2172\/1131029"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Yulong Ao Chao Yang Fangfang Liu Wanwang Yin Lijuan Jiang and Qiao Sun. 2018. Performance optimization of the HPCG benchmark on the Sunway TaihuLight supercomputer. ACM Transactions on Architecture and Code Optimization (TACO) 15 1 (2018) 1\u201320.","DOI":"10.1145\/3182177"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-011-5412-3_6"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-011-5412-3_6"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"DeHui Chen JiShan Xue XueSheng Yang HongLiang Zhang XueShun Shen JiangLin Hu Yu Wang LiRen Ji and JiaBin Chen. 2008. New generation of multi-scale NWP system (GRAPES): general scientific design. Chinese Science Bulletin 53 22 (2008) 3433\u20133445.","DOI":"10.1007\/s11434-008-0494-z"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530508"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Timothy\u00a0A Davis and Yifan Hu. 2011. The University of Florida sparse matrix collection. ACM Transactions on Mathematical Software (TOMS) 38 1 (2011) 1\u201325.","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_3_1_9_2","unstructured":"Jack Dongarra and Michael\u00a0A Heroux. 2013. Toward a new metric for ranking high performance computing systems. Sandia Report SAND2013-4744 312 (2013) 150."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/PDP2018.2018.00034"},{"key":"e_1_3_3_1_11_2","unstructured":"Zhengding Hu Jingwei Sun Zhongyang Li and Guangzhong Sun. 2024. AG-SpTRSV: An Automatic Framework to Optimize Sparse Triangular Solve on GPUs. ACM Transactions on Architecture and Code Optimization (2024)."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00010"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00076"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Xiaomeng Huang Qiang Tang Yuheng Tseng Yong Hu Allison\u00a0H Baker Frank\u00a0O Bryan John Dennis Haohuan Fu and Guangwen Yang. 2016. P-CSI v1. 0 an accelerated barotropic solver for the high-resolution ocean model component in the Community Earth System Model v2. 0. Geoscientific Model Development 9 11 (2016) 4209\u20134225.","DOI":"10.5194\/gmd-9-4209-2016"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Jianjiang Li Jiabi Liang Wei Xue Zhengding Hu Lin Li and Jinliang Shi. 2024. Toward efficient structured-grid triangular solver on sunway many-core processors. The Journal of Supercomputing 80 8 (2024) 10610\u201310636.","DOI":"10.1007\/s11227-023-05802-2"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611976137.10"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-43659-3_45"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751209"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Daniel Lowell Jeswin Godwin Justin Holewinski Deepan Karthik Chekuri Choudary Azamat Mametjanov Boyana Norris Gerald Sabin P Sadayappan and Jason Sarich. 2013. Stencil-aware GPU optimization of iterative solvers. SIAM Journal on Scientific Computing 35 5 (2013) S209\u2013S228.","DOI":"10.1137\/120883153"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Zhengyang Lu and Weifeng Liu. 2023. Tilesptrsv: a tiled algorithm for parallel sparse triangular solve on gpus. CCF Transactions on High Performance Computing 5 2 (2023) 129\u2013143.","DOI":"10.1007\/s42514-023-00151-1"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3721145.3725745"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2012.6507483"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.57"},{"key":"e_1_3_3_1_24_2","volume-title":"GPU Technology Conference","author":"Naumov Maxim","year":"2010","unstructured":"Maxim Naumov, L Chien, Philippe Vandermersch, and Ujval Kapasi. 2010. Cusparse library. In GPU Technology Conference."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Natalia\u00a0K Nikolova Helen\u00a0W Tam and Mohamed\u00a0H Bakr. 2004. Sensitivity analysis with the FDTD method on structured grids. IEEE Transactions on Microwave Theory and Techniques 52 4 (2004) 1207\u20131216.","DOI":"10.1109\/TMTT.2004.825710"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-07518-1_8"},{"key":"e_1_3_3_1_27_2","unstructured":"Sivasankaran Rajamanickam Seher Acer Luc Berger-Vergiat Vinh Dang Nathan Ellingwood Evan Harvey Brian Kelley Christian\u00a0R Trott Jeremiah Wilke and Ichitaro Yamazaki. 2021. Kokkos kernels: Performance portable sparse\/dense linear algebra and graph kernels. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2103.11991 (2021)."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.5555\/829576"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.5555\/829576"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Olaf Schenk Klaus G\u00e4rtner Wolfgang Fichtner and Andreas Stricker. 2001. PARDISO: a high-performance serial and parallel sparse linear solver in semiconductor device simulation. Future Generation Computer Systems 18 1 (2001) 69\u201378.","DOI":"10.1016\/S0167-739X(00)00076-5"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079079.3079086"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"J\u00fcgen Steppeler R Hess U Sch\u00e4ttler and Luca Bonaventura. 2003. Review of numerical methods for nonhydrostatic weather prediction models. Meteorology and Atmospheric Physics 82 1 (2003) 287\u2013301.","DOI":"10.1007\/s00703-001-0593-8"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404400"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178513"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3225058.3225071"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Shengen Yan Chao Li Yunquan Zhang and Huiyang Zhou. 2014. yaSpMV: Yet another SpMV framework on GPUs. Acm Sigplan Notices 49 8 (2014) 107\u2013118.","DOI":"10.1145\/2692916.2555255"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.5"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00065"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"crossref","unstructured":"Feng Zhang Jiya Su Weifeng Liu Bingsheng He Ruofan Wu Xiaoyong Du and Rujia Wang. 2021. YuenyeungSpTRSV: A Thread-Level and Warp-Level Fusion Synchronization-Free Sparse Triangular Solve. IEEE Transactions on Parallel and Distributed Systems 32 9 (2021) 2321\u20132337.","DOI":"10.1109\/TPDS.2021.3066635"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476158"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627535.3638482"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673040"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710849"}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754647","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:36:11Z","timestamp":1766219771000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754647"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":42,"alternative-id":["10.1145\/3754598.3754647","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754647","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}