{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T01:16:05Z","timestamp":1772846165390,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,12]],"date-time":"2023-02-12T00:00:00Z","timestamp":1676160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"name":"National Science Foundation","award":["CCF-1937599"],"award-info":[{"award-number":["CCF-1937599"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,12]]},"DOI":"10.1145\/3543622.3573182","type":"proceedings-article","created":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T23:15:13Z","timestamp":1676070913000},"page":"247-258","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Callipepla: Stream Centric Instruction Set and Mixed Precision for Accelerating Conjugate Gradient Solver"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7450-2842","authenticated-orcid":false,"given":"Linghao","family":"Song","sequence":"first","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0705-9510","authenticated-orcid":false,"given":"Licheng","family":"Guo","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8301-8411","authenticated-orcid":false,"given":"Suhail","family":"Basalama","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5885-0425","authenticated-orcid":false,"given":"Yuze","family":"Chi","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1303-2012","authenticated-orcid":false,"given":"Robert F.","family":"Lucas","sequence":"additional","affiliation":[{"name":"Livermore Software Technology, an ANSYS Company, Livermore, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2887-6963","authenticated-orcid":false,"given":"Jason","family":"Cong","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,2,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Approximation of Large-Scale Dynamical Systems","author":"Antoulas Athanasios C","unstructured":"Athanasios C Antoulas. 2005. Approximation of Large-Scale Dynamical Systems. Vol. 6. SIAM."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373087.3375297"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2021.3065902"},{"key":"e_1_3_2_1_4_1","volume-title":"Exploiting Computation Reuse for Stencil Accelerators. In 2020 57th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6.","author":"Chi Yuze","year":"2020","unstructured":"Yuze Chi and Jason Cong. 2020. Exploiting Computation Reuse for Stencil Accelerators. In 2020 57th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3240850"},{"key":"e_1_3_2_1_6_1","volume-title":"Extending High-Level Synthesis for Task-Parallel Programs. In 2021 IEEE 29th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM). IEEE, 204--213","author":"Chi Yuze","year":"2021","unstructured":"Yuze Chi, Licheng Guo, Jason Lau, Young-kyu Choi, Jie Wang, and Jason Cong. 2021. Extending High-Level Synthesis for Task-Parallel Programs. In 2021 IEEE 29th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM). IEEE, 204--213."},{"key":"e_1_3_2_1_7_1","volume-title":"HBM Connect: High-Performance HLS Interconnect for FPGA HBM. In The 2021 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays. 116--126","author":"Chi Yuze","year":"2021","unstructured":"Young-kyu Choi, Yuze Chi, Weikang Qiao, Nikola Samardzic, and Jason Cong. 2021. HBM Connect: High-Performance HLS Interconnect for FPGA HBM. In The 2021 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays. 116--126."},{"key":"e_1_3_2_1_8_1","volume-title":"Chen Zhang, and Peipei Zhou.","author":"Cong Jason","year":"2018","unstructured":"Jason Cong, Zhenman Fang, Yuchen Hao, Peng Wei, Cody Hao Yu, Chen Zhang, and Peipei Zhou. 2018. Best-Effort FPGA Programming: A Few Steps Can Go a Long Way. arXiv preprint arXiv:1807.01340 (2018)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2554688.2554771"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3240838"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_2_1_12_1","volume-title":"Knoxville","volume":"42","author":"Dongarra Jack","year":"2015","unstructured":"Jack Dongarra, Michael A Heroux, and Piotr Luszczek. 2015. HPCG Benchmark: A New Metric for Ranking High Performance Computing Systems. Knoxville, Tennessee, Vol. 42 (2015). https:\/\/hpcg-benchmark.org\/custom\/index.html?lid=155&slid=313"},{"key":"e_1_3_2_1_13_1","volume-title":"Computational Methods for Fluid Dynamics","author":"Ferziger Joel H","unstructured":"Joel H Ferziger and Milovan Peri\u0107. 2002. Computational Methods for Fluid Dynamics. Vol. 3. Springer."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/2650280.2650344"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898717761"},{"key":"e_1_3_2_1_16_1","volume-title":"TAPA: A Scalable Task-Parallel Dataflow Programming Framework for Modern FPGAs with Co-Optimization of HLS and Physical Design. arXiv preprint arXiv:2209.02663","author":"Guo Licheng","year":"2022","unstructured":"Licheng Guo, Yuze Chi, Jason Lau, Linghao Song, Xingyu Tian, Moazin Khatti, Weikang Qiao, Jie Wang, Ecenur Ustun, Zhenman Fang, et al. 2022. TAPA: A Scalable Task-Parallel Dataflow Programming Framework for Modern FPGAs with Co-Optimization of HLS and Physical Design. arXiv preprint arXiv:2209.02663 (2022)."},{"key":"e_1_3_2_1_17_1","unstructured":"Licheng Guo Yuze Chi Jie Wang Jason Lau Weikang Qiao Ecenur Ustun Zhiru Zhang and Jason Cong. 2021. AutoBridge: Coupling Coarse-Grained Floorplanning and Pipelining for High-Frequency HLS Design on Multi-Die FPGAs. In The 2021 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays. 81--92."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218718"},{"key":"e_1_3_2_1_19_1","unstructured":"Licheng Guo Jason Lau Zhenyuan Ruan Peng Wei and Jason Cong. 2019. Hardware Acceleration of Long Read Pairwise Overlapping in Genome Sequencing: A Race Between FPGA and GPU. In 2019 IEEE 27th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM). IEEE 127--135."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cam.2011.04.025"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.6028\/jres.049.044"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1017\/S0962492922000022"},{"key":"e_1_3_2_1_23_1","volume-title":"GraphLily: Accelerating Graph Linear Algebra on HBM-Equipped FPGAs. In 2021 IEEE\/ACM International Conference on Computer-Aided Design (ICCAD). IEEE, 1--9.","author":"Hu Yuwei","year":"2021","unstructured":"Yuwei Hu, Yixiao Du, Ecenur Ustun, and Zhiru Zhang. 2021. GraphLily: Accelerating Graph Linear Algebra on HBM-Equipped FPGAs. In 2021 IEEE\/ACM International Conference on Computer-Aided Design (ICCAD). IEEE, 1--9."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/0021-9991(78)90098-0"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00018"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78610-8_10"},{"key":"e_1_3_2_1_27_1","volume-title":"FPGA Implementation of the Conjugate Gradient Method. In International Conference on Parallel Processing and Applied Mathematics. Springer, 526--533","author":"Maslennikow Oleg","year":"2005","unstructured":"Oleg Maslennikow, Volodymyr Lepekha, and Anatoli Sergyienko. 2005. FPGA Implementation of the Conjugate Gradient Method. In International Conference on Parallel Processing and Applied Mathematics. Springer, 526--533."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0010-4655(97)00041-6"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3243176.3243212"},{"key":"e_1_3_2_1_30_1","volume-title":"Stream-Dataflow Acceleration. In 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA). IEEE, 416--429","author":"Nowatzki Tony","year":"2017","unstructured":"Tony Nowatzki, Vinay Gangadhar, Newsha Ardalani, and Karthikeyan Sankaralingam. 2017. Stream-Dataflow Acceleration. In 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA). IEEE, 416--429."},{"key":"e_1_3_2_1_31_1","unstructured":"NVIDIA. 2021. NVIDIA A100 TENSOR CORE GPU. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/a100\/pdf\/a100--80gb-datasheet-update-nvidia-us-1521051-r2-web.pdf."},{"key":"e_1_3_2_1_32_1","volume-title":"Efficient FPGA Implementation of Conjugate Gradient Methods for Laplacian System using HLS. arXiv preprint arXiv:1803.03797","author":"Rampalli Sahithi","year":"2018","unstructured":"Sahithi Rampalli, Natasha Sehgal, Ishita Bindlish, Tanya Tyagi, and Pawan Kumar. 2018. Efficient FPGA Implementation of Conjugate Gradient Methods for Laplacian System using HLS. arXiv preprint arXiv:1803.03797 (2018)."},{"key":"e_1_3_2_1_33_1","volume-title":"Accuracy Parameterizable Linear Equation Solvers for Model Predictive Control. In 2009 17th IEEE Symposium on Field Programmable Custom Computing Machines. IEEE, 209--216","author":"Roldao-Lopes Antonio","year":"2009","unstructured":"Antonio Roldao-Lopes, Amir Shahzad, George A Constantinides, and Eric C Kerrigan. 2009. More Flops or More Precision? Accuracy Parameterizable Linear Equation Solvers for Model Predictive Control. In 2009 17th IEEE Symposium on Field Programmable Custom Computing Machines. IEEE, 209--216."},{"key":"e_1_3_2_1_34_1","volume-title":"Iterative Methods for Sparse Linear Systems","author":"Saad Yousef","unstructured":"Yousef Saad. 2003. Iterative Methods for Sparse Linear Systems. Society for Industrial and Applied Mathematics."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373087.3375321"},{"key":"e_1_3_2_1_36_1","volume-title":"Low-Cost Floating-Point Processing in ReRAM for Scientific Computing. arXiv preprint arXiv:2011.03190","author":"Song Linghao","year":"2020","unstructured":"Linghao Song, Fan Chen, Xuehai Qian, Hai Li, and Yiran Chen. 2020. Low-Cost Floating-Point Processing in ReRAM for Scientific Computing. arXiv preprint arXiv:2011.03190 (2020)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530420"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502357"},{"key":"e_1_3_2_1_39_1","volume-title":"GraphR: Accelerating Graph Processing Using ReRAM. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 531--543","author":"Song Linghao","year":"2018","unstructured":"Linghao Song, Youwei Zhuo, Xuehai Qian, Hai Li, and Yiran Chen. 2018. GraphR: Accelerating Graph Processing Using ReRAM. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 531--543."},{"key":"e_1_3_2_1_40_1","unstructured":"Livermore Software Technology. 2022. LS-DYNA. https:\/\/www.lstc.com\/products\/ls-dyna"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3431920.3439292"},{"key":"e_1_3_2_1_42_1","volume-title":"DSAGEN: Synthesizing Programmable Spatial Accelerators. In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). IEEE, 268--281","author":"Weng Jian","year":"2020","unstructured":"Jian Weng, Sihao Liu, Vidushi Dadu, Zhengrong Wang, Preyas Shah, and Tony Nowatzki. 2020a. DSAGEN: Synthesizing Programmable Spatial Accelerators. In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). IEEE, 268--281."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3189976"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00063"},{"key":"e_1_3_2_1_45_1","unstructured":"Xilinx. 2022a. Alveo U280 Data Center Accelerator Card Data Sheet. https:\/\/www.xilinx.com\/content\/dam\/xilinx\/support\/documents\/data_sheets\/ds963-u280.pdf."},{"key":"e_1_3_2_1_46_1","unstructured":"Xilinx. 2022b. Vitis HPC Library. https:\/\/xilinx.github.io\/Vitis_Libraries\/hpc\/2022.1\/index.html"},{"key":"e_1_3_2_1_47_1","unstructured":"Xilinx. 2022c. Vitis Libraries. https:\/\/github.com\/Xilinx\/Vitis_Libraries"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373087.3375312"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021741"}],"event":{"name":"FPGA '23: The 2023 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays","location":"Monterey CA USA","acronym":"FPGA '23","sponsor":["SIGDA ACM Special Interest Group on Design Automation"]},"container-title":["Proceedings of the 2023 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543622.3573182","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543622.3573182","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:48Z","timestamp":1750186848000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543622.3573182"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,12]]},"references-count":50,"alternative-id":["10.1145\/3543622.3573182","10.1145\/3543622"],"URL":"https:\/\/doi.org\/10.1145\/3543622.3573182","relation":{},"subject":[],"published":{"date-parts":[[2023,2,12]]},"assertion":[{"value":"2023-02-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}