{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T10:35:51Z","timestamp":1771065351550,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,27]],"date-time":"2023-02-27T00:00:00Z","timestamp":1677456000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Science Challenge Project, China (TZ2016002), NSF of China (61472462, 11671049, 11601033, 62032023) and the foundation of key laboratory of computational physics, China. InterFLOP (ANR-20- CE46-0009) project of the French National Agency for Research (ANR). National Natural Science Foundation of China (No. 62032023). Natural Science Foundation of Shandong Province (no.ZR2021MA092)."}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,27]]},"DOI":"10.1145\/3578178.3578234","type":"proceedings-article","created":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T17:19:05Z","timestamp":1676049545000},"page":"46-54","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Comparison of Reproducible Parallel Preconditioned BiCGSTAB Algorithm Based on ExBLAS and ReproBLAS"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9573-1890","authenticated-orcid":false,"given":"Xiaojun","family":"Lei","sequence":"first","affiliation":[{"name":"Graduate School of Chinese Academy of Engineering Physics, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6175-7061","authenticated-orcid":false,"given":"Tongxiang","family":"Gu","sequence":"additional","affiliation":[{"name":"Institute of Applied Physics and Computational Mathematics, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8954-2276","authenticated-orcid":false,"given":"Stef","family":"Graillat","sequence":"additional","affiliation":[{"name":"Sorbonne University, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6032-454X","authenticated-orcid":false,"given":"Xiaowen","family":"Xu","sequence":"additional","affiliation":[{"name":"CAEP Software Center for Numerical Simulation, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4309-3492","authenticated-orcid":false,"given":"Jing","family":"Meng","sequence":"additional","affiliation":[{"name":"School of Mathematics and Statistics, Taishan University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,2,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3389360"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Richard Barrett Michael Berry Tony\u00a0F Chan James Demmel June Donato Jack Dongarra Victor Eijkhout Roldan Pozo Charles Romine and Henk Van\u00a0der Vorst. 1994. Templates for the solution of linear systems: building blocks for iterative methods. SIAM.","DOI":"10.1137\/1.9781611971538"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-31769-4_8"},{"key":"e_1_3_2_1_4_1","volume-title":"European Conference on Parallel Processing. Springer, 609\u2013620","author":"Chohra Chemseddine","year":"2016","unstructured":"Chemseddine Chohra, Philippe Langlois, and David Parello. 2016. Reproducible, accurately rounded and efficient BLAS. In European Conference on Parallel Processing. Springer, 609\u2013620."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2015.09.001"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2015.09.001"},{"key":"e_1_3_2_1_7_1","volume-title":"Differences in floating-point arithmetic between Intel R Xeon R processors and the Intel R Xeon PhiTM coprocessor. Technical report","author":"Corden M.","year":"2013","unstructured":"M. Corden. 2013. Differences in floating-point arithmetic between Intel R Xeon R processors and the Intel R Xeon PhiTM coprocessor. Technical report (2013)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2013.9"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2014.2345391"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.2118\/142297-MS"},{"key":"e_1_3_2_1_11_1","volume-title":"Numerical analysis","author":"Fletcher Roger","unstructured":"Roger Fletcher. 1976. Conjugate gradient methods for indefinite systems. In Numerical analysis. Springer, 73\u201389."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1236463.1236468"},{"key":"e_1_3_2_1_13_1","volume-title":"Using advanced MPI: Modern features of the message-passing interface","author":"Gropp William","unstructured":"William Gropp, Torsten Hoefler, Rajeev Thakur, and Ewing Lusk. 2014. Using advanced MPI: Modern features of the message-passing interface. MIT Press."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cam.2019.112697"},{"key":"e_1_3_2_1_15_1","unstructured":"Roman Iakymchuk Sylvain Collange David Defour and Stef Graillat. 2015. ExBLAS: Reproducible and accurate BLAS library. In NRE: Numerical Reproducibility at Exascale."},{"key":"e_1_3_2_1_16_1","unstructured":"Roman Iakymchuk Stef Graillat and Jos\u00e9 Aliaga. 2021. General framework for deriving reproducible Krylov subspace algorithms: A BiCGStab case study. (2021)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342020932650"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6105(02)00261-1"},{"key":"e_1_3_2_1_19_1","volume-title":"Seminumerical algorithms. The art of computer programming 2","author":"Knuth Donald\u00a0Ervin","year":"1997","unstructured":"Donald\u00a0Ervin Knuth. 1997. Seminumerical algorithms. The art of computer programming 2 (1997)."},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Parallel Processing and Applied Mathematics. Springer, 516\u2013527","author":"Mukunoki Daichi","year":"2019","unstructured":"Daichi Mukunoki, Takeshi Ogita, and Katsuhisa Ozaki. 2019. Reproducible BLAS routines with tunable accuracy using ozaki scheme for many-core architectures. In International Conference on Parallel Processing and Applied Mathematics. Springer, 516\u2013527."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3432261.3432270"},{"key":"e_1_3_2_1_22_1","unstructured":"Hong\u00a0Diep Nguyen James Demmel and Peter Ahrens. 2018. ReproBLAS: Reproducible BLAS."},{"key":"e_1_3_2_1_23_1","unstructured":"C Nvidia. 2013. Cublas library user guide."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1137\/030601818"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Yousef Saad. 2003. Iterative methods for sparse linear systems. SIAM.","DOI":"10.1137\/1.9780898718003"},{"key":"e_1_3_2_1_26_1","series-title":"SIAM Journal on scientific and statistical computing 7, 3","volume-title":"GMRES: A generalized minimal residual algorithm for solving nonsymmetric linear systems","author":"Saad Youcef","year":"1986","unstructured":"Youcef Saad and Martin\u00a0H Schultz. 1986. GMRES: A generalized minimal residual algorithm for solving nonsymmetric linear systems. SIAM Journal on scientific and statistical computing 7, 3 (1986), 856\u2013869."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.4236\/ajcm.2011.14035"},{"key":"e_1_3_2_1_28_1","series-title":"SIAM Journal on scientific and Statistical Computing 13, 2","volume-title":"Bi-CGSTAB: A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems","author":"Vorst A Van\u00a0der","year":"1992","unstructured":"Henk\u00a0A Van\u00a0der Vorst. 1992. Bi-CGSTAB: A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems. SIAM Journal on scientific and Statistical Computing 13, 2 (1992), 631\u2013644."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of Cray User Group Meeting (CUG), Vol.\u00a03.","author":"Villa Oreste","year":"2009","unstructured":"Oreste Villa, Daniel Chavarria-Miranda, Vidhya Gurumoorthi, Andr\u00e9s M\u00e1rquez, and Sriram Krishnamoorthy. 2009. Effects of floating-point non-associativity on numerical computations on massively multithreaded systems. In Proceedings of Cray User Group Meeting (CUG), Vol.\u00a03."},{"key":"e_1_3_2_1_30_1","volume-title":"High-Performance Computing on the Intel\u00ae Xeon Phi\u2122","author":"Wang Endong","unstructured":"Endong Wang, Qing Zhang, Bo Shen, Guangyong Zhang, Xiaowei Lu, Qing Wu, and Yajuan Wang. 2014. Intel math kernel library. In High-Performance Computing on the Intel\u00ae Xeon Phi\u2122. Springer, 167\u2013188."},{"key":"e_1_3_2_1_31_1","volume-title":"Precision performance: Floating point and IEEE 754 compliance for NVIDIA GPUs. Tech. rep. 21, 01","author":"Whitehead Nathan","year":"2011","unstructured":"Nathan Whitehead and Alex Fit-Florea. 2011. Precision performance: Floating point and IEEE 754 compliance for NVIDIA GPUs. Tech. rep. 21, 01 (2011), 18749\u201319424."},{"key":"e_1_3_2_1_32_1","first-page":"1","article-title":"Algebraic two-level iterative method for 2-D 3-T radiation diffusion equations","volume":"26","author":"Xiaowen Xu","year":"2009","unstructured":"Xu Xiaowen, Mo Zeyao, and An Hengbin. 2009. Algebraic two-level iterative method for 2-D 3-T radiation diffusion equations. Chinese Journal of Computational Physics 26, 1 (2009), 1.","journal-title":"Chinese Journal of Computational Physics"}],"event":{"name":"HPC ASIA 2023: International Conference on High Performance Computing in Asia-Pacific Region","location":"Singapore Singapore","acronym":"HPC ASIA 2023"},"container-title":["Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3578178.3578234","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3578178.3578234","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:20Z","timestamp":1750182560000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3578178.3578234"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,27]]},"references-count":32,"alternative-id":["10.1145\/3578178.3578234","10.1145\/3578178"],"URL":"https:\/\/doi.org\/10.1145\/3578178.3578234","relation":{},"subject":[],"published":{"date-parts":[[2023,2,27]]},"assertion":[{"value":"2023-02-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}