{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T08:23:01Z","timestamp":1769156581373,"version":"3.49.0"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031304415","type":"print"},{"value":"9783031304422","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-30442-2_4","type":"book-chapter","created":{"date-parts":[[2023,4,27]],"date-time":"2023-04-27T10:02:09Z","timestamp":1682589729000},"page":"40-54","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Infinite-Precision Inner Product and\u00a0Sparse Matrix-Vector Multiplication Using Ozaki Scheme with\u00a0Dot2 on\u00a0Manycore Processors"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0051-6811","authenticated-orcid":false,"given":"Daichi","family":"Mukunoki","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0431-6232","authenticated-orcid":false,"given":"Katsuhisa","family":"Ozaki","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9346-2452","authenticated-orcid":false,"given":"Takeshi","family":"Ogita","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1601-9710","authenticated-orcid":false,"given":"Toshiyuki","family":"Imamura","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,28]]},"reference":[{"key":"4_CR1","doi-asserted-by":"publisher","unstructured":"Arteaga, A., Fuhrer, O., Hoefler, T.: Designing bit-reproducible portable high-performance applications. In: Proceedings of IEEE 28th International Parallel and Distributed Processing Symposium (IPDPS 2014), pp. 1235\u20131244 (2014). https:\/\/doi.org\/10.1109\/IPDPS.2014.127","DOI":"10.1109\/IPDPS.2014.127"},{"key":"4_CR2","doi-asserted-by":"publisher","unstructured":"Bell, N., Garland, M.: Implementing sparse matrix-vector multiplication on throughput-oriented processors. In: Proceedings of International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2009), pp. 1\u201311. No. 18 (2009). https:\/\/doi.org\/10.1145\/1654059.1654078","DOI":"10.1145\/1654059.1654078"},{"issue":"1","key":"4_CR3","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.tcs.2005.09.058","volume":"351","author":"K Briggs","year":"2006","unstructured":"Briggs, K.: Implementing exact real arithmetic in python, c++ and c. Theoret. Comput. Sci. 351(1), 74\u201381 (2006). https:\/\/doi.org\/10.1016\/j.tcs.2005.09.058","journal-title":"Theoret. Comput. Sci."},{"key":"4_CR4","doi-asserted-by":"publisher","unstructured":"Chohra, C., Langlois, P., Parello, D.: Reproducible, accurately rounded and efficient BLAS. In: 22nd International European Conference on Parallel and Distributed Computing (Euro-Par 2016), pp. 609\u2013620 (2016). https:\/\/doi.org\/10.1007\/978-3-319-58943-5_49","DOI":"10.1007\/978-3-319-58943-5_49"},{"key":"4_CR5","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.parco.2015.09.001","volume":"49","author":"S Collange","year":"2015","unstructured":"Collange, S., Defour, D., Graillat, S., Iakymchuk, R.: Numerical reproducibility for the parallel reduction on multi- and many-core architectures. Parallel Comput. 49, 83\u201397 (2015). https:\/\/doi.org\/10.1016\/j.parco.2015.09.001","journal-title":"Parallel Comput."},{"key":"4_CR6","doi-asserted-by":"publisher","unstructured":"Davis, T.A., Hu, Y.: The university of Florida sparse matrix collection. ACM Trans. Math. Softw. 38(1), 1:1\u20131:25 (2011). https:\/\/doi.org\/10.1145\/2049662.2049663","DOI":"10.1145\/2049662.2049663"},{"key":"4_CR7","unstructured":"Demmel, J., Ahrens, P., Nguyen, H.D.: Efficient Reproducible Floating Point Summation and BLAS. Technical report. UCB\/EECS-2016-121, EECS Department, University of California, Berkeley (2016)"},{"key":"4_CR8","doi-asserted-by":"publisher","unstructured":"Demmel, J., Eliahu, D., Fox, A., Kamil, S., Lipshitz, B., Schwartz, O., Spillinger, O.: Communication-optimal parallel recursive rectangular matrix multiplication. In: 2013 IEEE 27th International Symposium on Parallel and Distributed Processing, pp. 261\u2013272 (2013). https:\/\/doi.org\/10.1109\/IPDPS.2013.80","DOI":"10.1109\/IPDPS.2013.80"},{"key":"4_CR9","doi-asserted-by":"publisher","unstructured":"Fousse, L., Hanrot, G., Lef\u00e8vre, V., P\u00e9lissier, P., Zimmermann, P.: MPFR: a multiple-precision binary floating-point library with correct rounding. ACM Trans. Math. Softw. 33(2), 13:1\u201313:15 (2007). https:\/\/doi.org\/10.1145\/1236463.1236468","DOI":"10.1145\/1236463.1236468"},{"key":"4_CR10","doi-asserted-by":"publisher","DOI":"10.1177\/1094342020932650","author":"R Iakymchuk","year":"2020","unstructured":"Iakymchuk, R., Barreda, M., Graillat, S., Aliaga, J.I., Quintana-Ort\u00ed, E.S.: Reproducibility of parallel preconditioned conjugate gradient in hybrid programming environments. IJHPCA (2020). https:\/\/doi.org\/10.1177\/1094342020932650","journal-title":"IJHPCA"},{"key":"4_CR11","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1145\/279232.279237","volume":"23","author":"AH Karp","year":"1997","unstructured":"Karp, A.H., Markstein, P.: High-precision division and square root. ACM Trans. Math. Softw. 23, 561\u2013589 (1997). https:\/\/doi.org\/10.1145\/279232.279237","journal-title":"ACM Trans. Math. Softw."},{"key":"4_CR12","series-title":"Seminumerical Algorithms","volume-title":"The Art of Computer Programming","author":"DE Knuth","year":"1969","unstructured":"Knuth, D.E.: The Art of Computer Programming. Seminumerical Algorithms, vol. 2. Addison-Wesley, Boston (1969)"},{"issue":"1","key":"4_CR13","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1017\/S0960129506005822","volume":"17","author":"B Lambov","year":"2007","unstructured":"Lambov, B.: Reallib: an efficient implementation of exact real arithmetic. Math. Struct. Comp. Sci. 17(1), 81\u201398 (2007). https:\/\/doi.org\/10.1017\/S0960129506005822","journal-title":"Math. Struct. Comp. Sci."},{"issue":"2","key":"4_CR14","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1145\/567806.567808","volume":"28","author":"XS Li","year":"2000","unstructured":"Li, X.S., et al.: Design, implementation and testing of extended and mixed precision BLAS. ACM Trans. Math. Softw. 28(2), 152\u2013205 (2000). https:\/\/doi.org\/10.1145\/567806.567808","journal-title":"ACM Trans. Math. Softw."},{"key":"4_CR15","unstructured":"Minamihata, A., Ozaki, K., Ogita, T., Oishi, S.: Preconditioner for ill-conditioned tall and skinny matrices. In: The 40th JSST Annual International Conference on Simulation Technology (JSST2016) (2016)"},{"key":"4_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.cam.2019.112701","volume":"372","author":"D Mukunoki","year":"2020","unstructured":"Mukunoki, D., Ogita, T.: Performance and energy consumption of accurate and mixed-precision linear algebra kernels on GPUs. J. Comput. Appl. Math. 372, 112701 (2020). https:\/\/doi.org\/10.1016\/j.cam.2019.112701","journal-title":"J. Comput. Appl. Math."},{"key":"4_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"516","DOI":"10.1007\/978-3-030-43229-4_44","volume-title":"Parallel Processing and Applied Mathematics","author":"D Mukunoki","year":"2020","unstructured":"Mukunoki, D., Ogita, T., Ozaki, K.: Reproducible BLAS routines with\u00a0tunable accuracy using Ozaki scheme for many-core architectures. In: Wyrzykowski, R., Deelman, E., Dongarra, J., Karczewski, K. (eds.) PPAM 2019. LNCS, vol. 12043, pp. 516\u2013527. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-43229-4_44"},{"key":"4_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1007\/978-3-030-50743-5_12","volume-title":"High Performance Computing","author":"D Mukunoki","year":"2020","unstructured":"Mukunoki, D., Ozaki, K., Ogita, T., Imamura, T.: DGEMM using tensor cores, and its accurate and reproducible versions. In: Sadayappan, P., Chamberlain, B.L., Juckeland, G., Ltaief, H. (eds.) ISC High Performance 2020. LNCS, vol. 12151, pp. 230\u2013248. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-50743-5_12"},{"key":"4_CR19","doi-asserted-by":"publisher","unstructured":"Mukunoki, D., Ozaki, K., Ogita, T., Iakymchuk, R.: Conjugate gradient solvers with high accuracy and bit-wise reproducibility between CPU and GPU using Ozaki scheme. In: Proceedings of The International Conference on High Performance Computing in Asia-Pacific Region (HPC Asia 2021), pp. 100\u2013109 (2021). https:\/\/doi.org\/10.1145\/3432261.3432270","DOI":"10.1145\/3432261.3432270"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"M\u00fcller, N.T.: The irram: Exact arithmetic in c++. In: Computability and Complexity in Analysis. pp. 222\u2013252. Springer, Berlin Heidelberg (2001). DOI: 10.1007\/3-540-45335-0_14","DOI":"10.1007\/3-540-45335-0_14"},{"key":"4_CR21","unstructured":"Nakata, M.: Mplapack version 1.0.0 user manual (2021)"},{"key":"4_CR22","doi-asserted-by":"publisher","first-page":"1955","DOI":"10.1137\/030601818","volume":"26","author":"T Ogita","year":"2005","unstructured":"Ogita, T., Rump, S.M., Oishi, S.: Accurate sum and dot product. SIAM J. Sci. Comput. 26, 1955\u20131988 (2005). https:\/\/doi.org\/10.1137\/030601818","journal-title":"SIAM J. Sci. Comput."},{"issue":"1","key":"4_CR23","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1007\/s11075-011-9478-1","volume":"59","author":"K Ozaki","year":"2012","unstructured":"Ozaki, K., Ogita, T., Oishi, S., Rump, S.M.: Error-free transformations of matrix multiplication by using fast routines of matrix multiplication and its applications. Numer. Algorithms 59(1), 95\u2013118 (2012). https:\/\/doi.org\/10.1007\/s11075-011-9478-1","journal-title":"Numer. Algorithms"},{"key":"4_CR24","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1587\/nolta.4.2","volume":"4","author":"K Ozaki","year":"2013","unstructured":"Ozaki, K., Ogita, T., Oishi, S., Rump, S.M.: Generalization of error-free transformation for matrix multiplication and its application. Nonlinear Theory Appl. IEICE 4, 2\u201311 (2013). https:\/\/doi.org\/10.1587\/nolta.4.2","journal-title":"Nonlinear Theory Appl. IEICE"},{"issue":"2","key":"4_CR25","doi-asserted-by":"publisher","first-page":"1269","DOI":"10.1137\/07068816X","volume":"31","author":"SM Rump","year":"2009","unstructured":"Rump, S.M., Ogita, T., Oishi, S.: Accurate floating-point summation Part II: sign, K-Fold faithful and rounding to nearest. SIAM J. Sci. Comput. 31(2), 1269\u20131302 (2009). https:\/\/doi.org\/10.1137\/07068816X","journal-title":"SIAM J. Sci. Comput."},{"key":"4_CR26","unstructured":"Todd, R.: Introduction to Conditional Numerical Reproducibility (CNR) (2012). https:\/\/software.intel.com\/en-us\/articles\/introduction-to-the-conditional-numerical-reproducibility-cnr"},{"key":"4_CR27","doi-asserted-by":"publisher","unstructured":"Wei, S., Tang, E., Liu, T., M\u00fcller, N.T., Chen, Z.: Automatic numerical analysis based on infinite-precision arithmetic. In: 2014 Eighth International Conference on Software Security and Reliability (SERE), pp. 216\u2013224 (2014). https:\/\/doi.org\/10.1109\/SERE.2014.35","DOI":"10.1109\/SERE.2014.35"},{"issue":"4","key":"4_CR28","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009). https:\/\/doi.org\/10.1145\/1498765.1498785","journal-title":"Commun. ACM"}],"container-title":["Lecture Notes in Computer Science","Parallel Processing and Applied Mathematics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-30442-2_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,27]],"date-time":"2023-04-27T10:02:17Z","timestamp":1682589737000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-30442-2_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031304415","9783031304422"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-30442-2_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"28 April 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PPAM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Parallel Processing and Applied Mathematics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Gdansk","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ppam2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ppam.edu.pl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"132","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"77","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"58% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}