{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T07:23:20Z","timestamp":1777965800311,"version":"3.51.4"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030576745","type":"print"},{"value":"9783030576752","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-57675-2_25","type":"book-chapter","created":{"date-parts":[[2020,8,17]],"date-time":"2020-08-17T23:12:33Z","timestamp":1597705953000},"page":"392-407","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["3D Coded SUMMA: Communication-Efficient and Robust Parallel Matrix Multiplication"],"prefix":"10.1007","author":[{"given":"Haewon","family":"Jeong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaoqing","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vipul","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Engelmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tze Meng","family":"Low","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Viveck","family":"Cadambe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kannan","family":"Ramchandran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pulkit","family":"Grover","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,8,18]]},"reference":[{"issue":"5","key":"25_CR1","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1147\/rd.395.0575","volume":"39","author":"RC Agarwal","year":"1995","unstructured":"Agarwal, R.C., et al.: A three-dimensional approach to parallel matrix multiplication. IBM J. Res. Dev. 39(5), 575\u2013582 (1995)","journal-title":"IBM J. Res. Dev."},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Ashraf, R.A., Hukerikar, S., Engelmann, C.: Shrink or substitute: handling process failures in HPC systems using in-situ recovery. In: 2018 26th Euromicro International Conference on Parallel, Distributed and Network-based Processing (PDP), pp. 178\u2013185. IEEE (2018)","DOI":"10.1109\/PDP2018.2018.00032"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Benoit, A., Herault, T., F\u00e8vre, V.L., Robert, Y.: Replication is more efficient than you think. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. SC 2019, New York, USA (2019)","DOI":"10.1145\/3295500.3356171"},{"key":"25_CR4","unstructured":"Bergman, K., et al.: Exascale computing study: technology challenges in achieving exascale systems. DARPA Technical report (2008)"},{"issue":"3","key":"25_CR5","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1177\/1094342013488238","volume":"27","author":"W Bland","year":"2013","unstructured":"Bland, W., Bouteiller, A., Herault, T., Bosilca, G., Dongarra, J.: Post-failure recovery of MPI communication capability: Design and rationale. Int. J. High Perform. Comput. Appl. 27(3), 244\u2013254 (2013)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"25_CR6","unstructured":"Bosilca, G., et al.: Algorithmic based fault tolerance applied to high performance computing (2008)"},{"issue":"13","key":"25_CR7","doi-asserted-by":"publisher","first-page":"1749","DOI":"10.1002\/cpe.1206","volume":"19","author":"E Chan","year":"2007","unstructured":"Chan, E., et al.: Collective communication: Theory, practice, and experience. Concurr. Comput. Pract. Exp. 19(13), 1749\u20131783 (2007)","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"25_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Z.: Optimal real number codes for fault tolerant matrix operations. In: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis, p. 29. ACM (2009)","DOI":"10.1145\/1654059.1654089"},{"issue":"8","key":"25_CR9","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1145\/2442516.2442533","volume":"48","author":"Z Chen","year":"2013","unstructured":"Chen, Z.: Online-ABFT: An online algorithm based fault tolerance scheme for soft error detection in iterative methods. ACM SIGPLAN Not. 48(8), 167\u2013176 (2013). https:\/\/doi.org\/10.1145\/2442516.2442533","journal-title":"ACM SIGPLAN Not."},{"key":"25_CR10","unstructured":"Chen, Z., Dongarra, J.: Algorithm-based checkpoint-free fault tolerance for parallel matrix computations on volatile resources. In: Proceedings 20th IEEE International Parallel & Distributed Processing Symposium (2006)"},{"key":"25_CR11","doi-asserted-by":"publisher","unstructured":"Davies, T., Karlsson, C., Liu, H., Ding, C., Chen, Z.: High performance linpack benchmark: A fault tolerant implementation without checkpointing. In: Proceedings of the International Conference on Supercomputing, pp. 162\u2013171 (2011). https:\/\/doi.org\/10.1145\/1995896.1995923","DOI":"10.1145\/1995896.1995923"},{"issue":"1","key":"25_CR12","doi-asserted-by":"publisher","first-page":"278","DOI":"10.1109\/TIT.2019.2929328","volume":"66","author":"S Dutta","year":"2019","unstructured":"Dutta, S., et al.: On the optimal recovery threshold of coded matrix multiplication. IEEE Trans. Inf. Theory 66(1), 278\u2013301 (2019)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"25_CR13","doi-asserted-by":"crossref","unstructured":"Dutta, S., et al.: Addressing unreliability in emerging devices and Non-Von Neumann architectures using coded computing. Proc. IEEE (2020)","DOI":"10.1109\/JPROC.2020.2986362"},{"key":"25_CR14","unstructured":"Engelmann, C., Ong, H.H., Scott, S.L.: The case for modular redundancy in large-scale high performance computing systems. In: Proceedings of the 8th IASTED International Conference on Parallel and Distributed Computing and Networks (2009)"},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Ferreira, K., et al.: Evaluating the viability of process replication reliability for exascale systems. In: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis (2011)","DOI":"10.1145\/2063384.2063443"},{"key":"25_CR16","first-page":"2","volume":"10","author":"A Geist","year":"2016","unstructured":"Geist, A.: How to kill a supercomputer: Dirty power, cosmic rays, and bad solder. IEEE Spectr. 10, 2\u20133 (2016)","journal-title":"IEEE Spectr."},{"key":"25_CR17","doi-asserted-by":"crossref","unstructured":"Hakkarinen, D., Chen, Z.: Algorithmic Cholesky factorization fault recovery. In: 2010 IEEE International Symposium on Parallel & Distributed Processing (IPDPS), pp. 1\u201310. IEEE (2010)","DOI":"10.1109\/IPDPS.2010.5470436"},{"issue":"6","key":"25_CR18","doi-asserted-by":"publisher","first-page":"518","DOI":"10.1109\/TC.1984.1676475","volume":"33","author":"KH Huang","year":"1984","unstructured":"Huang, K.H., Abraham, J.A.: Algorithm-based fault tolerance for matrix operations. IEEE Trans. Comput. 33(6), 518\u2013528 (1984)","journal-title":"IEEE Trans. Comput."},{"key":"25_CR19","doi-asserted-by":"publisher","unstructured":"Jeong, H., et al.: Artifact instructions to generate experimental results for Euro-Par 2020 paper: 3D Coded SUMMA: Communication-Efficient and Robust Parallel Matrix Multiplication, July 2020. https:\/\/doi.org\/10.6084\/m9.figshare.12560330,https:\/\/springernature.figshare.com\/articles\/software\/Artifact_instructions_to_generate_experimental_results_for_Euro-Par_2020_paper_3D_Coded_SUMMA_Communication-Efficient_and_Robust_Parallel_Matrix_Multiplication\/12560330\/0","DOI":"10.6084\/m9.figshare.12560330,"},{"key":"25_CR20","unstructured":"Jeong, H., et al.: Coded SUMMA: Fully-decentralized coded matrix multiplication for high performance computing (2019). http:\/\/www.andrew.cmu.edu\/user\/haewonj\/documents\/codml19_full_summa.pdf"},{"issue":"3","key":"25_CR21","doi-asserted-by":"publisher","first-page":"1514","DOI":"10.1109\/TIT.2017.2736066","volume":"64","author":"K Lee","year":"2017","unstructured":"Lee, K., et al.: Speeding up distributed machine learning using codes. IEEE Trans. Inf. Theory 64(3), 1514\u20131529 (2017)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"25_CR22","unstructured":"Limited, F.: Fujitsu begins shipping supercomputer Fugaku (2019, Press release)"},{"issue":"2","key":"25_CR23","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1147\/rd.62.0200","volume":"6","author":"RE Lyons","year":"1962","unstructured":"Lyons, R.E., Vanderkulk, W.: The use of triple-modular redundancy to improve computer reliability. IBM J. Res. Dev. 6(2), 200\u2013209 (1962)","journal-title":"IBM J. Res. Dev."},{"key":"25_CR24","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/j.jpdc.2017.01.022","volume":"104","author":"M Moldaschl","year":"2017","unstructured":"Moldaschl, M., Prikopa, K.E., Gansterer, M.N.: Fault tolerant communication-optimal 2.5D matrix multiplication. J. Parallel Distrib. Comput. 104, 179\u2013190 (2017)","journal-title":"J. Parallel Distrib. Comput."},{"issue":"6","key":"25_CR25","doi-asserted-by":"publisher","first-page":"748","DOI":"10.1137\/140993478","volume":"38","author":"MD Schatz","year":"2016","unstructured":"Schatz, M.D., Van de Geijn, R.A., Poulson, J.: Parallel matrix multiplication: A systematic journey. SIAM J. Sci. Comput. 38(6), 748\u2013781 (2016)","journal-title":"SIAM J. Sci. Comput."},{"key":"25_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-19328-6_1","volume-title":"High Performance Computing for Computational Science \u2013 VECPAR 2010","author":"J Shalf","year":"2011","unstructured":"Shalf, J., Dosanjh, S., Morrison, J.: Exascale computing technology challenges. In: Palma, J.M.L.M., Dayd\u00e9, M., Marques, O., Lopes, J.C. (eds.) VECPAR 2010. LNCS, vol. 6449, pp. 1\u201325. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-19328-6_1"},{"key":"25_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1007\/978-3-642-23397-5_10","volume-title":"Euro-Par 2011 Parallel Processing","author":"E Solomonik","year":"2011","unstructured":"Solomonik, E., Demmel, J.: Communication-optimal parallel 2.5D matrix multiplication and LU factorization algorithms. In: Jeannot, E., Namyst, R., Roman, J. (eds.) Euro-Par 2011. LNCS, vol. 6853, pp. 90\u2013109. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-23397-5_10"},{"key":"25_CR28","doi-asserted-by":"crossref","unstructured":"Subramaniam, A.M., Heiderzadeh, A., Narayanan, K.R.: Collaborative decoding of polynomial codes for distributed computation. In: 2019 IEEE Information Theory Workshop (ITW), pp. 1\u20135 (2019)","DOI":"10.1109\/ITW44776.2019.8989254"},{"issue":"1","key":"25_CR29","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1177\/1094342005051521","volume":"19","author":"R Thakur","year":"2005","unstructured":"Thakur, R., Rabenseifner, R., Gropp, W.: Optimization of collective communication operations in MPICH. Int. J. High Perform. Comput. Appl. 19(1), 49\u201366 (2005)","journal-title":"Int. J. High Perform. Comput. Appl."},{"issue":"4","key":"25_CR30","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1002\/(SICI)1096-9128(199704)9:4<255::AID-CPE250>3.0.CO;2-2","volume":"9","author":"RA Van De Geijn","year":"1997","unstructured":"Van De Geijn, R.A., Watts, J.: Summa: Scalable universal matrix multiplication algorithm. Concurr. Pract. Exp. 9(4), 255\u2013274 (1997)","journal-title":"Concurr. Pract. Exp."},{"key":"25_CR31","doi-asserted-by":"crossref","unstructured":"Yang, Y., Grover, P., Kar, S.: Coding for a single sparse inverse problem. In: 2018 IEEE International Symposium on Information Theory (ISIT), pp. 1575\u20131579 (2018)","DOI":"10.1109\/ISIT.2018.8437459"},{"issue":"4","key":"25_CR32","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1177\/1094342015578487","volume":"29","author":"E Yao","year":"2015","unstructured":"Yao, E., Zhang, J., Chen, M., Tan, G., Sun, N.: Detection of soft errors in LU decomposition with partial pivoting using algorithm-based fault tolerance. Int. J. High Perform. Comput. Appl. 29(4), 422\u2013436 (2015). https:\/\/doi.org\/10.1177\/1094342015578487","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"25_CR33","doi-asserted-by":"crossref","unstructured":"Yu, Q., Maddah-Ali, M.A., Avestimehr, A.S.: Straggler mitigation in distributed matrix multiplication: Fundamental limits and optimal coding. In: IEEE International Symposium on Information Theory (ISIT), pp. 2022\u20132026 (2018)","DOI":"10.1109\/ISIT.2018.8437563"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2020: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-57675-2_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,18]],"date-time":"2024-08-18T00:04:20Z","timestamp":1723939460000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-57675-2_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030576745","9783030576752"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-57675-2_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"18 August 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Warsaw","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2020.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"158","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"39","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the coronavirus pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}