{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T07:23:27Z","timestamp":1777965807614,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,11,13]],"date-time":"2021-11-13T00:00:00Z","timestamp":1636761600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["678880, 801039"],"award-info":[{"award-number":["678880, 801039"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Swiss National Science Foundation","award":["185778"],"award-info":[{"award-number":["185778"]}]},{"name":"Platform for Advanced Scientific Computing"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,11,14]]},"DOI":"10.1145\/3458817.3476167","type":"proceedings-article","created":{"date-parts":[[2021,10,21]],"date-time":"2021-10-21T04:49:21Z","timestamp":1634791761000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["On the parallel I\/O optimality of linear algebra kernels"],"prefix":"10.1145","author":[{"given":"Grzegorz","family":"Kwasniewski","sequence":"first","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marko","family":"Kabic","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tal","family":"Ben-Nun","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandros Nikolaos","family":"Ziogas","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jens Eirik","family":"Saethre","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andr\u00e9","family":"Gaillard","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Timo","family":"Schneider","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Maciej","family":"Besta","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anton","family":"Kozhevnikov","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joost","family":"VandeVondele","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Torsten","family":"Hoefler","sequence":"additional","affiliation":[{"name":"ETH Zurich, Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,11,13]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/48529.48535"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/AICCSA.2011.6126599"},{"key":"e_1_3_2_2_3_1","volume-title":"GPU Computing Gems, Wen mei W","author":"Agullo Emmanuel","unstructured":"Emmanuel Agullo , C\u00e9dric Augonnet , Jack Dongarra , Hatem Ltaief , Raymond Namyst , Samuel Thibault , and Stanimire Tomov . 2010. Faster, Cheaper , Better - a Hybridization Methodology to Develop Linear Algebra Software for GPUs . In GPU Computing Gems, Wen mei W . Hwu (Ed.). Vol. 2 . Morgan Kaufmann . https:\/\/hal.inria.fr\/inria-00547847 Emmanuel Agullo, C\u00e9dric Augonnet, Jack Dongarra, Hatem Ltaief, Raymond Namyst, Samuel Thibault, and Stanimire Tomov. 2010. Faster, Cheaper, Better - a Hybridization Methodology to Develop Linear Algebra Software for GPUs. In GPU Computing Gems, Wen mei W. Hwu (Ed.). Vol. 2. Morgan Kaufmann. https:\/\/hal.inria.fr\/inria-00547847"},{"key":"e_1_3_2_2_4_1","volume-title":"Innovative Computing Laboratory","author":"Agullo Emmanuel","unstructured":"Emmanuel Agullo , Jack Dongarra , Bilel Hadri , Jakub Kurzak , Julie Langou , Julien Langou , Hatem Ltaief , Piotr Luszczek , and Asim YarKhan . 2011. PLASMA Users' Guide. Parallel Linear Algebra Software for Multicore Architectures. Rapport technique , Innovative Computing Laboratory , University of Tennessee (2011) . Emmanuel Agullo, Jack Dongarra, Bilel Hadri, Jakub Kurzak, Julie Langou, Julien Langou, Hatem Ltaief, Piotr Luszczek, and Asim YarKhan. 2011. PLASMA Users' Guide. Parallel Linear Algebra Software for Multicore Architectures. Rapport technique, Innovative Computing Laboratory, University of Tennessee (2011)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2746539.2746622"},{"key":"e_1_3_2_2_6_1","volume-title":"Anne Greenbaum, Sven Hammarling, Alan McKenney, and Danny Sorensen.","author":"Anderson Edward","year":"1999","unstructured":"Edward Anderson , Zhaojun Bai , Christian Bischof , Susan Blackford , Jack Dongarra , Jeremy Du Croz , Anne Greenbaum, Sven Hammarling, Alan McKenney, and Danny Sorensen. 1999 . LAPACK Users' guide. Vol. 9 . Siam . Edward Anderson, Zhaojun Bai, Christian Bischof, Susan Blackford, Jack Dongarra, Jeremy Du Croz, Anne Greenbaum, Sven Hammarling, Alan McKenney, and Danny Sorensen. 1999. LAPACK Users' guide. Vol. 9. Siam."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1137\/090760969"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1137\/090769156"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-11970-5_16"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"crossref","unstructured":"L. S. Blackford J. Choi A. Cleary E. D'Azevedo J. Demmel I. Dhillon J. Dongarra S. Hammarling G. Henry A. Petitet K. Stanley D. Walker and R. C. Whaley. 1997. ScaLAPACK Users' Guide. Society for Industrial and Applied Mathematics Philadelphia PA.  L. S. Blackford J. Choi A. Cleary E. D'Azevedo J. Demmel I. Dhillon J. Dongarra S. Hammarling G. Henry A. Petitet K. Stanley D. Walker and R. C. Whaley. 1997. ScaLAPACK Users' Guide. Society for Industrial and Applied Mathematics Philadelphia PA.","DOI":"10.1137\/1.9780898719642"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78791-4_9"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","unstructured":"G. Bosilca A. Bouteiller A. Danalis M. Faverge A. Haidar T. Herault J. Kurzak J. Langou P. Lemarinier H. Ltaief P. Luszczek A. YarKhan and J. Dongarra. 2011. Flexible Development of Dense Linear Algebra Algorithms on Massively Parallel Architectures with DPLASMA. In 2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and Phd Forum. 1432--1441.  G. Bosilca A. Bouteiller A. Danalis M. Faverge A. Haidar T. Herault J. Kurzak J. Langou P. Lemarinier H. Ltaief P. Luszczek A. YarKhan and J. Dongarra. 2011. Flexible Development of Dense Linear Algebra Algorithms on Massively Parallel Architectures with DPLASMA. In 2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and Phd Forum. 1432--1441.","DOI":"10.1109\/IPDPS.2011.299"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/321958.321971"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"J. Choi et al. 1996. ScaLAPACK: a portable linear algebra library for distributed memory computers --- design issues and performance. Comp. Phys. Comm. (1996).  J. Choi et al. 1996. ScaLAPACK: a portable linear algebra library for distributed memory computers --- design issues and performance. Comp. Phys. Comm. (1996).","DOI":"10.1007\/3-540-60902-4_12"},{"key":"e_1_3_2_2_15_1","volume-title":"Communication lower bounds and optimal algorithms for programs that reference arrays-Part 1. arXiv preprint arXiv:1308.0068","author":"Christ Michael","year":"2013","unstructured":"Michael Christ , James Demmel , Nicholas Knight , Thomas Scanlon , and Katherine Yelick . 2013. Communication lower bounds and optimal algorithms for programs that reference arrays-Part 1. arXiv preprint arXiv:1308.0068 ( 2013 ). Michael Christ, James Demmel, Nicholas Knight, Thomas Scanlon, and Katherine Yelick. 2013. Communication lower bounds and optimal algorithms for programs that reference arrays-Part 1. arXiv preprint arXiv:1308.0068 (2013)."},{"key":"e_1_3_2_2_16_1","unstructured":"Cray. 2020. LibSci: Cray Scientific Libraries. (2020). https:\/\/olcf.ornl.gov\/software_package\/libsci\/  Cray. 2020. LibSci: Cray Scientific Libraries. (2020). https:\/\/olcf.ornl.gov\/software_package\/libsci\/"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.1999.807510"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Mauro Del Ben etal 2015. Enabling simulation at the fifth rung of DFT: Large scale RPA calculations with excellent time to solution. Comp. Phys. Comm. (2015).  Mauro Del Ben et al. 2015. Enabling simulation at the fifth rung of DFT: Large scale RPA calculations with excellent time to solution. Comp. Phys. Comm. (2015).","DOI":"10.1016\/j.cpc.2014.10.021"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1021\/ct4002202"},{"key":"e_1_3_2_2_20_1","volume-title":"Communication-optimal convolutional neural nets. arXiv preprint arXiv:1802.06905","author":"Demmel James","year":"2018","unstructured":"James Demmel and Grace Dinh . 2018. Communication-optimal convolutional neural nets. arXiv preprint arXiv:1802.06905 ( 2018 ). James Demmel and Grace Dinh. 2018. Communication-optimal convolutional neural nets. arXiv preprint arXiv:1802.06905 (2018)."},{"key":"e_1_3_2_2_21_1","volume-title":"Parallelepipeds obtaining HBL lower bounds. arXiv preprint arXiv:1611.05944","author":"Demmel James","year":"2016","unstructured":"James Demmel and Alex Rusciano . 2016. Parallelepipeds obtaining HBL lower bounds. arXiv preprint arXiv:1611.05944 ( 2016 ). James Demmel and Alex Rusciano. 2016. Parallelepipeds obtaining HBL lower bounds. arXiv preprint arXiv:1611.05944 (2016)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.1974.1050511"},{"key":"e_1_3_2_2_23_1","volume-title":"Communication-Optimal Tilings for Projective Nested Loops with Arbitrary Bounds. arXiv preprint arXiv:2003.00119","author":"Dinh Grace","year":"2020","unstructured":"Grace Dinh and James Demmel . 2020. Communication-Optimal Tilings for Projective Nested Loops with Arbitrary Bounds. arXiv preprint arXiv:2003.00119 ( 2020 ). Grace Dinh and James Demmel. 2020. Communication-Optimal Tilings for Projective Nested Loops with Arbitrary Bounds. arXiv preprint arXiv:2003.00119 (2020)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3110"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-09766-4_157"},{"key":"e_1_3_2_2_26_1","unstructured":"V. Elango et al. 2013. Data access complexity: The red\/blue pebble game revisited. Technical Report.  V. Elango et al. 2013. Data access complexity: The red\/blue pebble game revisited. Technical Report."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01407835"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356223"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/1413370.1413400"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00050"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"crossref","unstructured":"T. Hoefler et al. 2015. Remote Memory Access Programming in MPI-3. TOPC (2015).  T. Hoefler et al. 2015. Remote Memory Access Programming in MPI-3. TOPC (2015).","DOI":"10.1145\/2780584"},{"key":"e_1_3_2_2_32_1","unstructured":"Edward Hutter. [n. d.]. Communication-Avoiding Parallelism-Increasing maTrix fActorization Library. ([n. d.]). https:\/\/github.com\/huttered40\/capital  Edward Hutter. [n. d.]. Communication-Avoiding Parallelism-Increasing maTrix fActorization Library. ([n. d.]). https:\/\/github.com\/huttered40\/capital"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00020"},{"key":"e_1_3_2_2_34_1","unstructured":"Intel. 2020. Math Kernel Library. (2020). https:\/\/software.intel.com\/en-us\/mkl  Intel. 2020. Math Kernel Library. (2020). https:\/\/software.intel.com\/en-us\/mkl"},{"key":"e_1_3_2_2_35_1","volume-title":"Proceedings of the Platform for Advanced Scientific Computing Conference.","author":"Invernizzi Alberto","year":"2021","unstructured":"Alberto Invernizzi , Teodor Nikolov , Lara Querciagrossa , and Raffaele Solc\u00e0 . 2021 . Distributed Linear Algebra with (HPX) Futures (forthcoming) . In Proceedings of the Platform for Advanced Scientific Computing Conference. Alberto Invernizzi, Teodor Nikolov, Lara Querciagrossa, and Raffaele Solc\u00e0. 2021. Distributed Linear Algebra with (HPX) Futures (forthcoming). In Proceedings of the Platform for Advanced Scientific Computing Conference."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"crossref","unstructured":"Dror Irony et al. 2004. Communication Lower Bounds for Distributed-memory Matrix Multiplication. JPDC (2004).  Dror Irony et al. 2004. Communication Lower Bounds for Distributed-memory Matrix Multiplication. JPDC (2004).","DOI":"10.1016\/j.jpdc.2004.03.021"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"crossref","unstructured":"Hong Jia-Wei and Hsiang-Tsung Kung. 1981. I\/O complexity: The red-blue pebble game. In STOC.  Hong Jia-Wei and Hsiang-Tsung Kung. 1981. I\/O complexity: The red-blue pebble game. In STOC.","DOI":"10.1145\/800076.802486"},{"key":"e_1_3_2_2_38_1","volume-title":"COSTA: Communication-Optimal Shuffle and Transpose Algorithm with Process Relabeling. In International Conference on High Performance Computing. Springer, 217--236","author":"Kabi\u0107 Marko","year":"2021","unstructured":"Marko Kabi\u0107 , Simon Pintarelli , Anton Kozhevnikov , and Joost VandeVondele . 2021 . COSTA: Communication-Optimal Shuffle and Transpose Algorithm with Process Relabeling. In International Conference on High Performance Computing. Springer, 217--236 . Marko Kabi\u0107, Simon Pintarelli, Anton Kozhevnikov, and Joost VandeVondele. 2021. COSTA: Communication-Optimal Shuffle and Transpose Algorithm with Process Relabeling. In International Conference on High Performance Computing. Springer, 217--236."},{"key":"e_1_3_2_2_39_1","unstructured":"Richard M Karp. 1988. A survey of parallel algorithms for shared-memory machines. (1988).  Richard M Karp. 1988. A survey of parallel algorithms for shared-memory machines. (1988)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2013.6704670"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"crossref","unstructured":"Andreas Kn\u00fcpfer Christian R\u00f6ssel Dieter an Mey Scott Biersdorff Kai Diethelm Dominic Eschweiler Markus Geimer Michael Gerndt Daniel Lorenz Allen Malony Wolfgang E. Nagel Yury Oleynik Peter Philippen Pavel Saviankou Dirk Schmidl Sameer Shende Ronny Tsch\u00fcter Michael Wagner Bert Wesarg and Felix Wolf. 2012. Score-P: A Joint Performance Measurement Run-Time Infrastructure for Periscope Scalasca TAU and Vampir. In Tools for High Performance Computing 2011 Holger Brunst Matthias S. M\u00fcller Wolfgang E. Nagel and Michael M. Resch (Eds.). Springer Berlin Heidelberg Berlin Heidelberg 79--91.  Andreas Kn\u00fcpfer Christian R\u00f6ssel Dieter an Mey Scott Biersdorff Kai Diethelm Dominic Eschweiler Markus Geimer Michael Gerndt Daniel Lorenz Allen Malony Wolfgang E. Nagel Yury Oleynik Peter Philippen Pavel Saviankou Dirk Schmidl Sameer Shende Ronny Tsch\u00fcter Michael Wagner Bert Wesarg and Felix Wolf. 2012. Score-P: A Joint Performance Measurement Run-Time Infrastructure for Periscope Scalasca TAU and Vampir. In Tools for High Performance Computing 2011 Holger Brunst Matthias S. M\u00fcller Wolfgang E. Nagel and Michael M. Resch (Eds.). Springer Berlin Heidelberg Berlin Heidelberg 79--91.","DOI":"10.1007\/978-3-642-31476-6_7"},{"key":"e_1_3_2_2_42_1","volume-title":"Matrix inversion using Cholesky decomposition. In 2013 signal processing: Algorithms, architectures, arrangements, and applications (SPA)","author":"Krishnamoorthy Aravindh","unstructured":"Aravindh Krishnamoorthy and Deepak Menon . 2013. Matrix inversion using Cholesky decomposition. In 2013 signal processing: Algorithms, architectures, arrangements, and applications (SPA) . IEEE , 70--72. Aravindh Krishnamoorthy and Deepak Menon. 2013. Matrix inversion using Cholesky decomposition. In 2013 signal processing: Algorithms, architectures, arrangements, and applications (SPA). IEEE, 70--72."},{"key":"e_1_3_2_2_43_1","volume-title":"Traces and emergence of nonlinear programming","author":"Kuhn Harold W","unstructured":"Harold W Kuhn and Albert W Tucker . 2014. Nonlinear programming . In Traces and emergence of nonlinear programming . Springer , 247--258. Harold W Kuhn and Albert W Tucker. 2014. Nonlinear programming. In Traces and emergence of nonlinear programming. Springer, 247--258."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1063\/5.0007045"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356181"},{"key":"e_1_3_2_2_46_1","unstructured":"Quanquan Liu. 2018. Red-Blue and Standard Pebble Games : Complexity and Applications in the Sequential and Parallel Models.  Quanquan Liu. 2018. Red-Blue and Standard Pebble Games : Complexity and Applications in the Sequential and Parallel Models."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"crossref","unstructured":"L. H. Loomis and H. Whitney. 1949. An inequality related to the isoperimetric inequality. Bull. Amer. Math. Soc. 55 10 (10 1949) 961--962.  L. H. Loomis and H. Whitney. 1949. An inequality related to the isoperimetric inequality. Bull. Amer. Math. Soc. 55 10 (10 1949) 961--962.","DOI":"10.1090\/S0002-9904-1949-09320-5"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2555243.2555250"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"crossref","unstructured":"Carl D Meyer. 2000. Matrix analysis and applied linear algebra. SIAM.  Carl D Meyer. 2000. Matrix analysis and applied linear algebra. SIAM.","DOI":"10.1137\/1.9780898719512"},{"key":"e_1_3_2_2_50_1","unstructured":"NVIDIA. 2020. CUSOLVER Reference Guide. (2020). https:\/\/docs.nvidia.com\/cuda\/cusolver  NVIDIA. 2020. CUSOLVER Reference Guide. (2020). https:\/\/docs.nvidia.com\/cuda\/cusolver"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3385412.3385989"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01264"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2427023.2427030"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/1527286.1527288"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30218-6_13"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1137\/0204020"},{"key":"e_1_3_2_2_58_1","unstructured":"Edgar Solomonik. 2021. Communication Avoiding Numerical Dense Matrix Computations. (2021). https:\/\/github.com\/solomonik\/CANDMC  Edgar Solomonik. 2021. Communication Avoiding Numerical Dense Matrix Computations. (2021). https:\/\/github.com\/solomonik\/CANDMC"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"crossref","unstructured":"Edgar Solomonik et al. 2016. Trade-Offs Between Synchronization Communication and Computation in Parallel Linear Algebra omputations. TOPC (2016).  Edgar Solomonik et al. 2016. Trade-Offs Between Synchronization Communication and Computation in Parallel Linear Algebra omputations. TOPC (2016).","DOI":"10.1145\/2897188"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"crossref","unstructured":"E. Solomonik et al. 2017. Scaling Betweenness Centrality using Communication-Efficient Sparse Matrix Multiplication. In SC.  E. Solomonik et al. 2017. Scaling Betweenness Centrality using Communication-Efficient Sparse Matrix Multiplication. In SC.","DOI":"10.1145\/3126908.3126971"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23397-5_10"},{"key":"e_1_3_2_2_62_1","unstructured":"TOP500 list. 2020. November 2019 TOP500 list. https:\/\/www.top500.org\/lists\/2019\/11\/ (April. 2020). (2020).  TOP500 list. 2020. November 2019 TOP500 list. https:\/\/www.top500.org\/lists\/2019\/11\/ (April. 2020). (2020)."},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2703149"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/275487.275501"},{"key":"e_1_3_2_2_65_1","volume-title":"Lafferty","author":"Zheng Qinqing","year":"2016","unstructured":"Qinqing Zheng and John D . Lafferty . 2016 . Convergence Analysis for Rectangular Matrix Completion Using Burer-Monteiro Factorization and Gradient Descent. CoRR ( 2016). Qinqing Zheng and John D. Lafferty. 2016. Convergence Analysis for Rectangular Matrix Completion Using Burer-Monteiro Factorization and Gradient Descent. CoRR (2016)."},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3357156"}],"event":{"name":"SC '21: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis Missouri","acronym":"SC '21","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3458817.3476167","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3458817.3476167","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:49:07Z","timestamp":1750268947000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3458817.3476167"}},"subtitle":["near-optimal matrix factorizations"],"short-title":[],"issued":{"date-parts":[[2021,11,13]]},"references-count":65,"alternative-id":["10.1145\/3458817.3476167","10.1145\/3458817"],"URL":"https:\/\/doi.org\/10.1145\/3458817.3476167","relation":{},"subject":[],"published":{"date-parts":[[2021,11,13]]},"assertion":[{"value":"2021-11-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}