{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:23:29Z","timestamp":1774121009857,"version":"3.50.1"},"reference-count":77,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2018,11,1]],"date-time":"2018-11-01T00:00:00Z","timestamp":1541030400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2018,11,1]],"date-time":"2018-11-01T00:00:00Z","timestamp":1541030400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2018,11,1]],"date-time":"2018-11-01T00:00:00Z","timestamp":1541030400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2018,11,1]],"date-time":"2018-11-01T00:00:00Z","timestamp":1541030400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Exascale Computing Project","award":["17-SC-20-SC"],"award-info":[{"award-number":["17-SC-20-SC"]}]},{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["ACI-1642441"],"award-info":[{"award-number":["ACI-1642441"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-AC52-07NA27344"],"award-info":[{"award-number":["DE-AC52-07NA27344"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006192","name":"Advanced Scientific Computing Research","doi-asserted-by":"publisher","award":["ER26054"],"award-info":[{"award-number":["ER26054"]}],"id":[{"id":"10.13039\/100006192","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["SHF-1564074"],"award-info":[{"award-number":["SHF-1564074"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"ASCR X-Stack Project","award":["ER26143"],"award-info":[{"award-number":["ER26143"]}]},{"DOI":"10.13039\/100008510","name":"University of Maryland","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008510","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Proc. IEEE"],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.1109\/jproc.2018.2841200","type":"journal-article","created":{"date-parts":[[2018,10,26]],"date-time":"2018-10-26T19:41:46Z","timestamp":1540582906000},"page":"2068-2083","source":"Crossref","is-referenced-by-count":106,"title":["Autotuning in High-Performance Computing Applications"],"prefix":"10.1109","volume":"106","author":[{"given":"Prasanna","family":"Balaprakash","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3247-1782","authenticated-orcid":false,"given":"Jack","family":"Dongarra","sequence":"additional","affiliation":[]},{"given":"Todd","family":"Gamblin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3058-7573","authenticated-orcid":false,"given":"Mary","family":"Hall","sequence":"additional","affiliation":[]},{"given":"Jeffrey K.","family":"Hollingsworth","sequence":"additional","affiliation":[]},{"given":"Boyana","family":"Norris","sequence":"additional","affiliation":[]},{"given":"Richard","family":"Vuduc","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.125"},{"key":"ref72","first-page":"218","article-title":"Optimizing LOBPCG: Sparse matrix loop and data transformations in action","author":"ahmad","year":"2016","journal-title":"Proc 29th Int Workshop Lang Compilers Parallel Comput"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2015.106"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810120"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2016.85"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872411"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1016\/j.jocs.2015.12.001"},{"key":"ref39","first-page":"1","article-title":"Generating customized sparse eigenvalue solutions with Lighthouse","author":"nair","year":"2014","journal-title":"Proc Int Multi-Conf Comput Global Inf Technol"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807623"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2737924.2737969"},{"key":"ref33","first-page":"50","article-title":"Loop transformation recipes for code generation and auto-tuning","author":"hall","year":"2009","journal-title":"Proc 22nd Int Workshop Lang Compilers Parallel Comput"},{"key":"ref32","first-page":"136","article-title":"A language for the compact representation of multiple program versions","author":"donadio","year":"2005","journal-title":"Proc Workshop Lang Compilers Parallel Comput (LCPC)"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"209s","DOI":"10.1137\/120883153","article-title":"Stencil-aware GPU optimization of iterative solvers","volume":"35","author":"choudary","year":"2013","journal-title":"SIAM J Sci Comput"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2012.46"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.59"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454155"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/1542476.1542481"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2014.7116902"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2011.04.234"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-1986-6_8"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/InPar.2012.6339587"},{"key":"ref63","author":"balay","year":"2015","journal-title":"PETSc Web Page"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5161004"},{"key":"ref64","article-title":"PETSc users manual","author":"balay","year":"2015"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5161054"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/1089014.1089021"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/2837476.2837485"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370637"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/2830168.2830169"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1137\/15M1028406"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1177\/1094342011414744"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.46"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/SHPCC.1994.296728"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.70"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2591635.2591656"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/301618.301661"},{"key":"ref24","author":"frigo","year":"1999","journal-title":"J H Wilkinson Prize for Numerical Software Official List of Winners"},{"key":"ref23","author":"whaley","year":"2016","journal-title":"SC16 Test of Time Award Winner Official Citation"},{"key":"ref26","article-title":"Model-guided empirical optimization for memory hierarchy","author":"chen","year":"2007"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.83"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2008.12.006"},{"key":"ref51","first-page":"353","article-title":"Autotuning and specialization: Speeding up matrix multiply for small matrices with compiler technology","author":"shin","year":"2009","journal-title":"Proc 4th Int Workshop Autom Perform Tuning"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2015.106"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-38718-0_26"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2005.52"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-006-7957-2"},{"key":"ref55","first-page":"1","article-title":"Automatically tuned linear algebra software","author":"whaley","year":"1998","journal-title":"Proc 1998 ACM\/IEEE Supercomputing 98 Conf"},{"key":"ref54","first-page":"443","author":"norris","year":"2007","journal-title":"Annotations for productivity and performance portability"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2000.888348"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTR.2008.4663803"},{"key":"ref10","article-title":"Proof-driven derivation of Krylov solver libraries","author":"eijkhout","year":"2010"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1177\/1094342013494428"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.86"},{"key":"ref12","first-page":"1","article-title":"Optimizing sparse matrix vector multiplication on SMPs","author":"im","year":"1999","journal-title":"Proc SIAM Conf Parallel Process Sci Comput"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/16\/1\/071"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.104"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.80"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1998.681704"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840301"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840311"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1989493.1989508"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/355841.355847"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2005.29"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/263580.263662"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/77626.79170"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2011.311"},{"key":"ref7","first-page":"1","article-title":"Automatically tuned linear algebra software","author":"whaley","year":"1998","journal-title":"Proc 1998 ACM\/IEEE Supercomputing 98 Conf"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1137\/070693199"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840306"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"ref45","article-title":"Apollo: Fast, dynamic tuning for data-dependent code","author":"beckingsale","year":"2017","journal-title":"Proc IEEE Int Parallel Distrib Process Symp (IPDPS)"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2009.04.002"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.15"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/HPDC.2006.1652135"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/379539.379583"},{"key":"ref44","first-page":"1","article-title":"Kokkos: A manycore device performance portability library for C++ HPC applications","author":"edwards","year":"2014","journal-title":"Proc Workshop Program Abstractions Data Locality"},{"key":"ref43","doi-asserted-by":"crossref","DOI":"10.2172\/1169830","article-title":"The RAJA portability layer: Overview and status","author":"hornung","year":"2014"}],"container-title":["Proceedings of the IEEE"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielaam\/5\/8510998\/8423171-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5\/8510998\/08423171.pdf?arnumber=8423171","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,8]],"date-time":"2022-04-08T18:55:55Z","timestamp":1649444155000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8423171\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":77,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/jproc.2018.2841200","relation":{},"ISSN":["0018-9219","1558-2256"],"issn-type":[{"value":"0018-9219","type":"print"},{"value":"1558-2256","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,11]]}}}