{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T16:45:18Z","timestamp":1777999518722,"version":"3.51.4"},"reference-count":70,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T00:00:00Z","timestamp":1663632000000},"content-version":"am","delay-in-days":293,"URL":"http:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"}],"funder":[{"DOI":"10.13039\/100006132","name":"Office of Science","doi-asserted-by":"publisher","award":["DE-AC02-06CH11357"],"award-info":[{"award-number":["DE-AC02-06CH11357"]}],"id":[{"id":"10.13039\/100006132","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006132","name":"Office of Science","doi-asserted-by":"publisher","award":["DE-AC05-00OR22725"],"award-info":[{"award-number":["DE-AC05-00OR22725"]}],"id":[{"id":"10.13039\/100006132","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["17-SC-20-SC"],"award-info":[{"award-number":["17-SC-20-SC"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006224","name":"Argonne National Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006224","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100002418","name":"Intel Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100002418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Parallel Computing"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1016\/j.parco.2021.102829","type":"journal-article","created":{"date-parts":[[2021,9,10]],"date-time":"2021-09-10T20:37:58Z","timestamp":1631306278000},"page":"102829","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":13,"special_numbering":"C","title":["Achieving performance portability in Gaussian basis set density functional theory on accelerator based architectures in NWChemEx"],"prefix":"10.1016","volume":"108","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2735-3706","authenticated-orcid":false,"given":"David B.","family":"Williams-Young","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9642-921X","authenticated-orcid":false,"given":"Abhishek","family":"Bagusetty","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7114-8315","authenticated-orcid":false,"given":"Wibe A.","family":"de Jong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Douglas","family":"Doerfler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0876-3294","authenticated-orcid":false,"given":"Hubertus J.J.","family":"van Dam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1415-6300","authenticated-orcid":false,"given":"\u00c1lvaro","family":"V\u00e1zquez-Mayagoitia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Theresa L.","family":"Windus","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.parco.2021.102829_b1","series-title":"Highlights of the 55th TOP500 list, slides 30,31","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b2","doi-asserted-by":"crossref","unstructured":"V.V. Kindratenko, J.J. Enos, G. Shi, M.T. Showerman, G.W. Arnold, J.E. Stone, J.C. Phillips, W.-m. Hwu, GPU clusters for high-performance computing, in: 2009 IEEE International Conference on Cluster Computing and Workshops, IEEE, 2009, pp. 1\u20138.","DOI":"10.1109\/CLUSTR.2009.5289128"},{"key":"10.1016\/j.parco.2021.102829_b3","doi-asserted-by":"crossref","unstructured":"L. Parnell, D. Demetriou, V. Kamath, E. Zhang, Trends in high performance computing: Exascale systems and facilities beyond the first wave, in: 2019 18th IEEE Intersociety Conference on Thermal and Thermomechanical Phenomena in Electronic Systems (ITherm), 2019, pp. 167\u2013176.","DOI":"10.1109\/ITHERM.2019.8757229"},{"issue":"23","key":"10.1016\/j.parco.2021.102829_b4","doi-asserted-by":"crossref","first-page":"4557","DOI":"10.1021\/acs.jpca.0c02249","article-title":"Novel computer architectures and quantum chemistry","volume":"124","author":"Gordon","year":"2020","journal-title":"J. Phys. Chem. A"},{"issue":"17","key":"10.1016\/j.parco.2021.102829_b5","doi-asserted-by":"crossref","first-page":"9015","DOI":"10.1021\/acs.chemrev.0c00700","article-title":"Editorial: Modern architectures and their impact on electronic structure theory","volume":"120","author":"Gordon","year":"2020","journal-title":"Chem. Rev."},{"key":"10.1016\/j.parco.2021.102829_b6","series-title":"A metric for performance portability","author":"Pennycook","year":"2016"},{"key":"10.1016\/j.parco.2021.102829_b7","doi-asserted-by":"crossref","unstructured":"T. Deakin, S. McIntosh-Smith, J. Price, A. Poenaru, P. Atkinson, C. Popa, J. Salmon, Performance portability across diverse computer architectures, in: 2019 IEEE\/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC), 2019.","DOI":"10.1109\/P3HPC49587.2019.00006"},{"key":"10.1016\/j.parco.2021.102829_b8","series-title":"CUDA Programming: A Developer\u2019s Guide To Parallel Computing with GPUs","author":"Cook","year":"2012"},{"key":"10.1016\/j.parco.2021.102829_b9","series-title":"Frontier, ORNL\u2019s exascale supercompter","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b10","series-title":"Aurora, ALCF\u2019s exascale supercomputer","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b11","series-title":"El capitan, LLNL exascale supercomputer","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b12","series-title":"HIP Documentation","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b13","doi-asserted-by":"crossref","unstructured":"B. Ashbaugh, A. Bader, J. Brodman, J. Hammond, M. Kinsner, J. Pennycook, R. Schulz, J. Sewall, Data parallel C++: Enhancing SYCL through extensions for productivity and performance, in: Proceedings of the International Workshop on OpenCL, IWOCL \u201920, 2020.","DOI":"10.1145\/3388333.3388653"},{"key":"10.1016\/j.parco.2021.102829_b14","series-title":"SyclTM SpecificationGeneric heterogeneous computing for modern C++, version 2020 provisional","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b15","series-title":"OpenACC.org: More science, less computing","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b16","series-title":"OpenMP.org: The OpenMP API specification for parallel programming","year":"2020"},{"issue":"12","key":"10.1016\/j.parco.2021.102829_b17","doi-asserted-by":"crossref","first-page":"3202","DOI":"10.1016\/j.jpdc.2014.07.003","article-title":"Kokkos: Enabling manycore performance portability through polymorphic memory access patterns","volume":"74","author":"Edwards","year":"2014","journal-title":"J. Parallel. Distrib. Comput."},{"key":"10.1016\/j.parco.2021.102829_b18","doi-asserted-by":"crossref","unstructured":"D.A. Beckingsale, J. Burmark, R. Hornung, et al. RAJA: Portable performance for large-scale scientific applications, in: 2019 Ieee\/Acm International Workshop on Performance, Portability and Productivity in Hpc (P3hpc), IEEE, 2019, pp. 71\u201381.","DOI":"10.1109\/P3HPC49587.2019.00012"},{"issue":"3","key":"10.1016\/j.parco.2021.102829_b19","doi-asserted-by":"crossref","first-page":"14:1","DOI":"10.1145\/2764454","article-title":"BLIS: A framework for rapidly instantiating BLAS functionality","volume":"41","author":"Van Zee","year":"2015","journal-title":"ACM TOMS"},{"key":"10.1016\/j.parco.2021.102829_b20","doi-asserted-by":"crossref","first-page":"A1133","DOI":"10.1103\/PhysRev.140.A1133","article-title":"Self-consistent equations including exchange and correlation effects","volume":"140","author":"Kohn","year":"1965","journal-title":"Phys. Rev."},{"issue":"1","key":"10.1016\/j.parco.2021.102829_b21","doi-asserted-by":"crossref","DOI":"10.1002\/wcms.1290","article-title":"Challenges in large scale quantum mechanical calculations","volume":"7","author":"Ratcliff","year":"2017","journal-title":"WIREs Comput. Mol. Sci."},{"issue":"3","key":"10.1016\/j.parco.2021.102829_b22","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1016\/j.pnsc.2019.04.003","article-title":"Density functional theory calculations: A powerful tool to simulate and design high-performance energy storage and conversion materials","volume":"29","author":"Wu","year":"2019","journal-title":"Progress Natural Sci. Mater. Int."},{"issue":"7","key":"10.1016\/j.parco.2021.102829_b23","doi-asserted-by":"crossref","DOI":"10.1063\/5.0023185","article-title":"Electronic structure software","volume":"153","author":"Sherrill","year":"2020","journal-title":"J. Chem. Phys."},{"key":"10.1016\/j.parco.2021.102829_b24","doi-asserted-by":"crossref","first-page":"951","DOI":"10.3389\/fchem.2020.581058","article-title":"On the efficient evaluation of the exchange correlation potential on graphics processing unit clusters","volume":"8","author":"Williams-Young","year":"2020","journal-title":"Front. Chem."},{"issue":"7","key":"10.1016\/j.parco.2021.102829_b25","doi-asserted-by":"crossref","first-page":"4315","DOI":"10.1021\/acs.jctc.0c00290","article-title":"Parallel implementation of density functional theory methods in the quantum interaction computational kernel program","volume":"16","author":"Manathunga","year":"2020","journal-title":"J. Chem. Theory Comput."},{"key":"10.1016\/j.parco.2021.102829_b26","doi-asserted-by":"crossref","first-page":"107314","DOI":"10.1016\/j.cpc.2020.107314","article-title":"GPU acceleration of all-electron electronic structure theory using localized numeric atom-centered basis functions","volume":"254","author":"Huhn","year":"2020","journal-title":"Comput. Phys. Commun."},{"issue":"3","key":"10.1016\/j.parco.2021.102829_b27","doi-asserted-by":"crossref","first-page":"1512","DOI":"10.1021\/acs.jctc.0c01252","article-title":"Highly efficient resolution-of-identity density functional theory calculations on central and graphics processing units","volume":"17","author":"Kussmann","year":"2021","journal-title":"J. Chem. Theory Comput."},{"issue":"7","key":"10.1016\/j.parco.2021.102829_b28","doi-asserted-by":"crossref","first-page":"3955","DOI":"10.1021\/acs.jctc.1c00145","article-title":"Harnessing the power of multi-GPU acceleration into the quantum interaction computational kernel program","volume":"17","author":"Manathunga","year":"2021","journal-title":"J. Chem. Theory Comput."},{"issue":"8","key":"10.1016\/j.parco.2021.102829_b29","doi-asserted-by":"crossref","first-page":"4962","DOI":"10.1021\/acs.chemrev.0c00998","article-title":"From NWChem to NWChemEx: evolving with the computational chemistry landscape","volume":"121","author":"Kowalski","year":"2021","journal-title":"Chem. Rev."},{"issue":"18","key":"10.1016\/j.parco.2021.102829_b30","doi-asserted-by":"crossref","first-page":"184102","DOI":"10.1063\/5.0004997","article-title":"Nwchem: past, present, and future","volume":"152","author":"Apr\u00e0","year":"2020","journal-title":"J. Chem. Phys."},{"issue":"12","key":"10.1016\/j.parco.2021.102829_b31","doi-asserted-by":"crossref","first-page":"7232","DOI":"10.1021\/acs.jctc.0c00768","article-title":"High-performance, graphics processing unit-accelerated fock build algorithm","volume":"16","author":"Barca","year":"2020","journal-title":"J. Chem. Theory Comput."},{"issue":"3","key":"10.1016\/j.parco.2021.102829_b32","doi-asserted-by":"crossref","first-page":"1456","DOI":"10.1021\/acs.jctc.9b00860","article-title":"Highly efficient, linear-scaling seminumerical exact-exchange method for graphic processing units","volume":"16","author":"Laqua","year":"2020","journal-title":"J. Chem. Theory Comput."},{"issue":"7","key":"10.1016\/j.parco.2021.102829_b33","doi-asserted-by":"crossref","first-page":"3160","DOI":"10.1021\/acs.jctc.7b00030","article-title":"Arbitrary angular momentum electron repulsion integrals with graphical processing units: application to the resolution of identity Hartree\u2013Fock method","volume":"13","author":"Kalinowski","year":"2017","journal-title":"J. Chem. Theory Comput."},{"key":"10.1016\/j.parco.2021.102829_b34","series-title":"Electronic Structure Calculations on Graphics Processing Units","first-page":"67","article-title":"Gaussian basis set hartree\u2013fock, density functional theory, and beyond on gpus","author":"Luehr","year":"2016"},{"issue":"2","key":"10.1016\/j.parco.2021.102829_b35","doi-asserted-by":"crossref","first-page":"222","DOI":"10.1021\/ct700268q","article-title":"Quantum chemistry on graphical processing units. 1. Strategies for two-electron integral evaluation","volume":"4","author":"Ufimtsev","year":"2008","journal-title":"J. Chem. Theory Comput."},{"issue":"4","key":"10.1016\/j.parco.2021.102829_b36","doi-asserted-by":"crossref","first-page":"1004","DOI":"10.1021\/ct800526s","article-title":"Quantum chemistry on graphical processing units. 2. Direct self-consistent-field implementation","volume":"5","author":"Ufimtsev","year":"2009","journal-title":"J. Chem. Theory Comput."},{"issue":"2","key":"10.1016\/j.parco.2021.102829_b37","doi-asserted-by":"crossref","first-page":"965","DOI":"10.1021\/ct300754n","article-title":"Acceleration of electron repulsion integral evaluation on graphics processing units via use of recurrence relations","volume":"9","author":"Miao","year":"2013","journal-title":"J. Chem. Theory Comput."},{"issue":"3","key":"10.1016\/j.parco.2021.102829_b38","doi-asserted-by":"crossref","first-page":"696","DOI":"10.1021\/ct9005079","article-title":"Uncontracted rys quadrature implementation of up to g functions on graphical processing units","volume":"6","author":"Asadchev","year":"2010","journal-title":"J. Chem. Theory Comput."},{"key":"10.1016\/j.parco.2021.102829_b39","series-title":"International Series of Monographs on Chemistry","article-title":"Density functional theory of atoms and molecules","author":"Parr","year":"1994"},{"key":"10.1016\/j.parco.2021.102829_b40","doi-asserted-by":"crossref","first-page":"8800","DOI":"10.1103\/PhysRevB.33.8800","article-title":"Accurate and simple density functional for the electronic exchange energy: generalized gradient approximation","volume":"33","author":"Perdew","year":"1986","journal-title":"Phys. Rev. B"},{"key":"10.1016\/j.parco.2021.102829_b41","doi-asserted-by":"crossref","first-page":"8822","DOI":"10.1103\/PhysRevB.33.8822","article-title":"Density-functional approximation for the correlation energy of the inhomogeneous electron gas","volume":"33","author":"Perdew","year":"1986","journal-title":"Phys. Rev. B"},{"issue":"3","key":"10.1016\/j.parco.2021.102829_b42","doi-asserted-by":"crossref","first-page":"1361","DOI":"10.1021\/acs.jctc.7b01172","article-title":"Electronic exchange and correlation in van der Waals systems: Balancing semilocal and nonlocal energy contributions","volume":"14","author":"Hermann","year":"2018","journal-title":"J. Chem. Theory Comput."},{"issue":"7","key":"10.1016\/j.parco.2021.102829_b43","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1140\/epjb\/e2018-90170-1","article-title":"An efficient implementation of two-component relativistic density functional theory with torque-free auxiliary variables","volume":"91","author":"Petrone","year":"2018","journal-title":"Eur. Phys. J. B"},{"issue":"10","key":"10.1016\/j.parco.2021.102829_b44","doi-asserted-by":"crossref","first-page":"3097","DOI":"10.1021\/ct200412r","article-title":"Linear scaling hierarchical integration scheme for the exchange-correlation term in molecular and periodic systems","volume":"7","author":"Burow","year":"2011","journal-title":"J. Chem. Theory Comput."},{"issue":"8","key":"10.1016\/j.parco.2021.102829_b45","doi-asserted-by":"crossref","first-page":"1230","DOI":"10.1021\/ct8001046","article-title":"Accelerating density functional calculations with graphics processing unit","volume":"4","author":"Yasuda","year":"2008","journal-title":"J. Chem. Theory Comput."},{"issue":"6","key":"10.1016\/j.parco.2021.102829_b46","doi-asserted-by":"crossref","first-page":"557","DOI":"10.1016\/0009-2614(92)85009-Y","article-title":"Kohn\u2014Sham density-functional theory within a finite basis set","volume":"199","author":"Pople","year":"1992","journal-title":"Chem. Phys. Lett."},{"issue":"4","key":"10.1016\/j.parco.2021.102829_b47","doi-asserted-by":"crossref","first-page":"2547","DOI":"10.1063\/1.454033","article-title":"A multicenter numerical integration scheme for polyatomic molecules","volume":"88","author":"Becke","year":"1988","journal-title":"J. Chem. Phys."},{"issue":"3\u20134","key":"10.1016\/j.parco.2021.102829_b48","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1016\/0009-2614(96)00600-8","article-title":"Achieving linear scaling in exchange-correlation density functional quadratures","volume":"257","author":"Stratmann","year":"1996","journal-title":"Chem. Phys. Lett."},{"issue":"2","key":"10.1016\/j.parco.2021.102829_b49","first-page":"193","article-title":"Batched matrix computations on hardware accelerators based on GPUs","volume":"29","author":"Haidar","year":"2015","journal-title":"IJHPCA"},{"key":"10.1016\/j.parco.2021.102829_b50","series-title":"High Performance Computing","first-page":"21","article-title":"Performance, design, and autotuning of batched GEMM for GPUs","author":"Abdelfattah","year":"2016"},{"key":"10.1016\/j.parco.2021.102829_b51","series-title":"NVIDIA V100 Architecture specification","year":"2021"},{"key":"10.1016\/j.parco.2021.102829_b52","series-title":"AMD MI100 architecture specification","year":"2021"},{"key":"10.1016\/j.parco.2021.102829_b53","series-title":"Intel Xeon E3-1585 v5 architecture specification","year":"2021"},{"key":"10.1016\/j.parco.2021.102829_b54","series-title":"oneAPI Math kernel library specification","year":"2020"},{"issue":"5\u20136","key":"10.1016\/j.parco.2021.102829_b55","doi-asserted-by":"crossref","first-page":"232","DOI":"10.1016\/j.parco.2009.12.005","article-title":"Towards dense linear algebra for hybrid GPU accelerated manycore systems","volume":"36","author":"Tomov","year":"2010","journal-title":"Parallel Comput."},{"issue":"4","key":"10.1016\/j.parco.2021.102829_b56","doi-asserted-by":"crossref","first-page":"511","DOI":"10.1177\/1094342010385729","article-title":"An improved MAGMA GEMM for Fermi graphics processing units","volume":"24","author":"Nath","year":"2010","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"10.1016\/j.parco.2021.102829_b57","series-title":"High-Performance Tensor Contractions for GPUs","author":"Abdelfattah","year":"2016"},{"key":"10.1016\/j.parco.2021.102829_b58","series-title":"HipMAGMA v2.0.0","author":"Brown","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b59","doi-asserted-by":"crossref","unstructured":"H. Shan, S. Williams, C. Johnson, Improving MPI reduction performance for manycore architectures with OpenMP and data compression, in: 2018 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS), 2018, pp. 1\u201311.","DOI":"10.1109\/PMBS.2018.8641632"},{"key":"10.1016\/j.parco.2021.102829_b60","doi-asserted-by":"crossref","unstructured":"K. Ibrahim, Optimizing breadth-first search at scale using hardware-accelerated space consistency, in: 2019 IEEE 26th International Conference on High Performance Computing, Data, and Analytics (HiPC), 2019, pp. 23\u201333.","DOI":"10.1109\/HiPC.2019.00015"},{"key":"10.1016\/j.parco.2021.102829_b61","doi-asserted-by":"crossref","unstructured":"K. Ibrahim, CSPACER: A reduced API set runtime for the space consistency model, in: Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region, 2021 (in press).","DOI":"10.1145\/3432261.3432272"},{"key":"10.1016\/j.parco.2021.102829_b62","doi-asserted-by":"crossref","unstructured":"M. Mrozek, B. Ashbaugh, J. Brodman, Taking memory management to the next level: Unified shared memory in action, in: Proceedings of the International Workshop on OpenCL, 2020, pp. 1\u20133.","DOI":"10.1145\/3388333.3388644"},{"key":"10.1016\/j.parco.2021.102829_b63","first-page":"1","article-title":"Efficiency and productivity for decision making on low-power heterogeneous CPU+ GPU SoCs","author":"Constantinescu","year":"2020","journal-title":"J. Supercomput."},{"key":"10.1016\/j.parco.2021.102829_b64","series-title":"Intel DPC++ compatibility tool developer guide and reference","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b65","doi-asserted-by":"crossref","unstructured":"S. Christgau, T. Steinke, Porting a legacy CUDA stencil code to oneAPI, in: 2020 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), IEEE, 2020, pp. 359\u2013367.","DOI":"10.1109\/IPDPSW50202.2020.00070"},{"key":"10.1016\/j.parco.2021.102829_b66","series-title":"Auto-tuning Performance on Multicore Computers","author":"Williams","year":"2008"},{"key":"10.1016\/j.parco.2021.102829_b67","doi-asserted-by":"crossref","DOI":"10.1145\/1498765.1498785","article-title":"Roofline: An insightful visual performance model for floating-point programs and multicore architectures","author":"Williams","year":"2009","journal-title":"Commun. ACM"},{"key":"10.1016\/j.parco.2021.102829_b68","series-title":"2019 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)","first-page":"7","article-title":"An instruction roofline model for gpus","author":"Ding","year":"2019"},{"key":"10.1016\/j.parco.2021.102829_b69","series-title":"Roofline toolkit","year":"2020"},{"key":"10.1016\/j.parco.2021.102829_b70","series-title":"An instruction based roofline method for AMD GPUs","year":"2020"}],"container-title":["Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167819121000776?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167819121000776?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T05:32:59Z","timestamp":1759123979000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167819121000776"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12]]},"references-count":70,"alternative-id":["S0167819121000776"],"URL":"https:\/\/doi.org\/10.1016\/j.parco.2021.102829","relation":{},"ISSN":["0167-8191"],"issn-type":[{"value":"0167-8191","type":"print"}],"subject":[],"published":{"date-parts":[[2021,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Achieving performance portability in Gaussian basis set density functional theory on accelerator based architectures in NWChemEx","name":"articletitle","label":"Article Title"},{"value":"Parallel Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.parco.2021.102829","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"102829"}}