{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:23:43Z","timestamp":1773318223435,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":180,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759829","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:05:39Z","timestamp":1762963539000},"page":"2126-2142","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Matrix Is All You Need: Rearchitecting Quantum Chemistry to Scale on AI Accelerators"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-3075-3385","authenticated-orcid":false,"given":"Haozhi","family":"Han","sequence":"first","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1013-1325","authenticated-orcid":false,"given":"Kun","family":"Li","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0467-7858","authenticated-orcid":false,"given":"Fusong","family":"Ju","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2565-7749","authenticated-orcid":false,"given":"Qi","family":"Li","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3900-3722","authenticated-orcid":false,"given":"Hong","family":"An","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2392-8472","authenticated-orcid":false,"given":"Yifeng","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7520-9640","authenticated-orcid":false,"given":"Yunquan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9107-013X","authenticated-orcid":false,"given":"Ting","family":"Cao","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6455-3898","authenticated-orcid":false,"given":"Mao","family":"Yang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_3_2_2","doi-asserted-by":"crossref","unstructured":"G Andersson SE Larsson G Leander P M\u00f6ller Sven\u00a0G\u00f6sta Nilsson Ingemar Ragnarsson Sven \u00c5berg R Bengtsson J Dudek B Nerlo-Pomorska et\u00a0al. 1976. Nuclear shell structure at very high angular momentum. Nuclear Physics A 268 2 (1976) 205\u2013256.","DOI":"10.1016\/0375-9474(76)90461-9"},{"key":"e_1_3_3_3_3_2","first-page":"75","volume-title":"SpringSim (HPS)","author":"Anzt Hartwig","year":"2015","unstructured":"Hartwig Anzt, Stanimire Tomov, and Jack\u00a0J Dongarra. 2015. Accelerating the LOBPCG method on GPUs using a blocked sparse matrix vector product.. In SpringSim (HPS). 75\u201382."},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"crossref","unstructured":"Gustavo\u00a0JR Aroeira Matthew\u00a0M Davis Justin\u00a0M Turney and Henry\u00a0F Schaefer\u00a0III. 2022. Fermi. jl: a modern design for quantum chemistry. Journal of chemical theory and computation 18 2 (2022) 677\u2013686.","DOI":"10.1021\/acs.jctc.1c00719"},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"publisher","unstructured":"Andrey Asadchev and Mark\u00a0S. Gordon. 2012. Mixed-precision evaluation of two-electron integrals by Rys quadrature. Computer Physics Communications 183 8 (2012) 1563\u20131567. 10.1016\/j.cpc.2012.02.020","DOI":"10.1016\/j.cpc.2012.02.020"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","unstructured":"Andrey Asadchev and Mark\u00a0S. Gordon. 2012. New Multithreaded Hybrid CPU\/GPU Approach to Hartree\u2013Fock. Journal of Chemical Theory and Computation 8 11 (2012) 4166\u20134176. 10.1021\/ct300526w arXiv:https:\/\/doi.org\/10.1021\/ct300526w PMID: 26605582.","DOI":"10.1021\/ct300526w"},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"crossref","unstructured":"Andrey Asadchev and Edward\u00a0F Valeev. 2023. High-performance evaluation of high angular momentum 4-center Gaussian integrals on modern accelerated processors. The Journal of Physical Chemistry A 127 51 (2023) 10889\u201310895.","DOI":"10.1021\/acs.jpca.3c04574"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"crossref","unstructured":"Andrey Asadchev and Edward\u00a0F Valeev. 2023. Memory-efficient recursive evaluation of 3-center Gaussian integrals. Journal of Chemical Theory and Computation 19 6 (2023) 1698\u20131710.","DOI":"10.1021\/acs.jctc.2c00995"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"crossref","unstructured":"Andrey Asadchev and Edward\u00a0F Valeev. 2024. 3-center and 4-center 2-particle Gaussian AO integrals on modern accelerated processors. The Journal of Chemical Physics 160 24 (2024).","DOI":"10.1063\/5.0217001"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"crossref","unstructured":"Al\u00e1n Aspuru-Guzik Roland Lindh and Markus Reiher. 2018. The matter simulation (r) evolution. ACS central science 4 2 (2018) 144\u2013152.","DOI":"10.1021\/acscentsci.7b00550"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"crossref","unstructured":"Evert\u00a0Jan Baerends and Oleg\u00a0V Gritsenko. 1997. A quantum chemical view of density functional theory. The Journal of Physical Chemistry A 101 30 (1997) 5383\u20135403.","DOI":"10.1021\/jp9703768"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"crossref","unstructured":"David Balcells and Bastian\u00a0Bjerkem Skjelstad. 2020. tmQM dataset\u2014quantum geometries and properties of 86k transition metal complexes. Journal of chemical information and modeling 60 12 (2020) 6135\u20136146.","DOI":"10.1021\/acs.jcim.0c01041"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"crossref","unstructured":"Giuseppe\u00a0MJ Barca Colleen Bertoni Laura Carrington Dipayan Datta Nuwan De\u00a0Silva J\u00a0Emiliano Deustua Dmitri\u00a0G Fedorov Jeffrey\u00a0R Gour Anastasia\u00a0O Gunina Emilie Guidez et\u00a0al. 2020. Recent developments in the general atomic and molecular electronic structure system. The Journal of chemical physics 152 15 (2020).","DOI":"10.1063\/5.0005188"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"crossref","unstructured":"Giuseppe\u00a0MJ Barca Jorge\u00a0L Galvez-Vallejo David\u00a0L Poole Alistair\u00a0P Rendell and Mark\u00a0S Gordon. 2020. High-performance graphics processing unit-accelerated fock build algorithm. Journal of Chemical Theory and Computation 16 12 (2020) 7232\u20137238.","DOI":"10.1021\/acs.jctc.0c00768"},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433808"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"crossref","unstructured":"Michael\u00a0V Berry. 1977. Regular and irregular semiclassical wavefunctions. Journal of Physics A: Mathematical and General 10 12 (1977) 2083.","DOI":"10.1088\/0305-4470\/10\/12\/016"},{"key":"e_1_3_3_3_17_2","doi-asserted-by":"crossref","unstructured":"Muhammad\u00a0Mubashir Bhatti M Marin Ahmed Zeeshan and Sara\u00a0I Abdelsalam. 2020. Recent trends in computational fluid dynamics. Frontiers in Physics 8 (2020) 593111.","DOI":"10.3389\/fphy.2020.593111"},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"crossref","unstructured":"Nick\u00a0S Blunt Joan Camps Ophelia Crawford R\u00f3bert Izs\u00e1k Sebastian Leontica Arjun Mirani Alexandra\u00a0E Moylett Sam\u00a0A Scivier Christoph Sunderhauf Patrick Schopf et\u00a0al. 2022. Perspective on the current state-of-the-art of quantum computing for drug discovery applications. Journal of Chemical Theory and Computation 18 12 (2022) 7001\u20137023.","DOI":"10.1021\/acs.jctc.2c00574"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"crossref","unstructured":"Stefano Borgani and Andrey Kravtsov. 2011. Cosmological simulations of galaxy clusters. Advanced Science Letters 4 2 (2011) 204\u2013227.","DOI":"10.1166\/asl.2011.1209"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/SHPCC.1992.232626"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"crossref","unstructured":"Kieron Burke. 2012. Perspective on density functional theory. The Journal of chemical physics 136 15 (2012).","DOI":"10.1063\/1.4704546"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"crossref","unstructured":"Asbjorn\u00a0M Burow and Marek Sierka. 2011. Linear scaling hierarchical integration scheme for the exchange-correlation term in molecular and periodic systems. Journal of Chemical Theory and Computation 7 10 (2011) 3097\u20133104.","DOI":"10.1021\/ct200412r"},{"key":"e_1_3_3_3_23_2","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et\u00a0al. 2018. { TVM} : An automated { End-to-End} optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578\u2013594."},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627535.3638476"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"crossref","unstructured":"Jack Choquette Wishwesh Gandhi Olivier Giroux Nick Stam and Ronny Krashinsky. 2021. Nvidia a100 tensor core gpu: Performance and innovation. IEEE Micro 41 2 (2021) 29\u201335.","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"crossref","unstructured":"Aron\u00a0J Cohen Paula Mori-S\u00e1nchez and Weitao Yang. 2012. Challenges for density functional theory. Chemical reviews 112 1 (2012) 289\u2013320.","DOI":"10.1021\/cr200107z"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"crossref","unstructured":"Larry\u00a0A Curtiss Paul\u00a0C Redfern and Krishnan Raghavachari. 2007. Gaussian-4 theory. The Journal of chemical physics 126 8 (2007).","DOI":"10.1063\/1.2436888"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3331057"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"crossref","unstructured":"Emanuele Danovaro Andrea Clematis Antonella Galizia Giuseppe Ripepi Alfonso Quarati and Daniele D\u2019Agostino. 2014. Heterogeneous architectures for computational intensive applications: a cost-effectiveness analysis. J. Comput. Appl. Math. 270 (2014) 63\u201377.","DOI":"10.1016\/j.cam.2014.02.022"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3357157"},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"crossref","unstructured":"Shail Dave Riyadh Baghdadi Tony Nowatzki Sasikanth Avancha Aviral Shrivastava and Baoxin Li. 2021. Hardware acceleration of sparse and irregular tensor computations of ml models: A survey and insights. Proc. IEEE 109 10 (2021) 1706\u20131752.","DOI":"10.1109\/JPROC.2021.3098483"},{"key":"e_1_3_3_3_32_2","unstructured":"Jeff Dean. 2021. Introducing pathways: A next-generation ai architecture. Google Blog 366 (2021)."},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"crossref","unstructured":"Rosa Di\u00a0Felice Maricris\u00a0L Mayes Ryan\u00a0M Richard David\u00a0B Williams-Young Garnet Kin-Lic Chan Wibe\u00a0A de Jong Niranjan Govind Martin Head-Gordon Matthew\u00a0R Hermes Karol Kowalski et\u00a0al. 2023. A perspective on sustainable computational chemistry software development and integration. Journal of chemical theory and computation 19 20 (2023) 7056\u20137076.","DOI":"10.1021\/acs.jctc.3c00419"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"crossref","unstructured":"Jack Dongarra Pete Beckman Terry Moore Patrick Aerts Giovanni Aloisio Jean-Claude Andre David Barkai Jean-Yves Berthou Taisuke Boku Bertrand Braunschweig et\u00a0al. 2011. The international exascale software project roadmap. The international journal of high performance computing applications 25 1 (2011) 3\u201360.","DOI":"10.1177\/1094342010391989"},{"key":"e_1_3_3_3_35_2","unstructured":"Jack Dongarra John Gunnels Harun Bayraktar Azzam Haidar and Dan Ernst. 2024. Hardware Trends Impacting Floating-Point Computations In Scientific Applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.12090 (2024)."},{"key":"e_1_3_3_3_36_2","unstructured":"Jack Dongarra John Gunnels Harun Bayraktar Azzam Haidar and Dan Ernst. 2024. Hardware Trends Impacting Floating-Point Computations In Scientific Applications. arxiv:https:\/\/arXiv.org\/abs\/2411.12090\u00a0[math.NA] https:\/\/arxiv.org\/abs\/2411.12090"},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"crossref","unstructured":"Jack Dongarra and David Keyes. 2024. The co-evolution of computational physics and high-performance computing. Nature Reviews Physics 6 10 (2024) 621\u2013627.","DOI":"10.1038\/s42254-024-00750-z"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"crossref","unstructured":"Brett\u00a0I Dunlap. 2005. Angular momentum in molecular quantum mechanical integral evaluation. Computer physics communications 165 1 (2005) 18\u201336.","DOI":"10.1016\/j.cpc.2004.09.002"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"crossref","unstructured":"Thom\u00a0H Dunning\u00a0Jr Robert\u00a0J Harrison David Feller and Sotiris\u00a0S Xantheas. 2002. Promise and challenge of high-performance computing with examples from molecular modelling. Philosophical Transactions of the Royal Society of London. Series A: Mathematical Physical and Engineering Sciences 360 1795 (2002) 1079\u20131105.","DOI":"10.1098\/rsta.2002.0984"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"crossref","unstructured":"M Eminyan KB MacAdam J Slevin and H Kleinpoppen. 1974. Electron-photon angular correlations in electron-helium collisions: measurements of complex excitation amplitudes atomic orientation and alignment. Journal of Physics B: Atomic and Molecular Physics 7 12 (1974) 1519.","DOI":"10.1088\/0022-3700\/7\/12\/012"},{"key":"e_1_3_3_3_41_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14090-7"},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/1274971.1274987"},{"key":"e_1_3_3_3_43_2","unstructured":"Basis\u00a0Set Exchange. 2025. Basis Set. https:\/\/www.basissetexchange.org\/."},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"crossref","unstructured":"Massimiliano Fasi Nicholas\u00a0J Higham Mantas Mikaitis and Srikara Pranesh. 2021. Numerical behavior of NVIDIA tensor cores. PeerJ Computer Science 7 (2021) e330.","DOI":"10.7717\/peerj-cs.330"},{"key":"e_1_3_3_3_45_2","unstructured":"Elias Frantar Saleh Ashkboos Torsten Hoefler and Dan Alistarh. 2022. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.17323 (2022)."},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"crossref","unstructured":"Richard\u00a0A Friesner and Victor Guallar. 2005. Ab initio quantum chemical and mixed quantum mechanics\/molecular mechanics (QM\/MM) methods for studying enzymatic catalysis. Annu. Rev. Phys. Chem. 56 1 (2005) 389\u2013427.","DOI":"10.1146\/annurev.physchem.55.091602.094410"},{"key":"e_1_3_3_3_47_2","doi-asserted-by":"crossref","unstructured":"Jorge\u00a0Luis Galvez\u00a0Vallejo Giuseppe\u00a0MJ Barca and Mark\u00a0S Gordon. 2023. High-performance GPU-accelerated evaluation of electron repulsion integrals. Molecular Physics 121 9-10 (2023) e2112987.","DOI":"10.1080\/00268976.2022.2112987"},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"crossref","unstructured":"Jorge\u00a0L Galvez\u00a0Vallejo Calum Snowdon Ryan Stocks Fazeleh Kazemian Fiona\u00a0Chuo Yan\u00a0Yu Christopher Seidl Zoe Seeger Melisa Alkan David Poole Bryce\u00a0M Westheimer et\u00a0al. 2023. Toward an extreme-scale electronic structure system. The Journal of Chemical Physics 159 4 (2023).","DOI":"10.1063\/5.0156399"},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"crossref","unstructured":"Al Geist and Daniel\u00a0A Reed. 2017. A survey of high-performance computing scaling challenges. The International Journal of High Performance Computing Applications 31 1 (2017) 104\u2013113.","DOI":"10.1177\/1094342015597083"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"crossref","unstructured":"Peter\u00a0MW Gill Benny\u00a0G Johnson and John\u00a0A Pople. 1991. Two-electron repulsion integrals over Gaussian s functions. International journal of quantum chemistry 40 6 (1991) 745\u2013752.","DOI":"10.1002\/qua.560400604"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"crossref","unstructured":"Stefan Grimme. 2003. Improved second-order M\u00f8ller\u2013Plesset perturbation theory by separate scaling of parallel-and antiparallel-spin pair correlation energies. The Journal of chemical physics 118 20 (2003) 9095\u20139102.","DOI":"10.1063\/1.1569242"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"crossref","unstructured":"Zhen Guo Zigeng Huang Qiaorui Chen Jiang Shao Guangcheng Liu Hung Pham Changsu Cao Ji Chen and Dingshun Lv. 2025. ByteQC: GPU-Accelerated Quantum Chemistry Package for Large-Scale Systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.17963 (2025).","DOI":"10.1002\/wcms.70034"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807627"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"crossref","unstructured":"Adela Habib Joshua Finkelstein and Anders\u00a0MN Niklasson. 2024. Efficient mixed-precision matrix factorization of the inverse overlap matrix in electronic structure calculations with AI-hardware and GPUs. Journal of Chemical Theory and Computation 20 16 (2024) 7102\u20137112.","DOI":"10.1021\/acs.jctc.4c00584"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"crossref","unstructured":"J\u00fcrgen Hafner Christopher Wolverton and Gerbrand Ceder. 2006. Toward computational materials design: the impact of density functional theory on materials research. MRS bulletin 31 9 (2006) 659\u2013668.","DOI":"10.1557\/mrs2006.174"},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00050"},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710897"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"crossref","unstructured":"Hwansoo Han and Chau-Wen Tseng. 2006. Exploiting locality for irregular scientific codes. IEEE Transactions on Parallel and Distributed Systems 17 7 (2006) 606\u2013618.","DOI":"10.1109\/TPDS.2006.88"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"crossref","unstructured":"Martin Head-Gordon and John\u00a0A Pople. 1988. A method for two-electron Gaussian integral and integral derivative evaluation using recurrence relations. The Journal of chemical physics 89 9 (1988) 5777\u20135786.","DOI":"10.1063\/1.455553"},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"crossref","unstructured":"Timothy\u00a0O Hodson. 2022. Root mean square error (RMSE) or mean absolute error (MAE): When to use them or not. Geoscientific Model Development Discussions 2022 (2022) 1\u201310.","DOI":"10.5194\/gmd-2022-64"},{"key":"e_1_3_3_3_61_2","doi-asserted-by":"crossref","unstructured":"Torsten Hoefler Marcin Copik Pete Beckman Andrew Jones Ian Foster Manish Parashar Daniel Reed Matthias Troyer Thomas Schulthess Daniel Ernst et\u00a0al. 2024. Xaas: Acceleration as a service to enable productive high-performance cloud computing. Computing in Science & Engineering 26 3 (2024) 40\u201351.","DOI":"10.1109\/MCSE.2024.3382154"},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"crossref","unstructured":"JJ Hopfield. 1969. Angular momentum and transition-metal superconductivity. Physical Review 186 2 (1969) 443.","DOI":"10.1103\/PhysRev.186.443"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"crossref","unstructured":"Ben Hourahine B\u00e1lint Aradi Volker Blum Frank Bonafe Alex Buccheri Cristopher Camacho Caterina Cevallos MY Deshaye T Dumitric\u0103 A Dominguez et\u00a0al. 2020. DFTB+ a software package for efficient approximate density functional theory based atomistic simulations. The Journal of chemical physics 152 12 (2020).","DOI":"10.1063\/1.5143190"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00044"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433707"},{"key":"e_1_3_3_3_66_2","unstructured":"Xianyan Jia Shutao Song Wei He Yangzihao Wang Haidong Rong Feihu Zhou Liqiang Xie Zhenyu Guo Yuanzhou Yang Liwei Yu et\u00a0al. 2018. Highly scalable deep learning training system with mixed-precision: Training imagenet in four minutes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1807.11205 (2018)."},{"key":"e_1_3_3_3_67_2","doi-asserted-by":"crossref","unstructured":"Garima Jindal Hemanta\u00a0K Kisan and Raghavan\u00a0B Sunoj. 2015. Mechanistic insights on cooperative catalysis through computational quantum chemical methods. ACS Catalysis 5 2 (2015) 480\u2013503.","DOI":"10.1021\/cs501688y"},{"key":"e_1_3_3_3_68_2","doi-asserted-by":"crossref","unstructured":"Erin\u00a0R Johnson and Axel\u00a0D Becke. 2005. A post-Hartree\u2013Fock model of intermolecular interactions. The Journal of chemical physics 123 2 (2005).","DOI":"10.1063\/1.1949201"},{"key":"e_1_3_3_3_69_2","doi-asserted-by":"publisher","unstructured":"K.\u00a0Grace Johnson Seema Mirchandaney Ellis Hoag Alan Heirich Alex Aiken and Todd\u00a0J. Mart\u00ednez. 2022. Multinode Multi-GPU Two-Electron Integrals: Code Generation Using the Regent Language. Journal of Chemical Theory and Computation 18 11 (2022) 6522\u20136536. 10.1021\/acs.jctc.2c00414 arXiv:https:\/\/doi.org\/10.1021\/acs.jctc.2c00414 PMID: 36200649.","DOI":"10.1021\/acs.jctc.2c00414"},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_3_3_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_3_3_72_2","doi-asserted-by":"crossref","unstructured":"Fusong Ju Xinran Wei Lin Huang Andrew\u00a0J Jenkins Leo Xia Jia Zhang Jianwei Zhu Han Yang Bin Shao Peggy Dai et\u00a0al. 2024. Acceleration without disruption: DFT software as a service. Journal of Chemical Theory and Computation 20 24 (2024) 10838\u201310851.","DOI":"10.1021\/acs.jctc.4c00940"},{"key":"e_1_3_3_3_73_2","doi-asserted-by":"crossref","unstructured":"LB Ju CT Zhou TW Huang K Jiang H Zhang SZ Wu B Qiao and SC Ruan. 2017. Production of high-angular-momentum electron beams in laser-plasma interactions. Physical Review E 95 5 (2017) 053205.","DOI":"10.1103\/PhysRevE.95.053205"},{"key":"e_1_3_3_3_74_2","unstructured":"Aditya Kashi Hao Lu Wesley Brewer David Rogers Michael Matheson Mallikarjun Shankar and Feiyi Wang. 2024. Mixed-precision numerics in scientific applications: survey and perspectives. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.19322 (2024)."},{"key":"e_1_3_3_3_75_2","doi-asserted-by":"crossref","unstructured":"Ricky\u00a0A Kendall Edoardo Apr\u00e0 David\u00a0E Bernholdt Eric\u00a0J Bylaska Michel Dupuis George\u00a0I Fann Robert\u00a0J Harrison Jialin Ju Jeffrey\u00a0A Nichols Jarek Nieplocha et\u00a0al. 2000. High performance computational chemistry: An overview of NWChem a distributed parallel application. Computer Physics Communications 128 1-2 (2000) 260\u2013283.","DOI":"10.1016\/S0010-4655(00)00065-5"},{"key":"e_1_3_3_3_76_2","doi-asserted-by":"crossref","unstructured":"Sunghwan Kim Jie Chen Tiejun Cheng Asta Gindulyte Jia He Siqian He Qingliang Li Benjamin\u00a0A Shoemaker Paul\u00a0A Thiessen Bo Yu et\u00a0al. 2023. PubChem 2023 update. Nucleic acids research 51 D1 (2023) D1373\u2013D1380.","DOI":"10.1093\/nar\/gkac956"},{"key":"e_1_3_3_3_77_2","volume-title":"A chemist\u2019s guide to density functional theory","author":"Koch Wolfram","year":"2015","unstructured":"Wolfram Koch and Max\u00a0C Holthausen. 2015. A chemist\u2019s guide to density functional theory. John Wiley & Sons."},{"key":"e_1_3_3_3_78_2","doi-asserted-by":"crossref","unstructured":"Walter Kohn Axel\u00a0D Becke and Robert\u00a0G Parr. 1996. Density functional theory of electronic structure. The journal of physical chemistry 100 31 (1996) 12974\u201312980.","DOI":"10.1021\/jp960669l"},{"key":"e_1_3_3_3_79_2","doi-asserted-by":"crossref","unstructured":"Karol Kowalski Raymond Bair Nicholas\u00a0P Bauman Jeffery\u00a0S Boschen Eric\u00a0J Bylaska Jeff Daily Wibe\u00a0A De\u00a0Jong Thom Dunning\u00a0Jr Niranjan Govind Robert\u00a0J Harrison et\u00a0al. 2021. From NWChem to NWChemEx: Evolving with the computational chemistry landscape. Chemical reviews 121 8 (2021) 4962\u20134998.","DOI":"10.1021\/acs.chemrev.0c00998"},{"key":"e_1_3_3_3_80_2","doi-asserted-by":"crossref","unstructured":"Thomas\u00a0D K\u00fchne Marcella Iannuzzi Mauro Del\u00a0Ben Vladimir\u00a0V Rybkin Patrick Seewald Frederick Stein Teodoro Laino Rustam\u00a0Z Khaliullin Ole Sch\u00fctt Florian Schiffmann et\u00a0al. 2020. CP2K: An electronic structure and molecular dynamics software package-Quickstep: Efficient and accurate electronic structure calculations. The Journal of Chemical Physics 152 19 (2020).","DOI":"10.1063\/5.0007045"},{"key":"e_1_3_3_3_81_2","doi-asserted-by":"crossref","unstructured":"Gautam Kumar Sahil Yadav Aniruddha Mukherjee Vikas Hassija and Mohsen Guizani. 2024. Recent advances in quantum computing for drug discovery and development. IEEE Access (2024).","DOI":"10.1109\/ACCESS.2024.3376408"},{"key":"e_1_3_3_3_82_2","doi-asserted-by":"crossref","unstructured":"Henryk Laqua J\u00f6rg Kussmann and Christian Ochsenfeld. 2021. Accelerating seminumerical Fock-exchange calculations using mixed single-and double-precision arithmethic. The Journal of Chemical Physics 154 21 (2021).","DOI":"10.1063\/5.0045084"},{"key":"e_1_3_3_3_83_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_3_3_84_2","doi-asserted-by":"crossref","unstructured":"Kurt Lejaeghere Gustav Bihlmayer Torbj\u00f6rn Bj\u00f6rkman Peter Blaha Stefan Bl\u00fcgel Volker Blum Damien Caliste Ivano\u00a0E Castelli Stewart\u00a0J Clark Andrea Dal\u00a0Corso et\u00a0al. 2016. Reproducibility in density functional theory calculations of solids. Science 351 6280 (2016) aad3000.","DOI":"10.1126\/science.aad3000"},{"key":"e_1_3_3_3_85_2","volume-title":"Quantum chemistry","author":"Levine Ira\u00a0N","year":"2009","unstructured":"Ira\u00a0N Levine, Daryle\u00a0H Busch, and Harrison Shull. 2009. Quantum chemistry. Vol.\u00a06. Pearson Prentice Hall Upper Saddle River, NJ."},{"key":"e_1_3_3_3_86_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356165"},{"key":"e_1_3_3_3_87_2","doi-asserted-by":"crossref","unstructured":"Rui Li Qiming Sun Xing Zhang and Garnet Kin-Lic Chan. 2025. Introducing GPU Acceleration into the Python-Based Simulations of Chemistry Framework. The Journal of Physical Chemistry A (2025).","DOI":"10.1021\/acs.jpca.4c05876"},{"key":"e_1_3_3_3_88_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080228"},{"key":"e_1_3_3_3_89_2","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et\u00a0al. 2024. Deepseek-v3 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.19437 (2024)."},{"key":"e_1_3_3_3_90_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00058"},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"publisher","unstructured":"Nathan Luehr Ivan\u00a0S. Ufimtsev and Todd\u00a0J. Mart\u00ednez. 2011. Dynamic Precision for Electron Repulsion Integral Evaluation on Graphical Processing Units (GPUs). Journal of Chemical Theory and Computation 7 4 (2011) 949\u2013954. 10.1021\/ct100701w arXiv:https:\/\/doi.org\/10.1021\/ct100701w PMID: 26606344.","DOI":"10.1021\/ct100701w"},{"key":"e_1_3_3_3_92_2","doi-asserted-by":"crossref","unstructured":"Marina\u00a0V Malyshkina and Alexander\u00a0S Novikov. 2021. Modern software for computer modeling in quantum chemistry and molecular dynamics. Compounds 1 3 (2021) 134\u2013144.","DOI":"10.3390\/compounds1030012"},{"key":"e_1_3_3_3_93_2","doi-asserted-by":"crossref","unstructured":"Madushanka Manathunga Hasan\u00a0Metin Aktulga Andreas\u00a0W Gotz and Kenneth\u00a0M Merz\u00a0Jr. 2023. Quantum mechanics\/molecular mechanics simulations on NVIDIA and AMD graphics processing units. Journal of Chemical Information and Modeling 63 3 (2023) 711\u2013717.","DOI":"10.1021\/acs.jcim.2c01505"},{"key":"e_1_3_3_3_94_2","doi-asserted-by":"crossref","unstructured":"Madushanka Manathunga Chi Jin Vin\u00edcius Wilian\u00a0D Cruzeiro Yipu Miao Dawei Mu Kamesh Arumugam Kristopher Keipert Hasan\u00a0Metin Aktulga Kenneth\u00a0M Merz\u00a0Jr and Andreas\u00a0W Gotz. 2021. Harnessing the power of multi-GPU acceleration into the quantum interaction computational kernel program. Journal of Chemical Theory and Computation 17 7 (2021) 3955\u20133966.","DOI":"10.1021\/acs.jctc.1c00145"},{"key":"e_1_3_3_3_95_2","doi-asserted-by":"crossref","unstructured":"Narbe Mardirossian and Martin Head-Gordon. 2017. Thirty years of density functional theory in computational chemistry: an overview and extensive assessment of 200 density functionals. Molecular physics 115 19 (2017) 2315\u20132372.","DOI":"10.1080\/00268976.2017.1333644"},{"key":"e_1_3_3_3_96_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00091"},{"key":"e_1_3_3_3_97_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624159"},{"key":"e_1_3_3_3_98_2","doi-asserted-by":"crossref","unstructured":"Larry\u00a0E McMurchie and Ernest\u00a0R Davidson. 1978. One-and two-electron integrals over Cartesian Gaussian functions. J. Comput. Phys. 26 2 (1978) 218\u2013231.","DOI":"10.1016\/0021-9991(78)90092-X"},{"key":"e_1_3_3_3_99_2","doi-asserted-by":"publisher","unstructured":"Yipu Miao and Kenneth M.\u00a0Jr. Merz. 2015. Acceleration of High Angular Momentum Electron Repulsion Integrals and Integral Derivatives on Graphics Processing Units. Journal of Chemical Theory and Computation 11 4 (2015) 1449\u20131462. 10.1021\/ct500984t arXiv:https:\/\/doi.org\/10.1021\/ct500984t PMID: 26574356.","DOI":"10.1021\/ct500984t"},{"key":"e_1_3_3_3_100_2","doi-asserted-by":"crossref","unstructured":"Yipu Miao and Kenneth\u00a0M Merz\u00a0Jr. 2013. Acceleration of electron repulsion integral evaluation on graphics processing units via use of recurrence relations. Journal of Chemical Theory and Computation 9 2 (2013) 965\u2013976.","DOI":"10.1021\/ct300754n"},{"key":"e_1_3_3_3_101_2","doi-asserted-by":"crossref","unstructured":"Yipu Miao and Kenneth\u00a0M Merz\u00a0Jr. 2015. Acceleration of high angular momentum electron repulsion integrals and integral derivatives on graphics processing units. Journal of chemical theory and computation 11 4 (2015) 1449\u20131462.","DOI":"10.1021\/ct500984t"},{"key":"e_1_3_3_3_102_2","doi-asserted-by":"crossref","unstructured":"John Michalakes. 2020. Hpc for weather forecasting. Parallel Algorithms in Computational Science and Engineering (2020) 297\u2013323.","DOI":"10.1007\/978-3-030-43736-7_10"},{"key":"e_1_3_3_3_103_2","unstructured":"Paulius Micikevicius Sharan Narang Jonah Alben Gregory Diamos Erich Elsen David Garcia Boris Ginsburg Michael Houston Oleksii Kuchaiev Ganesh Venkatesh et\u00a0al. 2017. Mixed precision training. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1710.03740 (2017)."},{"key":"e_1_3_3_3_104_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126956"},{"key":"e_1_3_3_3_105_2","doi-asserted-by":"crossref","unstructured":"Vladimir\u00a0S Mironov. 2022. Reaching the Maximal Unquenched Orbital Angular Momentum L= 3 in Mononuclear Transition-Metal Complexes: Where When and How? Inorganics 10 12 (2022) 227.","DOI":"10.3390\/inorganics10120227"},{"key":"e_1_3_3_3_106_2","doi-asserted-by":"crossref","unstructured":"Stephan Mohr Laura\u00a0E Ratcliff Luigi Genovese Damien Caliste Paul Boulanger Stefan Goedecker and Thierry Deutsch. 2015. Accurate and efficient linear scaling DFT calculations with universal applicability. Physical Chemistry Chemical Physics 17 47 (2015) 31360\u201331370.","DOI":"10.1039\/C5CP00437C"},{"key":"e_1_3_3_3_107_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476209"},{"key":"e_1_3_3_3_108_2","doi-asserted-by":"crossref","unstructured":"Frank Neese. 2003. An improvement of the resolution of the identity approximation for the formation of the Coulomb matrix. Journal of computational chemistry 24 14 (2003) 1740\u20131747.","DOI":"10.1002\/jcc.10318"},{"key":"e_1_3_3_3_109_2","doi-asserted-by":"crossref","unstructured":"Frank Neese. 2012. The ORCA program system. Wiley Interdisciplinary Reviews: Computational Molecular Science 2 1 (2012) 73\u201378.","DOI":"10.1002\/wcms.81"},{"key":"e_1_3_3_3_110_2","doi-asserted-by":"crossref","unstructured":"Frank Neese. 2023. The SHARK integral generation and digestion system. Journal of Computational Chemistry 44 3 (2023) 381\u2013396.","DOI":"10.1002\/jcc.26942"},{"key":"e_1_3_3_3_111_2","unstructured":"Nvidia. 2025. Back-to-back Convolution in CUTLASS. https:\/\/github.com\/NVIDIA\/cutlass\/tree\/main\/examples\/13_two_tensor_op_fusion."},{"key":"e_1_3_3_3_112_2","unstructured":"Nvidia. 2025. CuTe in CUTLASS. https:\/\/docs.nvidia.com\/cutlass\/media\/docs\/cpp\/cute\/index.html."},{"key":"e_1_3_3_3_113_2","unstructured":"Nvidia. 2025. CUTLASS Profiler. https:\/\/docs.nvidia.com\/cutlass\/media\/docs\/cpp\/profiler.html."},{"key":"e_1_3_3_3_114_2","unstructured":"Nvidia. 2025. NVIDIA A100 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/a100\/."},{"key":"e_1_3_3_3_115_2","unstructured":"Nvidia. 2025. NVIDIA cuBLAS Documentation. https:\/\/docs.nvidia.com\/cuda\/cublas\/."},{"key":"e_1_3_3_3_116_2","unstructured":"Nvidia. 2025. NVIDIA CUTLASS Documentation. https:\/\/docs.nvidia.com\/cutlass\/index.html."},{"key":"e_1_3_3_3_117_2","unstructured":"Nvidia. 2025. NVIDIA Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute."},{"key":"e_1_3_3_3_118_2","unstructured":"Nvidia. 2025. Swizzling Functor in CUTLASS. https:\/\/github.com\/NVIDIA\/cutlass\/blob\/main\/include\/cute\/swizzle_layout.hpp."},{"key":"e_1_3_3_3_119_2","doi-asserted-by":"crossref","unstructured":"Shigeru Obara and A Saika. 1986. Efficient recursive computation of molecular integrals over Cartesian Gaussian functions. The Journal of chemical physics 84 7 (1986) 3963\u20133974.","DOI":"10.1063\/1.450106"},{"key":"e_1_3_3_3_120_2","doi-asserted-by":"publisher","unstructured":"Roberto Olivares-Amaya Mark\u00a0A. Watson Richard\u00a0G. Edgar Leslie Vogt Yihan Shao and Al\u00e1n Aspuru-Guzik. 2010. Accelerating Correlated Quantum Chemistry Calculations Using Graphical Processing Units and a Mixed Precision Matrix Multiplication Library. Journal of Chemical Theory and Computation 6 1 (2010) 135\u2013144. 10.1021\/ct900543q arXiv:https:\/\/doi.org\/10.1021\/ct900543q PMID: 26614326.","DOI":"10.1021\/ct900543q"},{"key":"e_1_3_3_3_121_2","doi-asserted-by":"crossref","unstructured":"Elise Palethorpe Ryan Stocks and Giuseppe\u00a0MJ Barca. 2024. Advanced techniques for high-performance fock matrix construction on gpu clusters. Journal of Chemical Theory and Computation 20 23 (2024) 10424\u201310442.","DOI":"10.1021\/acs.jctc.4c00994"},{"key":"e_1_3_3_3_122_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0M Parrish Lori\u00a0A Burns Daniel\u00a0GA Smith Andrew\u00a0C Simmonett A\u00a0Eugene DePrince\u00a0III Edward\u00a0G Hohenstein Ugur Bozkaya Alexander\u00a0Yu Sokolov Roberto Di\u00a0Remigio Ryan\u00a0M Richard et\u00a0al. 2017. Psi4 1.1: An open-source electronic structure program emphasizing automation advanced libraries and interoperability. Journal of chemical theory and computation 13 7 (2017) 3185\u20133197.","DOI":"10.1021\/acs.jctc.7b00174"},{"key":"e_1_3_3_3_123_2","doi-asserted-by":"crossref","unstructured":"Biagio Peccerillo Mirco Mannino Andrea Mondelli and Sandro Bartolini. 2022. A survey on hardware accelerators: Taxonomy trends challenges and perspectives. Journal of Systems Architecture 129 (2022) 102561.","DOI":"10.1016\/j.sysarc.2022.102561"},{"key":"e_1_3_3_3_124_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00059"},{"key":"e_1_3_3_3_125_2","doi-asserted-by":"crossref","unstructured":"Felix Plasser Anna\u00a0I Krylov and Andreas Dreuw. 2022. libwfa: Wavefunction analysis tools for excited and open-shell electronic states. Wiley Interdisciplinary Reviews: Computational Molecular Science 12 4 (2022) e1595.","DOI":"10.1002\/wcms.1595"},{"key":"e_1_3_3_3_126_2","doi-asserted-by":"publisher","unstructured":"Pavel Pokhilko Evgeny Epifanovsky and Anna\u00a0I. Krylov. 2018. Double Precision Is Not Needed for Many-Body Calculations: Emergent Conventional Wisdom. Journal of Chemical Theory and Computation 14 8 (2018) 4088\u20134096. 10.1021\/acs.jctc.8b00321 arXiv:https:\/\/doi.org\/10.1021\/acs.jctc.8b00321 PMID: 29969560.","DOI":"10.1021\/acs.jctc.8b00321"},{"key":"e_1_3_3_3_127_2","unstructured":"Jonathan Ragan-Kelley. 2025. The Future of Fast Code: Giving Hardware What It Wants. https:\/\/pldi24.sigplan.org\/details\/pldi-2024-papers\/98\/The-Future-of-Fast-Code-Giving-Hardware-What-It-Wants."},{"key":"e_1_3_3_3_128_2","unstructured":"Daniel Reed Dennis Gannon and Jack Dongarra. 2022. Reinventing high performance computing: challenges and opportunities. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.02544 (2022)."},{"key":"e_1_3_3_3_129_2","doi-asserted-by":"crossref","unstructured":"Daniel\u00a0A Reed and Jack Dongarra. 2015. Exascale computing and big data. Commun. ACM 58 7 (2015) 56\u201368.","DOI":"10.1145\/2699414"},{"key":"e_1_3_3_3_130_2","doi-asserted-by":"crossref","unstructured":"J Rys M Dupuis and HF King. 1983. Computation of electron repulsion integrals using the Rys quadrature method. Journal of Computational Chemistry 4 2 (1983) 154\u2013157.","DOI":"10.1002\/jcc.540040206"},{"key":"e_1_3_3_3_131_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00025"},{"key":"e_1_3_3_3_132_2","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/180\/1\/012045"},{"key":"e_1_3_3_3_133_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS61541.2024.00022"},{"key":"e_1_3_3_3_134_2","volume-title":"Characterizing the Performance, Power Efficiency, and Programmability of AMD Matrix Cores","author":"Schieffer Gabin","year":"2024","unstructured":"Gabin Schieffer, Daniel Medeiros, Jennifer Faj, Aniruddha Marathe, and Ivy Peng. 2024. Characterizing the Performance, Power Efficiency, and Programmability of AMD Matrix Cores. Technical Report. Lawrence Livermore National Laboratory (LLNL), Livermore, CA (United States)."},{"key":"e_1_3_3_3_135_2","doi-asserted-by":"crossref","unstructured":"Gabriel\u00a0R Schleder Antonio\u00a0CM Padilha Carlos\u00a0Mera Acosta Marcio Costa and Adalberto Fazzio. 2019. From DFT to machine learning: recent approaches to materials science\u2013a review. Journal of Physics: Materials 2 3 (2019) 032001.","DOI":"10.1088\/2515-7639\/ab084b"},{"key":"e_1_3_3_3_136_2","doi-asserted-by":"crossref","unstructured":"Charles Schwartz. 1962. Importance of angular correlations between atomic electrons. Physical Review 126 3 (1962) 1015.","DOI":"10.1103\/PhysRev.126.1015"},{"key":"e_1_3_3_3_137_2","doi-asserted-by":"crossref","unstructured":"Stefan Seritan Christoph Bannwarth Bryan\u00a0S Fales Edward\u00a0G Hohenstein Christine\u00a0M Isborn Sara\u00a0IL Kokkila-Schumacher Xin Li Fang Liu Nathan Luehr James\u00a0W Snyder\u00a0Jr et\u00a0al. 2021. TeraChem: A graphical processing unit-accelerated electronic structure package for large-scale ab initio molecular dynamics. Wiley Interdisciplinary Reviews: Computational Molecular Science 11 2 (2021) e1494.","DOI":"10.1002\/wcms.1494"},{"key":"e_1_3_3_3_138_2","doi-asserted-by":"crossref","unstructured":"Jay Shah Ganesh Bikshandi Ying Zhang Vijay Thakkar Pradeep Ramani and Tri Dao. 2024. Flashattention-3: Fast and accurate attention with asynchrony and low-precision. Advances in Neural Information Processing Systems 37 (2024) 68658\u201368685.","DOI":"10.52202\/079017-2193"},{"key":"e_1_3_3_3_139_2","unstructured":"Gilad Shainer Tong Liu John Michalakes Jacob Liberman Jeff Layton Onur Celebioglu Scot\u00a0A Schultz Joshua Mora and David Cownie. 2009. Weather research and forecast (WRF) model performance and profiling analysis on advanced multi-core HPC clusters. 10th LCI ICHPCC (2009)."},{"key":"e_1_3_3_3_140_2","doi-asserted-by":"crossref","unstructured":"John\u00a0M Shalf and Robert Leland. 2015. Computing beyond moore\u2019s law. Computer 48 12 (2015) 14\u201323.","DOI":"10.1109\/MC.2015.374"},{"key":"e_1_3_3_3_141_2","doi-asserted-by":"crossref","unstructured":"Yihan Shao and Martin Head-Gordon. 2000. An improved J matrix engine for density functional theory calculations. Chemical Physics Letters 323 5-6 (2000) 425\u2013433.","DOI":"10.1016\/S0009-2614(00)00524-8"},{"key":"e_1_3_3_3_142_2","unstructured":"Mohammad Shoeybi Mostofa Patwary Raul Puri Patrick LeGresley Jared Casper and Bryan Catanzaro. 2019. Megatron-lm: Training multi-billion parameter language models using model parallelism. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.08053 (2019)."},{"key":"e_1_3_3_3_143_2","volume-title":"Density functional theory: a practical introduction","author":"Sholl David\u00a0S","year":"2022","unstructured":"David\u00a0S Sholl and Janice\u00a0A Steckel. 2022. Density functional theory: a practical introduction. John Wiley & Sons."},{"key":"e_1_3_3_3_144_2","doi-asserted-by":"crossref","unstructured":"John\u00a0C Slater. 1951. A simplification of the Hartree-Fock method. Physical review 81 3 (1951) 385.","DOI":"10.1103\/PhysRev.81.385"},{"key":"e_1_3_3_3_145_2","unstructured":"Benjamin\u00a0F Spector Simran Arora Aaryan Singhal Daniel\u00a0Y Fu and Christopher R\u00e9. 2024. ThunderKittens: Simple Fast and Adorable AI Kernels. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.20399 (2024)."},{"key":"e_1_3_3_3_146_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00015"},{"key":"e_1_3_3_3_147_2","doi-asserted-by":"crossref","unstructured":"Qiming Sun Xing Zhang Samragni Banerjee Peng Bao Marc Barbry Nick\u00a0S Blunt Nikolay\u00a0A Bogdanov George\u00a0H Booth Jia Chen Zhi-Hao Cui et\u00a0al. 2020. Recent developments in the PySCF program package. The Journal of chemical physics 153 2 (2020).","DOI":"10.1063\/5.0006074"},{"key":"e_1_3_3_3_148_2","doi-asserted-by":"crossref","unstructured":"Vivienne Sze Yu-Hsin Chen Tien-Ju Yang and Joel\u00a0S Emer. 2017. Efficient processing of deep neural networks: A tutorial and survey. Proc. IEEE 105 12 (2017) 2295\u20132329.","DOI":"10.1109\/JPROC.2017.2761740"},{"key":"e_1_3_3_3_149_2","unstructured":"tile ai. 2025. tilelang. https:\/\/github.com\/tile-ai\/tilelang."},{"key":"e_1_3_3_3_150_2","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_3_3_151_2","doi-asserted-by":"crossref","unstructured":"William\u00a0W Tipton Neil\u00a0D Drummond and Richard\u00a0G Hennig. 2014. Importance of high-angular-momentum channels in pseudopotentials for quantum Monte Carlo. Physical Review B 90 12 (2014) 125110.","DOI":"10.1103\/PhysRevB.90.125110"},{"key":"e_1_3_3_3_152_2","doi-asserted-by":"crossref","unstructured":"Julian Tirado-Rives and William\u00a0L Jorgensen. 2008. Performance of B3LYP density functional methods for a large set of organic molecules. Journal of chemical theory and computation 4 2 (2008) 297\u2013306.","DOI":"10.1021\/ct700248k"},{"key":"e_1_3_3_3_153_2","doi-asserted-by":"crossref","unstructured":"Alexey\u00a0V Titov Ivan\u00a0S Ufimtsev Nathan Luehr and Todd\u00a0J Martinez. 2013. Generating efficient quantum chemistry codes for novel architectures. Journal of chemical theory and computation 9 1 (2013) 213\u2013221.","DOI":"10.1021\/ct300321a"},{"key":"e_1_3_3_3_154_2","doi-asserted-by":"publisher","unstructured":"Alexey\u00a0V. Titov Ivan\u00a0S. Ufimtsev Nathan Luehr and Todd\u00a0J. Martinez. 2013. Generating Efficient Quantum Chemistry Codes for Novel Architectures. Journal of Chemical Theory and Computation 9 1 (2013) 213\u2013221. 10.1021\/ct300321a arXiv:https:\/\/doi.org\/10.1021\/ct300321a PMID: 26589024.","DOI":"10.1021\/ct300321a"},{"key":"e_1_3_3_3_155_2","doi-asserted-by":"publisher","unstructured":"G\u00e1bor\u00a0J\u00e1nos Tornai Istv\u00e1n Ladj\u00e1nszki \u00c1d\u00e1m R\u00e1k Gergely Kis and Gy\u00f6rgy Cserey. 2019. Calculation of Quantum Chemical Two-Electron Integrals by Applying Compiler Technology on GPU. Journal of Chemical Theory and Computation 15 10 (2019) 5319\u20135331. 10.1021\/acs.jctc.9b00560 arXiv:https:\/\/doi.org\/10.1021\/acs.jctc.9b00560 PMID: 31503475.","DOI":"10.1021\/acs.jctc.9b00560"},{"key":"e_1_3_3_3_156_2","doi-asserted-by":"crossref","unstructured":"W T\u00f6ws and GM Pastor. 2015. Many-body theory of ultrafast demagnetization and angular momentum transfer in ferromagnetic transition metals. Physical review letters 115 21 (2015) 217204.","DOI":"10.1103\/PhysRevLett.115.217204"},{"key":"e_1_3_3_3_157_2","doi-asserted-by":"publisher","DOI":"10.1109\/CANDARW64572.2024.00041"},{"key":"e_1_3_3_3_158_2","volume-title":"Computational fluid dynamics: a practical approach","author":"Tu Jiyuan","year":"2023","unstructured":"Jiyuan Tu, Guan\u00a0Heng Yeoh, Chaoqun Liu, and Yao Tao. 2023. Computational fluid dynamics: a practical approach. Elsevier."},{"key":"e_1_3_3_3_159_2","doi-asserted-by":"crossref","unstructured":"Ivan\u00a0S Ufimtsev and Todd\u00a0J Martinez. 2008. Quantum chemistry on graphical processing units. 1. Strategies for two-electron integral evaluation. Journal of Chemical Theory and Computation 4 2 (2008) 222\u2013231.","DOI":"10.1021\/ct700268q"},{"key":"e_1_3_3_3_160_2","doi-asserted-by":"publisher","DOI":"10.1109\/CAD-CG.2005.84"},{"key":"e_1_3_3_3_161_2","unstructured":"Edward\u00a0F Valeev et\u00a0al. 2020. Libint: A library for the evaluation of molecular integrals of many-body operators over Gaussian functions. For the current version see https:\/\/github. com\/evaleev\/libint\/tree\/v1 (2020)."},{"key":"e_1_3_3_3_162_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_3_163_2","doi-asserted-by":"publisher","DOI":"10.1109\/CSCI49370.2019.00283"},{"key":"e_1_3_3_3_164_2","doi-asserted-by":"crossref","unstructured":"Mark Vogelsberger Federico Marinacci Paul Torrey and Ewald Puchwein. 2020. Cosmological simulations of galaxy formation. Nature Reviews Physics 2 1 (2020) 42\u201366.","DOI":"10.1038\/s42254-019-0127-2"},{"key":"e_1_3_3_3_165_2","doi-asserted-by":"crossref","unstructured":"Qing Wang Matthias Ihme Yi-Fan Chen and John Anderson. 2022. A TensorFlow simulation framework for scientific computing of fluid flows on tensor processing units. Computer Physics Communications 274 (2022) 108292.","DOI":"10.1016\/j.cpc.2022.108292"},{"key":"e_1_3_3_3_166_2","doi-asserted-by":"crossref","unstructured":"Tong Wang Xinheng He Mingyu Li Yatao Li Ran Bi Yusong Wang Chaoran Cheng Xiangzhen Shen Jiawei Meng He Zhang et\u00a0al. 2024. Ab initio characterization of protein molecular dynamics with AI2BMD. Nature (2024) 1\u20139.","DOI":"10.1038\/s41586-024-08127-z"},{"key":"e_1_3_3_3_167_2","doi-asserted-by":"crossref","unstructured":"Tong Wang Xinheng He Mingyu Li Bin Shao and Tie-Yan Liu. 2023. AIMD-Chig: Exploring the conformational space of a 166-atom protein Chignolin with ab initio molecular dynamics. Scientific Data 10 1 (2023) 549.","DOI":"10.1038\/s41597-023-02465-9"},{"key":"e_1_3_3_3_168_2","doi-asserted-by":"crossref","unstructured":"Yuanheng Wang Diptarka Hait K\u00a0Grace Johnson O\u00a0Jonathan Fajen Juncheng\u00a0Harry Zhang Rub\u00e9n\u00a0D Guerrero and Todd\u00a0J Mart\u00ednez. 2024. Extending GPU-accelerated Gaussian integrals in the TeraChem software package to f type orbitals: Implementation and applications. The Journal of Chemical Physics 161 17 (2024).","DOI":"10.1063\/5.0233523"},{"key":"e_1_3_3_3_169_2","doi-asserted-by":"publisher","unstructured":"Yuanheng Wang Diptarka Hait K.\u00a0Grace Johnson O.\u00a0Jonathan Fajen Juncheng\u00a0Harry Zhang Rub\u00e9n\u00a0D. Guerrero and Todd\u00a0J. Mart\u00ednez. 2024. Extending GPU-accelerated Gaussian integrals in the TeraChem software package to f type orbitals: Implementation and applications. The Journal of Chemical Physics 161 17 (11 2024) 174118. 10.1063\/5.0233523 arXiv:https:\/\/pubs.aip.org\/aip\/jcp\/article-pdf\/doi\/10.1063\/5.0233523\/20237020\/174118_1_5.0233523.pdf","DOI":"10.1063\/5.0233523"},{"key":"e_1_3_3_3_170_2","unstructured":"Wikipedia. 2024. Top500 Supercomputers. https:\/\/en.wikipedia.org\/wiki\/TOP500."},{"key":"e_1_3_3_3_171_2","doi-asserted-by":"crossref","unstructured":"Karl\u00a0A Wilkinson Paul Sherwood Martyn\u00a0F Guest and Kevin\u00a0J Naidoo. 2011. Acceleration of the GAMESS-UK electronic structure package on graphical processing units. Journal of computational chemistry 32 10 (2011) 2313\u20132318.","DOI":"10.1002\/jcc.21815"},{"key":"e_1_3_3_3_172_2","doi-asserted-by":"crossref","unstructured":"David\u00a0B Williams-Young Wibe\u00a0A De\u00a0Jong Hubertus\u00a0JJ Van\u00a0Dam and Chao Yang. 2020. On the efficient evaluation of the exchange correlation potential on graphics processing unit clusters. Frontiers in chemistry 8 (2020) 581058.","DOI":"10.3389\/fchem.2020.581058"},{"key":"e_1_3_3_3_173_2","doi-asserted-by":"crossref","unstructured":"Xiaojie Wu Qiming Sun Zhichen Pu Tianze Zheng Wenzhi Ma Wen Yan Yu Xia Zhengxiao Wu Mian Huo Xiang Li et\u00a0al. 2025. Enhancing GPU-Acceleration in the Python-Based Simulations of Chemistry Frameworks. Wiley Interdisciplinary Reviews: Computational Molecular Science 15 2 (2025) e70008.","DOI":"10.1002\/wcms.70008"},{"key":"e_1_3_3_3_174_2","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783270"},{"key":"e_1_3_3_3_175_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00064"},{"key":"e_1_3_3_3_176_2","doi-asserted-by":"crossref","unstructured":"Koji Yasuda. 2008. Two-electron integral evaluation on the graphics processor unit. Journal of Computational Chemistry 29 3 (2008) 334\u2013342.","DOI":"10.1002\/jcc.20779"},{"key":"e_1_3_3_3_177_2","doi-asserted-by":"crossref","unstructured":"Koji Yasuda and Hironori Maruoka. 2014. Efficient calculation of two-electron integrals for high angular basis functions. International Journal of Quantum Chemistry 114 9 (2014) 543\u2013552.","DOI":"10.1002\/qua.24607"},{"key":"e_1_3_3_3_178_2","doi-asserted-by":"crossref","unstructured":"Jun Zhang. 2018. libreta: Computerized optimization and code synthesis for electron repulsion integral evaluation. Journal of Chemical Theory and Computation 14 2 (2018) 572\u2013587.","DOI":"10.1021\/acs.jctc.7b00788"},{"key":"e_1_3_3_3_179_2","unstructured":"Xuan Zhang Limei Wang Jacob Helwig Youzhi Luo Cong Fu Yaochen Xie Meng Liu Yuchao Lin Zhao Xu Keqiang Yan et\u00a0al. 2023. Artificial intelligence for science in quantum atomistic and continuum systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.08423 (2023)."},{"key":"e_1_3_3_3_180_2","unstructured":"Weiqing Zhou Daye Zheng Qianrui Liu Denghui Lu Yu Liu Peize Lin Yike Huang Xingliang Peng Jie\u00a0J Bao Chun Cai et\u00a0al. 2025. ABACUS: An Electronic Structure Analysis Package for the AI Era. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.08697 (2025)."},{"key":"e_1_3_3_3_181_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759829","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:38:17Z","timestamp":1773254297000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759829"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":180,"alternative-id":["10.1145\/3712285.3759829","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759829","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}