{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T12:48:54Z","timestamp":1751374134106,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,6,12]],"date-time":"2018-06-12T00:00:00Z","timestamp":1528761600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"publisher","award":["17-SC-20-SC"],"award-info":[{"award-number":["17-SC-20-SC"]}],"id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1440749 and 1513120"],"award-info":[{"award-number":["1440749 and 1513120"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100011030","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["71648, DE-SC0014135, DE-AC05-76RL01830"],"award-info":[{"award-number":["71648, DE-SC0014135, DE-AC05-76RL01830"]}],"id":[{"id":"10.13039\/100011030","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,6,12]]},"DOI":"10.1145\/3205289.3205296","type":"proceedings-article","created":{"date-parts":[[2018,9,13]],"date-time":"2018-09-13T12:54:52Z","timestamp":1536843292000},"page":"96-106","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Optimizing Tensor Contractions in CCSD(T) for Efficient Execution on GPUs"],"prefix":"10.1145","author":[{"given":"Jinsung","family":"Kim","sequence":"first","affiliation":[{"name":"The Ohio State University"}]},{"given":"Aravind","family":"Sukumaran-Rajam","sequence":"additional","affiliation":[{"name":"The Ohio State University"}]},{"given":"Changwan","family":"Hong","sequence":"additional","affiliation":[{"name":"The Ohio State University"}]},{"given":"Ajay","family":"Panyala","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory"}]},{"given":"Rohit Kumar","family":"Srivastava","sequence":"additional","affiliation":[{"name":"The Ohio State University"}]},{"given":"Sriram","family":"Krishnamoorthy","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory"}]},{"given":"P.","family":"Sadayappan","sequence":"additional","affiliation":[{"name":"The Ohio State University"}]}],"member":"320","published-online":{"date-parts":[[2018,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"33","article-title":"An Introduction to Coupled Cluster Theory for Computational Chemists,\" in Reviews in Computational Chemistry. John Wiley & Sons","volume":"14","author":"Crawford T.","year":"2000","unstructured":"T. Crawford and H. Schaefer III , \" An Introduction to Coupled Cluster Theory for Computational Chemists,\" in Reviews in Computational Chemistry. John Wiley & Sons , Inc. , 2000 , vol. 14 , pp. 33 -- 136 . T. Crawford and H. Schaefer III, \"An Introduction to Coupled Cluster Theory for Computational Chemists,\" in Reviews in Computational Chemistry. John Wiley & Sons, Inc., 2000, vol. 14, pp. 33--136.","journal-title":"Inc."},{"key":"e_1_3_2_1_2_1","unstructured":"M. Valiev E. Bylaska N. Govind K. Kowalski T. Straatsma H. van Dam D. Wang J. Nieplocha E. Apra T. Windus and W. de Jong \"Nwchem \" 2017. {Online}. Available: http:\/\/www.nwchem-sw.org  M. Valiev E. Bylaska N. Govind K. Kowalski T. Straatsma H. van Dam D. Wang J. Nieplocha E. Apra T. Windus and W. de Jong \"Nwchem \" 2017. {Online}. Available: http:\/\/www.nwchem-sw.org"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654127"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.60"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063481"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1103\/RevModPhys.79.291"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840311"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1021\/jp9051215"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503290"},{"key":"e_1_3_2_1_10_1","first-page":"1","volume-title":"2014 21st International Conference on. IEEE","author":"Ibrahim K. Z.","year":"2014","unstructured":"K. Z. Ibrahim , S. W. Williams , E. Epifanovsky , and A. I. Krylov , \" Analysis and tuning of libtensor framework on multicore architectures,\" in High Performance Computing (HiPC) , 2014 21st International Conference on. IEEE , 2014 , pp. 1 -- 10 . K. Z. Ibrahim, S. W. Williams, E. Epifanovsky, and A. I. Krylov, \"Analysis and tuning of libtensor framework on multicore architectures,\" in High Performance Computing (HiPC), 2014 21st International Conference on. IEEE, 2014, pp. 1--10."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2010.26"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/2451462.2451482"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1021\/jp034596z"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1021\/ct1007247"},{"key":"e_1_3_2_1_15_1","volume-title":"Tccg","author":"Springer P.","year":"2018","unstructured":"P. Springer and P. Bientines , \" Tccg ,\" 2018 . {Online}. Available: https:\/\/github.com\/HPAC\/tccg P. Springer and P. Bientines, \"Tccg,\" 2018. {Online}. Available: https:\/\/github.com\/HPAC\/tccg"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1137\/16M108968X"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3157733"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.112"},{"key":"e_1_3_2_1_19_1","unstructured":"S. Verdoolaege \"Ppcg \" 2017. {Online}. Available: git:\/\/repo.or.cz\/ppcg.git  S. Verdoolaege \"Ppcg \" 2017. {Online}. Available: git:\/\/repo.or.cz\/ppcg.git"},{"key":"e_1_3_2_1_20_1","volume-title":"Design of a high-performance gemm-like tensor-tensor multiplication,\" arXiv preprint arXiv: 1607.00145","author":"Springer P.","year":"2016","unstructured":"P. Springer and P. Bientinesi , \" Design of a high-performance gemm-like tensor-tensor multiplication,\" arXiv preprint arXiv: 1607.00145 , 2016 . P. Springer and P. Bientinesi, \"Design of a high-performance gemm-like tensor-tensor multiplication,\" arXiv preprint arXiv: 1607.00145, 2016."},{"key":"e_1_3_2_1_21_1","unstructured":"D. I. Lyakh \"Talsh \" 2014. {Online}. Available: https:\/\/github.com\/DmitryLyakh\/TAL_SH  D. I. Lyakh \"Talsh \" 2014. {Online}. Available: https:\/\/github.com\/DmitryLyakh\/TAL_SH"},{"key":"e_1_3_2_1_22_1","volume-title":"Tensor contractions with extended blas kernels on cpu and gpu,\" arXiv preprint arXiv:1606.05696","author":"Shi Y.","year":"2016","unstructured":"Y. Shi , U. Niranjan , A. Anandkumar , and C. Cecka , \" Tensor contractions with extended blas kernels on cpu and gpu,\" arXiv preprint arXiv:1606.05696 , 2016 . Y. Shi, U. Niranjan, A. Anandkumar, and C. Cecka, \"Tensor contractions with extended blas kernels on cpu and gpu,\" arXiv preprint arXiv:1606.05696, 2016."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3091966.3091968"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2935323.2935328"},{"key":"e_1_3_2_1_25_1","volume-title":"cuTT: A High-Performance Tensor Transpose Library for CUDA Compatible GPUs,\" CoRR","author":"Hynninen A.","year":"2017","unstructured":"A. Hynninen and D. I. Lyakh , \" cuTT: A High-Performance Tensor Transpose Library for CUDA Compatible GPUs,\" CoRR , vol. abs\/ 1705 .01598, 2017 . {Online}. Available: http:\/\/arxiv.org\/abs\/1705.01598 A. Hynninen and D. I. Lyakh, \"cuTT: A High-Performance Tensor Transpose Library for CUDA Compatible GPUs,\" CoRR, vol. abs\/1705.01598, 2017. {Online}. Available: http:\/\/arxiv.org\/abs\/1705.01598"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1152154.1152190"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2014.43"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2555243.2555253"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2381056.2381073"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10766-015-0366-5"},{"key":"e_1_3_2_1_31_1","first-page":"511","volume-title":"Optimized batched linear algebra for modern architectures,\" in European Conference on Parallel Processing","author":"Dongarra J.","year":"2017","unstructured":"J. Dongarra , S. Hammarling , N. J. Higham , S. D. Relton , and M. Zounon , \" Optimized batched linear algebra for modern architectures,\" in European Conference on Parallel Processing . Springer , 2017 , pp. 511 -- 522 . J. Dongarra, S. Hammarling, N. J. Higham, S. D. Relton, and M. Zounon, \"Optimized batched linear algebra for modern architectures,\" in European Conference on Parallel Processing. Springer, 2017, pp. 511--522."},{"key":"e_1_3_2_1_32_1","first-page":"981","volume-title":"SC16: International Conference for. IEEE","author":"Heinecke A.","year":"2016","unstructured":"A. Heinecke , G. Henry , M. Hutchinson , and H. Pabst , \" Libxsmm: accelerating small matrix multiplications by runtime code generation,\" in High Performance Computing, Networking, Storage and Analysis , SC16: International Conference for. IEEE , 2016 , pp. 981 -- 991 . A. Heinecke, G. Henry, M. Hutchinson, and H. Pabst, \"Libxsmm: accelerating small matrix multiplications by runtime code generation,\" in High Performance Computing, Networking, Storage and Analysis, SC16: International Conference for. IEEE, 2016, pp. 981--991."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2011.10.014"},{"key":"e_1_3_2_1_34_1","first-page":"1109","volume-title":"AIP","author":"Igual F. D.","year":"2012","unstructured":"F. D. Igual , G. Quintana-Ort\u00ed , and R. A. Van De Geijn, \"Level-3 blas on a gpu: Picking the low hanging fruit,\" in AIP Conference Proceedings, vol. 1504, no. 1 . AIP , 2012 , pp. 1109 -- 1112 . F. D. Igual, G. Quintana-Ort\u00ed, and R. A. Van De Geijn, \"Level-3 blas on a gpu: Picking the low hanging fruit,\" in AIP Conference Proceedings, vol. 1504, no. 1. AIP, 2012, pp. 1109--1112."},{"key":"e_1_3_2_1_35_1","unstructured":"D. A. Matthews \"High-performance tensor contraction without blas \" arXiv preprint arXiv:1607.00291 2016.  D. A. Matthews \"High-performance tensor contraction without blas \" arXiv preprint arXiv:1607.00291 2016."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1137\/130907215"}],"event":{"name":"ICS '18: 2018 International Conference on Supercomputing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Beijing China","acronym":"ICS '18"},"container-title":["Proceedings of the 2018 International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3205289.3205296","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3205289.3205296","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3205289.3205296","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:08:33Z","timestamp":1750208913000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3205289.3205296"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6,12]]},"references-count":36,"alternative-id":["10.1145\/3205289.3205296","10.1145\/3205289"],"URL":"https:\/\/doi.org\/10.1145\/3205289.3205296","relation":{},"subject":[],"published":{"date-parts":[[2018,6,12]]},"assertion":[{"value":"2018-06-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}