{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T10:25:44Z","timestamp":1771064744455,"version":"3.50.1"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,2,1]],"date-time":"2019-02-01T00:00:00Z","timestamp":1548979200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,2,1]],"date-time":"2019-02-01T00:00:00Z","timestamp":1548979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,2,1]],"date-time":"2019-02-01T00:00:00Z","timestamp":1548979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,2]]},"DOI":"10.1109\/cgo.2019.8661182","type":"proceedings-article","created":{"date-parts":[[2019,3,8]],"date-time":"2019-03-08T00:01:46Z","timestamp":1552003306000},"page":"85-95","source":"Crossref","is-referenced-by-count":30,"title":["A Code Generator for High-Performance Tensor Contractions on GPUs"],"prefix":"10.1109","author":[{"given":"Jinsung","family":"Kim","sequence":"first","affiliation":[]},{"given":"Aravind","family":"Sukumaran-Rajam","sequence":"additional","affiliation":[]},{"given":"Vineeth","family":"Thumma","sequence":"additional","affiliation":[]},{"given":"Sriram","family":"Krishnamoorthy","sequence":"additional","affiliation":[]},{"given":"Ajay","family":"Panyala","sequence":"additional","affiliation":[]},{"given":"Louis-Noel","family":"Pouchet","sequence":"additional","affiliation":[]},{"given":"Atanas","family":"Rountev","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sadayappan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Talsh","author":"lyakh","year":"2014"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2400682.2400713"},{"key":"ref12","article-title":"Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions","author":"vasilache","year":"2018","journal-title":"arXiv preprint arxiv 1802 05807"},{"key":"ref13","article-title":"Tccg","author":"springer","year":"0"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3157733"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-011-0179-2"},{"key":"ref16","first-page":"31","article-title":"Cublas library","volume":"15","author":"nvidia","year":"2008","journal-title":"NVIDIA Corporation Santa Clara California"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3091966.3091968"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2935323.2935328"},{"key":"ref19","article-title":"cutt: A high-performance tensor transpose library for cuda compatible gpus","author":"hynninen","year":"2017","journal-title":"arXiv preprint arXiv 1705 01598"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1080\/00268970500275780"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2017.02.016"},{"key":"ref6","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"abadi","year":"2015"},{"key":"ref5","article-title":"Fast detection of overlapping communities via online tensor methods on gpus","volume":"abs 1309 787","author":"huang","year":"2013","journal-title":"CoRR"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1002\/9780470125915.ch2"},{"key":"ref7","first-page":"2773","article-title":"Tensor decompositions for learning latent variable models","volume":"15","author":"anandkumar","year":"2014","journal-title":"Journal of Machine Learning Research"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3183895.3183900"},{"key":"ref9","article-title":"Tensor contraction benchmark v0.1","author":"springer","year":"0"},{"key":"ref1","first-page":"1","article-title":"Optimal contraction order of multiple tensors","author":"cemgil","year":"2013","journal-title":"2013 21st Signal Processing and Communications Applications Conference (SIU)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2014.12.013"},{"key":"ref22","first-page":"14","article-title":"Blis: A framework for rapidly instantiating blas functionality","volume":"41","author":"zee","year":"2015","journal-title":"ACM Transactions on Mathematical Software (TOMS)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1137\/16M108968X"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2016.031"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2010.26"},{"key":"ref25","article-title":"Learning to optimize tensor programs","author":"chen","year":"2018","journal-title":"arXiv preprint arXiv 1805 08166"}],"event":{"name":"2019 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)","location":"Washington, DC, USA","start":{"date-parts":[[2019,2,16]]},"end":{"date-parts":[[2019,2,20]]}},"container-title":["2019 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8653576\/8661160\/08661182.pdf?arnumber=8661182","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T20:22:55Z","timestamp":1658262175000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8661182\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/cgo.2019.8661182","relation":{},"subject":[],"published":{"date-parts":[[2019,2]]}}}