{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T17:07:50Z","timestamp":1774631270766,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,22]]},"DOI":"10.1109\/hpec43674.2020.9286183","type":"proceedings-article","created":{"date-parts":[[2020,12,22]],"date-time":"2020-12-22T21:07:15Z","timestamp":1608671235000},"page":"1-10","source":"Crossref","is-referenced-by-count":19,"title":["Vyasa: A High-Performance Vectorizing Compiler for Tensor Convolutions on the Xilinx AI Engine"],"prefix":"10.1109","author":[{"given":"Prasanth","family":"Chatarasi","sequence":"first","affiliation":[]},{"given":"Stephen","family":"Neuendorffer","sequence":"additional","affiliation":[]},{"given":"Samuel","family":"Bayliss","sequence":"additional","affiliation":[]},{"given":"Kees","family":"Vissers","sequence":"additional","affiliation":[]},{"given":"Vivek","family":"Sarkar","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840301"},{"key":"ref33","author":"stock","year":"2014","journal-title":"Vectorization and Register Reuse in High Performance Computing"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3106343"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3322967"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925952"},{"key":"ref37","article-title":"Automated empirical optimizations of software and the ATLAS project","volume":"27","author":"whaley","year":"2001","journal-title":"Parallel Com-put"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2008.4536398"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC.2012.161"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"327","DOI":"10.4156\/ijact.vol4.issue1.38","article-title":"Partial Elements Reuse of Vector Register in SIMD Mathematical Functions","volume":"4","author":"wang","year":"2012","journal-title":"International Journal of Advancements in Computing Technology"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694364"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3107953"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.214"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.231"},{"key":"ref16","first-page":"579","article-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning","author":"chen","year":"2018","journal-title":"Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation ser OSDI'18"},{"key":"ref17","article-title":"cuDNN: Efficient Primitives for Deep Learning","volume":"abs 1410 759","author":"chetlur","year":"2014","journal-title":"CoRR"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021736"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293906"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1995896.1995938"},{"key":"ref27","author":"shapiro","year":"2001","journal-title":"Computer Vision"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1555815.1555773"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICSAMOS.2009.5289229"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/53990.54022"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78791-4_8"},{"key":"ref8","year":"2019","journal-title":"Versal The First Adaptive Compute Acceleration Platform (ACAP)"},{"key":"ref7","year":"2018","journal-title":"Xilinx AI Engines and Their Applications"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.16"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2007.22"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462176"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00040"},{"key":"ref22","article-title":"MARVEL: A Decoupled Model-driven Approach for Efficiently Mapping Convolutions on Spatial DNN Accelerators","volume":"abs 2002 7752","author":"chatarasi","year":"2020","journal-title":"CoRR"},{"key":"ref21","article-title":"Caffeine: Towards uniformed representation and acceleration for deep convolutional neural networks","author":"zhang","year":"2018","journal-title":"IEEE Transactions on COMPUTER-AIDED DESIGN of Integrated Circuits and Systems (TCAD)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref23","author":"cyphers","year":"2018","journal-title":"Intel nGraph An intermediate representation compiler and executor for deep learning"},{"key":"ref26","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref25","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"NIPS"}],"event":{"name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","location":"Waltham, MA, USA","start":{"date-parts":[[2020,9,22]]},"end":{"date-parts":[[2020,9,24]]}},"container-title":["2020 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9285977\/9286137\/09286183.pdf?arnumber=9286183","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:33:09Z","timestamp":1656343989000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9286183\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,22]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/hpec43674.2020.9286183","relation":{},"subject":[],"published":{"date-parts":[[2020,9,22]]}}}