{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T06:42:39Z","timestamp":1770705759170,"version":"3.49.0"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/dac18072.2020.9218566","type":"proceedings-article","created":{"date-parts":[[2020,10,9]],"date-time":"2020-10-09T19:57:03Z","timestamp":1602273423000},"page":"1-6","source":"Crossref","is-referenced-by-count":10,"title":["GPNPU: Enabling Efficient Hardware-Based Direct Convolution with Multi-Precision Support in GPU Tensor Cores"],"prefix":"10.1109","author":[{"given":"Zhuoran","family":"Song","sequence":"first","affiliation":[]},{"given":"Jianfei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Tianjian","family":"Li","sequence":"additional","affiliation":[]},{"given":"Li","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Ke","sequence":"additional","affiliation":[]},{"given":"Xiaoyao","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Naifeng","family":"Jing","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref1","article-title":"Nvidia tesla v100 gpu architecture","year":"2017","journal-title":"NVIDIA Whitepaper"},{"key":"ref2","article-title":"Nvidia turing gpu architecture","year":"2018","journal-title":"NVIDIA Whitepaper"},{"key":"ref3","article-title":"cudnn: Efficient primitives for deep learning","author":"Chetlur","year":"2014","journal-title":"arXiv:1410.0759"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"ref5","first-page":"79","article-title":"Modeling deep learning accelerator enabled gpus","author":"Aamir Raihan","year":"2019","journal-title":"2019 ISPASS"},{"key":"ref6","article-title":"Low-memory gemm-based convolution algorithms for deep neural networks","author":"Anderson","year":"2017","journal-title":"arXiv preprint arXiv:1709.03395"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2918851"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2016.15"},{"key":"ref9","article-title":"High-performance hardware for machine learning","author":"Dally","year":"2015","journal-title":"NIPS Tutorial"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIC.2018.8502333"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"ref12","article-title":"Modeling deep learning accelerator enabled gpus","author":"Raihan","year":"2018","journal-title":"arXiv preprint arXiv:1811.08309"},{"key":"ref13","first-page":"80","article-title":"A 1.93 tops\/w scalable deep learning\/inference processor with tetra-parallel mimd architecture for big data applications","author":"Yoo","year":"2015","journal-title":"2015 ISSCC"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1815993"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2014.106"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2872887.2750389"},{"key":"ref17","first-page":"13","article-title":"Memory-centric accelerator design for convolutional neural networks","volume":"2013","author":"Peemen","year":"2013","journal-title":"ICCD"},{"key":"ref18","first-page":"1737","article-title":"Deep learning with limited numerical precision","author":"Gupta","year":"2015","journal-title":"2015 ICML"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/2644865.2541967"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2017.2761740"}],"event":{"name":"2020 57th ACM\/IEEE Design Automation Conference (DAC)","location":"San Francisco, CA, USA","start":{"date-parts":[[2020,7,20]]},"end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 57th ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9211868\/9218488\/09218566.pdf?arnumber=9218566","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T00:54:43Z","timestamp":1706057683000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9218566\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/dac18072.2020.9218566","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}