{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:27:16Z","timestamp":1750505236153,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,10,1]],"date-time":"2016-10-01T00:00:00Z","timestamp":1475280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,10]]},"DOI":"10.1145\/2968456.2968476","type":"proceedings-article","created":{"date-parts":[[2016,10,13]],"date-time":"2016-10-13T19:25:51Z","timestamp":1476386751000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":30,"title":["Zero and data reuse-aware fast convolution for deep neural networks on GPU"],"prefix":"10.1145","author":[{"given":"Hyunsun","family":"Park","sequence":"first","affiliation":[{"name":"POSTECH"}]},{"given":"Dongyoung","family":"Kim","sequence":"additional","affiliation":[{"name":"Seoul National University"}]},{"given":"Junwhan","family":"Ahn","sequence":"additional","affiliation":[{"name":"Seoul National University"}]},{"given":"Sungjoo","family":"Yoo","sequence":"additional","affiliation":[{"name":"Seoul National University"}]}],"member":"320","published-online":{"date-parts":[[2016,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky , ImageNet classification with deep convolutional neural networks . In Proceedings of the Advances in Neural Information Processing Systems , December 2012 . A. Krizhevsky, et al. ImageNet classification with deep convolutional neural networks. In Proceedings of the Advances in Neural Information Processing Systems, December 2012."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","author":"Mont\u00fafar G.","year":"2014","unstructured":"G. Mont\u00fafar , On the number of linear regions of deep neural networks . In Proceedings of the Advances in Neural Information Processing Systems , December 2014 . G. Mont\u00fafar, et al. On the number of linear regions of deep neural networks. In Proceedings of the Advances in Neural Information Processing Systems, December 2014."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_4_1","volume-title":"Deep residual learning for image recognition. arXiv preprint arXiv:1512.03385","author":"He K.","year":"2015","unstructured":"K. He , Deep residual learning for image recognition. arXiv preprint arXiv:1512.03385 , 2015 . K. He, et al. Deep residual learning for image recognition. arXiv preprint arXiv:1512.03385, 2015."},{"key":"e_1_3_2_1_5_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan K.","year":"2015","unstructured":"K. Simonyan and A. Zisserman . Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 , 2015 . K. Simonyan and A. Zisserman. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556, 2015."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of International Conference on Learning and Representation","author":"Kim Y.-D.","year":"2016","unstructured":"Y.-D. Kim , Compression of deep convolutional neural networks for fast and low power applications . In Proceedings of International Conference on Learning and Representation , May 2016 . Y.-D. Kim, et al. Compression of deep convolutional neural networks for fast and low power applications. In Proceedings of International Conference on Learning and Representation, May 2016."},{"key":"e_1_3_2_1_7_1","volume-title":"Fast training of convolutional networks through FFTs. arXiv preprint arXiv:1312.5851","author":"Mathieu M.","year":"2013","unstructured":"M. Mathieu , Fast training of convolutional networks through FFTs. arXiv preprint arXiv:1312.5851 , 2013 . M. Mathieu, et al. Fast training of convolutional networks through FFTs. arXiv preprint arXiv:1312.5851, 2013."},{"key":"e_1_3_2_1_8_1","volume-title":"Fast convolutional nets with fbfft: A GPU performance evaluation. arXiv preprint arXiv:1412.7580","author":"Vasilache N.","year":"2014","unstructured":"N. Vasilache , Fast convolutional nets with fbfft: A GPU performance evaluation. arXiv preprint arXiv:1412.7580 , 2014 . N. Vasilache, et al. Fast convolutional nets with fbfft: A GPU performance evaluation. arXiv preprint arXiv:1412.7580, 2014."},{"key":"e_1_3_2_1_9_1","volume-title":"Fast algorithms for convolutional neural networks. arXiv preprint arXiv:1509.09308","author":"Lavin A.","year":"2015","unstructured":"A. Lavin and S. Gray . Fast algorithms for convolutional neural networks. arXiv preprint arXiv:1509.09308 , 2015 . A. Lavin and S. Gray. Fast algorithms for convolutional neural networks. arXiv preprint arXiv:1509.09308, 2015."},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","author":"Han S.","year":"2015","unstructured":"S. Han , Learning both weights and connections for efficient neural network . In Proceedings of the Advances in Neural Information Processing Systems , December 2015 . S. Han, et al. Learning both weights and connections for efficient neural network. In Proceedings of the Advances in Neural Information Processing Systems, December 2015."},{"key":"e_1_3_2_1_11_1","volume-title":"IEEE International Solid-State Circuits Conference Technical Digest of Papers","author":"Chen Y.-H.","year":"2016","unstructured":"Y.-H. Chen , : An energy-efficient reconfigurable accelerator for deep convolutional neural networks . In IEEE International Solid-State Circuits Conference Technical Digest of Papers , January 2016 . Y.-H. Chen, et al. Eyeriss: An energy-efficient reconfigurable accelerator for deep convolutional neural networks. In IEEE International Solid-State Circuits Conference Technical Digest of Papers, January 2016."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_13_1","unstructured":"cuBLAS. http:\/\/docs.nvidia.com\/cuda\/cublas\/. Accessed: 2016-04-08.  cuBLAS. http:\/\/docs.nvidia.com\/cuda\/cublas\/. Accessed: 2016-04-08."},{"key":"e_1_3_2_1_14_1","volume-title":"cuDNN: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759","author":"Chetlur S.","year":"2014","unstructured":"S. Chetlur , cuDNN: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759 , 2014 . S. Chetlur, et al. cuDNN: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759, 2014."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.28.88"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","author":"Denton E.","year":"2014","unstructured":"E. Denton , Exploiting linear structure within convolutional networks for efficient evaluation . In Proceedings of the Advances in Neural Information Processing Systems , December 2014 . E. Denton, et al. Exploiting linear structure within convolutional networks for efficient evaluation. In Proceedings of the Advances in Neural Information Processing Systems, December 2014."},{"key":"e_1_3_2_1_17_1","volume-title":"Speeding-up convolutional neural networks using fine-tuned cp-decomposition. arXiv preprint arXiv:1412.6553","author":"Lebedev V.","year":"2014","unstructured":"V. Lebedev , Speeding-up convolutional neural networks using fine-tuned cp-decomposition. arXiv preprint arXiv:1412.6553 , 2014 . V. Lebedev, et al. Speeding-up convolutional neural networks using fine-tuned cp-decomposition. arXiv preprint arXiv:1412.6553, 2014."},{"key":"e_1_3_2_1_18_1","article-title":"Accelerating very deep convolutional networks for classification and detection","author":"Zhang X.","unstructured":"X. Zhang , Accelerating very deep convolutional networks for classification and detection . IEEE Transactions on Pattern Analysis and Machine Intelligence, preprint. X. Zhang, et al. Accelerating very deep convolutional networks for classification and detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, preprint.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence, preprint."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298809"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/2755753.2755913"},{"key":"e_1_3_2_1_22_1","volume-title":"Convolutional neural networks using logarithmic data representation. arXiv preprint arXiv:1603.01025","author":"Miyashita D.","year":"2016","unstructured":"D. Miyashita , Convolutional neural networks using logarithmic data representation. arXiv preprint arXiv:1603.01025 , 2016 . D. Miyashita, et al. Convolutional neural networks using logarithmic data representation. arXiv preprint arXiv:1603.01025, 2016."},{"key":"e_1_3_2_1_23_1","volume-title":"XNOR-Net: ImageNet classification using binary convolutional neural networks. arXiv preprint arXiv:1603.05279","author":"Rastegari M.","year":"2016","unstructured":"M. Rastegari , XNOR-Net: ImageNet classification using binary convolutional neural networks. arXiv preprint arXiv:1603.05279 , 2016 . M. Rastegari, et al. XNOR-Net: ImageNet classification using binary convolutional neural networks. arXiv preprint arXiv:1603.05279, 2016."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2627369.2627613"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611970364","volume-title":"Arithmetic complexity of computations","author":"Winograd S.","year":"1980","unstructured":"S. Winograd . Arithmetic complexity of computations , Volume 33 . Siam , 1980 . S. Winograd. Arithmetic complexity of computations, Volume 33. Siam, 1980."},{"key":"e_1_3_2_1_26_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and Huffman coding. arXiv preprint arXiv:1510.00149","author":"Han S.","year":"2015","unstructured":"S. Han , Deep compression: Compressing deep neural networks with pruning, trained quantization and Huffman coding. arXiv preprint arXiv:1510.00149 , 2015 . S. Han, et al. Deep compression: Compressing deep neural networks with pruning, trained quantization and Huffman coding. arXiv preprint arXiv:1510.00149, 2015."},{"key":"e_1_3_2_1_27_1","unstructured":"http:\/\/www.nvidia.com\/object\/tesla-p100.html\/. Accessed: 2016-04-08.  http:\/\/www.nvidia.com\/object\/tesla-p100.html\/. Accessed: 2016-04-08."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508148.2485964"}],"event":{"name":"ESWEEK'16: TWELFTH EMBEDDED SYSTEM WEEK","acronym":"ESWEEK'16","location":"Pittsburgh Pennsylvania"},"container-title":["Proceedings of the Eleventh IEEE\/ACM\/IFIP International Conference on Hardware\/Software Codesign and System Synthesis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2968456.2968476","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2968456.2968476","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:40:10Z","timestamp":1750218010000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2968456.2968476"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10]]},"references-count":29,"alternative-id":["10.1145\/2968456.2968476","10.1145\/2968456"],"URL":"https:\/\/doi.org\/10.1145\/2968456.2968476","relation":{},"subject":[],"published":{"date-parts":[[2016,10]]},"assertion":[{"value":"2016-10-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}