{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,5]],"date-time":"2025-08-05T12:33:44Z","timestamp":1754397224153,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,6,18]],"date-time":"2017-06-18T00:00:00Z","timestamp":1497744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,6,18]]},"DOI":"10.1145\/3061639.3062297","type":"proceedings-article","created":{"date-parts":[[2017,6,13]],"date-time":"2017-06-13T12:18:42Z","timestamp":1497356322000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Optimizing Memory Efficiency for Convolution Kernels on Kepler GPUs"],"prefix":"10.1145","author":[{"given":"Xiaoming","family":"Chen","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, IN, USA"}]},{"given":"Jianxu","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, IN, USA"}]},{"given":"Danny Z.","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, IN, USA"}]},{"given":"Xiaobo Sharon","family":"Hu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, IN, USA"}]}],"member":"320","published-online":{"date-parts":[[2017,6,18]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Digital Image Processing. Pearson","author":"Gonzalez Rafael C","year":"2007","unstructured":"Rafael C Gonzalez and Richard E Woods . Digital Image Processing. Pearson , 3 rd edition, 2007 . Rafael C Gonzalez and Richard E Woods. Digital Image Processing. Pearson, 3rd edition, 2007.","edition":"3"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/42.34715"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.4.541"},{"key":"e_1_3_2_1_4_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR, abs\/1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR, abs\/1409.1556 , 2014 . Karen Simonyan and Andrew Zisserman. Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR, abs\/1409.1556, 2014."},{"key":"e_1_3_2_1_5_1","first-page":"1097","volume-title":"NIPS","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky , Ilya Sutskever , and Geoffrey E Hinton . Imagenet Classification with Deep convolutional Neural Networks . In NIPS , pages 1097 -- 1105 , 2012 . Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. Imagenet Classification with Deep convolutional Neural Networks. In NIPS, pages 1097--1105, 2012."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"e_1_3_2_1_7_1","volume-title":"ICFHR","author":"Chellapilla Kumar","year":"2006","unstructured":"Kumar Chellapilla , Sidd Puri , and Patrice Simard . High Performance Convolutional Neural Networks for Document Processing . In ICFHR , 2006 . Kumar Chellapilla, Sidd Puri, and Patrice Simard. High Performance Convolutional Neural Networks for Document Processing. In ICFHR, 2006."},{"key":"e_1_3_2_1_8_1","volume-title":"cuDNN: Efficient Primitives for Deep Learning. CoRR, abs\/1410.0759","author":"Chetlur Sharan","year":"2014","unstructured":"Sharan Chetlur , Cliff Woolley , Philippe Vandermersch , Jonathan Cohen , John Tran , Bryan Catanzaro , and Evan Shelhamer . cuDNN: Efficient Primitives for Deep Learning. CoRR, abs\/1410.0759 , 2014 . Sharan Chetlur, Cliff Woolley, Philippe Vandermersch, Jonathan Cohen, John Tran, Bryan Catanzaro, and Evan Shelhamer. cuDNN: Efficient Primitives for Deep Learning. CoRR, abs\/1410.0759, 2014."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC-CSS-ICESS.2015.94"},{"key":"e_1_3_2_1_10_1","first-page":"14","volume":"30","author":"Werkhoven Ben Van","year":"2014","unstructured":"Ben Van Werkhoven , Jason Maassen , Henri E. Bal , and Frank J. Seinstra. Optimizing Convolution Operations on GPUs Using Adaptive Tiling. Future Gener. Comput. Syst. , 30 : 14 -- 26 , 2014 . Ben Van Werkhoven, Jason Maassen, Henri E. Bal, and Frank J. Seinstra. Optimizing Convolution Operations on GPUs Using Adaptive Tiling. Future Gener. Comput. Syst., 30:14--26, 2014.","journal-title":"Frank J. Seinstra. Optimizing Convolution Operations on GPUs Using Adaptive Tiling. Future Gener. Comput. Syst."},{"key":"e_1_3_2_1_11_1","unstructured":"cuda-convnet2. Url: https:\/\/code.google.com\/archive\/p\/cuda-convnet2\/.  cuda-convnet2. Url: https:\/\/code.google.com\/archive\/p\/cuda-convnet2\/."},{"key":"e_1_3_2_1_12_1","volume-title":"Fast Training of Convolutional Networks through FFTs. CoRR, abs\/1312.5851","author":"Mathieu Micha\u00ebl","year":"2013","unstructured":"Micha\u00ebl Mathieu , Mikael Henaff , and Yann LeCun . Fast Training of Convolutional Networks through FFTs. CoRR, abs\/1312.5851 , 2013 . Micha\u00ebl Mathieu, Mikael Henaff, and Yann LeCun. Fast Training of Convolutional Networks through FFTs. CoRR, abs\/1312.5851, 2013."},{"key":"e_1_3_2_1_13_1","volume-title":"Fast Convolutional Nets With fbfft: A GPU Performance Evaluation. CoRR, abs\/1412.7580","author":"Vasilache Nicolas","year":"2014","unstructured":"Nicolas Vasilache , Jeff Johnson , Micha\u00ebl Mathieu , Soumith Chintala , Serkan Piantino , and Yann LeCun . Fast Convolutional Nets With fbfft: A GPU Performance Evaluation. CoRR, abs\/1412.7580 , 2014 . Nicolas Vasilache, Jeff Johnson, Micha\u00ebl Mathieu, Soumith Chintala, Serkan Piantino, and Yann LeCun. Fast Convolutional Nets With fbfft: A GPU Performance Evaluation. CoRR, abs\/1412.7580, 2014."},{"key":"e_1_3_2_1_14_1","volume-title":"Very Efficient Training of Convolutional Neural Networks using Fast Fourier Transform and Overlap-and-Add. CoRR, abs\/1601.06815","author":"Highlander Tyler","year":"2016","unstructured":"Tyler Highlander and Andres Rodriguez . Very Efficient Training of Convolutional Neural Networks using Fast Fourier Transform and Overlap-and-Add. CoRR, abs\/1601.06815 , 2016 . Tyler Highlander and Andres Rodriguez. Very Efficient Training of Convolutional Neural Networks using Fast Fourier Transform and Overlap-and-Add. CoRR, abs\/1601.06815, 2016."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.435"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2968456.2968476"},{"key":"e_1_3_2_1_17_1","unstructured":"cuBLAS. Url: http:\/\/docs.nvidia.com\/cuda\/cublas\/.  cuBLAS. Url: http:\/\/docs.nvidia.com\/cuda\/cublas\/."},{"key":"e_1_3_2_1_18_1","volume-title":"Caffe: Convolutional Architecture for Fast Feature Embedding. CoRR, abs\/1408.5093","author":"Jia Yangqing","year":"2014","unstructured":"Yangqing Jia , Evan Shelhamer , Jeff Donahue , Sergey Karayev , Jonathan Long , Ross B. Girshick , Sergio Guadarrama , and Trevor Darrell . Caffe: Convolutional Architecture for Fast Feature Embedding. CoRR, abs\/1408.5093 , 2014 . Yangqing Jia, Evan Shelhamer, Jeff Donahue, Sergey Karayev, Jonathan Long, Ross B. Girshick, Sergio Guadarrama, and Trevor Darrell. Caffe: Convolutional Architecture for Fast Feature Embedding. CoRR, abs\/1408.5093, 2014."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342010385729"}],"event":{"name":"DAC '17: The 54th Annual Design Automation Conference 2017","sponsor":["EDAC Electronic Design Automation Consortium","SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"Austin TX USA","acronym":"DAC '17"},"container-title":["Proceedings of the 54th Annual Design Automation Conference 2017"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3061639.3062297","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3061639.3062297","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:03:26Z","timestamp":1750215806000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3061639.3062297"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,6,18]]},"references-count":19,"alternative-id":["10.1145\/3061639.3062297","10.1145\/3061639"],"URL":"https:\/\/doi.org\/10.1145\/3061639.3062297","relation":{},"subject":[],"published":{"date-parts":[[2017,6,18]]},"assertion":[{"value":"2017-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}