{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T08:29:22Z","timestamp":1777105762391,"version":"3.51.4"},"reference-count":53,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,10]]},"DOI":"10.1109\/micro.2016.7783721","type":"proceedings-article","created":{"date-parts":[[2016,12,19]],"date-time":"2016-12-19T17:11:05Z","timestamp":1482167465000},"page":"1-13","source":"Crossref","is-referenced-by-count":259,"title":["vDNN: Virtualized deep neural networks for scalable, memory-efficient neural network design"],"prefix":"10.1109","author":[{"given":"Minsoo","family":"Rhu","sequence":"first","affiliation":[]},{"given":"Natalia","family":"Gimelshein","sequence":"additional","affiliation":[]},{"given":"Jason","family":"Clemons","sequence":"additional","affiliation":[]},{"given":"Arslan","family":"Zulfiqar","sequence":"additional","affiliation":[]},{"given":"Stephen W.","family":"Keckler","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems","author":"chen","year":"2015","journal-title":"Proceedings of the 2015 Workshop on Machine Learning Systems"},{"key":"ref38","article-title":"Training and Investigating Residual Nets","author":"gross","year":"2016"},{"key":"ref33","article-title":"OpenMP Application Program Interface (version 4.0)","year":"2013"},{"key":"ref32","article-title":"cuDNN: Efficient Primitives for Deep Learning","author":"chetlur","year":"2014","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"key":"ref31","author":"chintala","year":"2015"},{"key":"ref30","article-title":"OverFeat: Integrated Recognition, Localization and Detection using Convolutional 
Networks","author":"sermanet","year":"2013","journal-title":"ArXiv org"},{"key":"ref37","year":"2016"},{"key":"ref36","article-title":"NVIDIA CUDA Programming Guide","year":"2016"},{"key":"ref35","article-title":"NVIDIA NVLINK High-Speed Interconnect","year":"2016"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446077"},{"key":"ref28","doi-asserted-by":"crossref","DOI":"10.1145\/3007787.3001139","article-title":"ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars","author":"shafiee","year":"2016","journal-title":"Proc ACM\/IEEE Int Symp Computer Architecture"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.13"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.1145\/3007787.3001138","article-title":"Cnvlutin: Ineffectual-Neuron-Free Deep Convolutional Neural Network Computing","author":"albericio","year":"2016","journal-title":"Proc ACM\/IEEE Int Symp Computer Architecture"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref1","article-title":"ImageNet Classification with Deep Convolutional Neural Networks","author":"krizhevsky","year":"2012","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2016.7418007"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1145\/3007787.3001177","article-title":"Eyeriss: A Spatial Architecture for Energy-Efficient Dataflow for Convolutional Neural Networks","author":"chen","year":"2016","journal-title":"Proc ACM\/IEEE Int Symp Computer Architecture"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1145\/3007787.3001163","article-title":"EIE: Efficient Inference Engine on Compressed Deep Neural 
Network","author":"han","year":"2016","journal-title":"Proc ACM\/IEEE Int Symp Computer Architecture"},{"key":"ref26","doi-asserted-by":"crossref","DOI":"10.1145\/3007787.3001165","article-title":"Minerva: Enabling Low-Power, High-Accuracy Deep Neural Network Accelerators","author":"reagen","year":"2016","journal-title":"Proc ACM\/IEEE Int Symp Computer Architecture"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.1145\/3007787.3001164","article-title":"RedEye: Analog ConvNet Image Sensor Architecture for Continuous Mobile Vision","author":"likam","year":"2016","journal-title":"Proc ACM\/IEEE Int Symp Computer Architecture"},{"key":"ref50","article-title":"Compressing Deep Convolutional Networks Using Vector Quantization","author":"gong","year":"2014","journal-title":"ArXiv org"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541942"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"ref52","article-title":"Supporting x86-64 Address Translation for 100s of GPU Lanes","author":"power","year":"2014","journal-title":"Proceedings of IEEE International Symposium on High-Performance Computer Architecture"},{"key":"ref10","article-title":"Baidu Eyes Deep Learning Strategy in Wake of New GPU Options","year":"2016"},{"key":"ref11","article-title":"Persistent RNNs: Stashing Recurrent Weights On-Chip","author":"diamos","year":"2016","journal-title":"Proceedings of the International Conference on Machine Learning"},{"key":"ref40","article-title":"GeForce GTX Titan X (Maxwell)","year":"2015"},{"key":"ref12","article-title":"One Weird Trick For Parallelizing Convolutional Neural Networks","author":"krizhevsky","year":"2014","journal-title":"ArXiv org"},{"key":"ref13","year":"2016"},{"key":"ref14","article-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition","author":"simonyan","year":"2015","journal-title":"International Conference on Learning 
Representations"},{"key":"ref15","article-title":"Deep Residual Learning for Image Recognition","author":"he","year":"2015","journal-title":"ArXiv org"},{"key":"ref16","article-title":"Microsoft Neural Net Shows Deep Learning Can Get Way Deeper","year":"2016"},{"key":"ref17","article-title":"Going Deeper with Convolutions","author":"szegedy","year":"2014","journal-title":"ArXiv org"},{"key":"ref18","article-title":"Deep Networks with Stochastic Depth","author":"huang","year":"2016","journal-title":"ArXiv org"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref4","year":"2016"},{"key":"ref3","article-title":"Natural Language Processing (Almost) From Scratch","author":"collobert","year":"2011","journal-title":"ArXiv org"},{"key":"ref6","year":"2016"},{"key":"ref5","year":"2016"},{"key":"ref8","article-title":"cuDNN: GPU Accelerated Deep Learning","year":"2016"},{"key":"ref7","year":"2016"},{"key":"ref49","article-title":"Reduced-Precision Strategies for Bounded Memory in Deep Neural Nets","author":"judd","year":"2016","journal-title":"ArXiv org"},{"key":"ref9","article-title":"Comparative Study of Caffe, Neon, Theano, and Torch for Deep Learning","author":"bahrampour","year":"2016","journal-title":"ArXiv org"},{"key":"ref46","article-title":"Learning Both Weights and Connections for Efficient Neural Networks","author":"han","year":"2015","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"key":"ref45","article-title":"Second Order Derivatives for Network Pruning: Optimal Brain Surgeon","author":"hassibi","year":"1993","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"key":"ref48","article-title":"Improving the Speed of Neural Networks on CPUs","author":"vanhoucke","year":"2011","journal-title":"Deep Learning and Unsupervised Feature Learning Workshop"},{"key":"ref47","article-title":"Deep Compression: Compressing Deep Neural Networks with Pruning, Trained 
Quantization and Huffman Coding","author":"han","year":"2016","journal-title":"International Conference on Learning Representations"},{"key":"ref42","article-title":"CUDA Toolkit 7.5 Documentation: Profiler","year":"2016"},{"key":"ref41","author":"chintala","year":"2016"},{"key":"ref44","article-title":"Optimal Brain Damage","author":"lecun","year":"1990","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"key":"ref43","article-title":"Comparing Biases for Minimal Network Construction with Back-propagation","author":"hanson","year":"1989","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"}],"event":{"name":"2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)","location":"Taipei, Taiwan","start":{"date-parts":[[2016,10,15]]},"end":{"date-parts":[[2016,10,19]]}},"container-title":["2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7777315\/7783693\/07783721.pdf?arnumber=7783721","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,16]],"date-time":"2019-09-16T14:51:38Z","timestamp":1568645498000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7783721\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/micro.2016.7783721","relation":{},"subject":[],"published":{"date-parts":[[2016,10]]}}}