{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T03:48:13Z","timestamp":1772164093984,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T00:00:00Z","timestamp":1498262400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,6,24]]},"DOI":"10.1145\/3079856.3080244","type":"proceedings-article","created":{"date-parts":[[2017,6,15]],"date-time":"2017-06-15T15:40:01Z","timestamp":1497541201000},"page":"13-26","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":135,"title":["ScaleDeep"],"prefix":"10.1145","author":[{"given":"Swagath","family":"Venkataramani","sequence":"first","affiliation":[{"name":"School of ECE, Purdue University"}]},{"given":"Ashish","family":"Ranjan","sequence":"additional","affiliation":[{"name":"School of ECE, Purdue University"}]},{"given":"Subarno","family":"Banerjee","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Dipankar","family":"Das","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Sasikanth","family":"Avancha","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Ashok","family":"Jagannathan","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Ajaya","family":"Durg","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Dheemanth","family":"Nagaraj","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Bharat","family":"Kaul","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Pradeep","family":"Dubey","sequence":"additional","affiliation":[{"name":"Parallel Computing Lab, Intel Corporation"}]},{"given":"Anand","family":"Raghunathan","sequence":"additional","affiliation":[{"name":"School of ECE, Purdue University"}]}],"member":"320","published-online":{"date-parts":[[2017,6,24]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Improving Photo Search: A Step Across the Semantic Gap. Google Research blog","year":"2013","unstructured":"2013. Improving Photo Search: A Step Across the Semantic Gap. Google Research blog (2013)."},{"key":"e_1_3_2_1_2_1","volume-title":"Skype Translator - How it Works: http:\/\/blogs.skype.com\/2014\/12\/15\/skypetranslator-how-it-works\/. Skype Blog","year":"2014","unstructured":"2014. Skype Translator - How it Works: http:\/\/blogs.skype.com\/2014\/12\/15\/skypetranslator-how-it-works\/. Skype Blog (2014)."},{"key":"e_1_3_2_1_3_1","volume-title":"Apple is turning Siri into a next-level Artificial Intelligence: http:\/\/mashable.com\/2016\/06\/13\/siri-sirikit-wwdc2016-analysis\/hLMSxZKVnEqO. Mashable","year":"2016","unstructured":"2016. Apple is turning Siri into a next-level Artificial Intelligence: http:\/\/mashable.com\/2016\/06\/13\/siri-sirikit-wwdc2016-analysis\/hLMSxZKVnEqO. Mashable (2016)."},{"key":"e_1_3_2_1_4_1","unstructured":"2016. ConvNet Benchmarks: https:\/\/github.com\/soumith\/convnet-benchmarks. (2016)."},{"key":"e_1_3_2_1_5_1","volume-title":"Driver's Ed for Self-Driving Cars: How Our Deep Learning Tech Taught a Car to Drive: https:\/\/blogs.nvidia.com\/blog\/2016\/05\/06\/self-driving-cars-3\/. NVIDIA blog","year":"2016","unstructured":"2016. Driver's Ed for Self-Driving Cars: How Our Deep Learning Tech Taught a Car to Drive: https:\/\/blogs.nvidia.com\/blog\/2016\/05\/06\/self-driving-cars-3\/. NVIDIA blog (2016)."},{"key":"e_1_3_2_1_6_1","volume-title":"Google supercharges machine learning tasks with TPU custom chip. Google Research blog","year":"2016","unstructured":"2016. Google supercharges machine learning tasks with TPU custom chip. Google Research blog (2016)."},{"key":"e_1_3_2_1_7_1","volume-title":"Introducing DeepText: Facebook's text understanding engine: https:\/\/code.facebook.com\/posts\/181565595577955\/introducing-deeptext-facebook-s-text-understanding-engine\/. Facebook Code","year":"2016","unstructured":"2016. Introducing DeepText: Facebook's text understanding engine: https:\/\/code.facebook.com\/posts\/181565595577955\/introducing-deeptext-facebook-s-text-understanding-engine\/. Facebook Code (2016)."},{"key":"e_1_3_2_1_8_1","unstructured":"2016. Neon Nervana Systems: http:\/\/neon.nervanasys.com\/docs\/latest\/index.html. (2016)."},{"key":"e_1_3_2_1_9_1","unstructured":"2016. Nervana Zoo: https:\/\/gist.github.com\/nervanazoo. (2016)."},{"key":"e_1_3_2_1_10_1","unstructured":"2016. Princeton Deep Driving: http:\/\/deepdriving.cs.princeton.edu\/. (2016)."},{"key":"e_1_3_2_1_11_1","unstructured":"2016. Synopsys Design Compiler: http:\/\/www.synopsys.com\/Tools\/Implementation\/RTLSynthesis\/DesignCompiler\/Pages\/default.aspx. (2016)."},{"key":"e_1_3_2_1_12_1","unstructured":"2016. Titan X: https:\/\/blogs.nvidia.com\/blog\/2016\/07\/21\/titan-x. (2016)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.11"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1815993"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_17_1","volume-title":"2016 IEEE International Solid-State Circuits Conference (ISSCC). 262--263","author":"Chen Y. H.","unstructured":"Y. H. Chen, T. Krishna, J. Emer, and V. Sze. 2016. 14.5 Eyeriss: An energy-efficient reconfigurable accelerator for deep convolutional neural networks. In 2016 IEEE International Solid-State Circuits Conference (ISSCC). 262--263."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969442.2969588"},{"key":"e_1_3_2_1_19_1","volume-title":"Distributed Deep Learning Using Synchronous Stochastic Gradient Descent. CoRR abs\/1602.06709","author":"Das Dipankar","year":"2016","unstructured":"Dipankar Das, Sasikanth Avancha, Dheevatsa Mudigere, Karthikeyan Vaidyanathan, Srinivas Sridharan, Dhiraj D. Kalamkar, Bharat Kaul, and Pradeep Dubey. 2016. Distributed Deep Learning Using Synchronous Stochastic Gradient Descent. CoRR abs\/1602.06709 (2016). http:\/\/arxiv.org\/abs\/1602.06709"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999271"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei. 2009. ImageNet: A Large-Scale Hierarchical Image Database. In CVPR09.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.21"},{"key":"e_1_3_2_1_23_1","volume-title":"CVPR 2011 WORKSHOPS. 109--116","author":"Farabet C.","unstructured":"C. Farabet, B. Martini, B. Corda, P. Akselrod, E. Culurciello, and Y. LeCun. 2011. NeuFlow: A runtime reconfigurable dataflow processor for vision. In CVPR 2011 WORKSHOPS. 109--116."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2014.106"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045303"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"key":"e_1_3_2_1_27_1","volume-title":"Dally","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J. Dally. 2015. Deep Compression: Compressing Deep Neural Network with Pruning, Trained Quantization and Huffman Coding. CoRR abs\/1510.00149 (2015). http:\/\/arxiv.org\/abs\/1510.00149"},{"key":"e_1_3_2_1_28_1","volume-title":"Ng","author":"Hannun Awni Y.","year":"2014","unstructured":"Awni Y. Hannun, Carl Case, Jared Casper, Bryan Catanzaro, Greg Diamos, Erich Elsen, Ryan Prenger, Sanjeev Satheesh, Shubho Sengupta, Adam Coates, and Andrew Y. Ng. 2014. Deep Speech: Scaling up end-to-end speech recognition. CoRR abs\/1412.5567 (2014). http:\/\/arxiv.org\/abs\/1412.5567"},{"key":"e_1_3_2_1_29_1","volume-title":"Deep Residual Learning for Image Recognition. CoRR abs\/1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. CoRR abs\/1512.03385 (2015). http:\/\/arxiv.org\/abs\/1512.03385"},{"key":"e_1_3_2_1_30_1","volume-title":"Navdeep Jaitly, Andrew Senior, Vincent Vanhoucke, Patrick Nguyen, Tara Sainath, and Brian Kingsbury.","author":"Hinton Geoffrey","year":"2012","unstructured":"Geoffrey Hinton, Li Deng, Dong Yu, George Dahl, Abdel rahman Mohamed, Navdeep Jaitly, Andrew Senior, Vincent Vanhoucke, Patrick Nguyen, Tara Sainath, and Brian Kingsbury. 2012. Deep Neural Networks for Acoustic Modeling in Speech Recognition. Signal Processing Magazine (2012)."},{"key":"e_1_3_2_1_31_1","volume-title":"FireCaffe: near-linear acceleration of deep neural network training on compute clusters. CoRR abs\/1511.00175","author":"Iandola Forrest N.","year":"2015","unstructured":"Forrest N. Iandola, Khalid Ashraf, Matthew W. Moskewicz, and Kurt Keutzer. 2015. FireCaffe: near-linear acceleration of deep neural network training on compute clusters. CoRR abs\/1511.00175 (2015). http:\/\/arxiv.org\/abs\/1511.00175"},{"key":"e_1_3_2_1_32_1","volume-title":"Speeding up Convolutional Neural Networks with Low Rank Expansions. CoRR abs\/1405.3866","author":"Jaderberg Max","year":"2014","unstructured":"Max Jaderberg, Andrea Vedaldi, and Andrew Zisserman. 2014. Speeding up Convolutional Neural Networks with Low Rank Expansions. CoRR abs\/1405.3866 (2014). http:\/\/arxiv.org\/abs\/1405.3866"},{"key":"e_1_3_2_1_33_1","volume-title":"One weird trick for parallelizing convolutional neural networks. CoRR abs\/1404.5997","author":"Krizhevsky Alex","year":"2014","unstructured":"Alex Krizhevsky. 2014. One weird trick for parallelizing convolutional neural networks. CoRR abs\/1404.5997 (2014). http:\/\/arxiv.org\/abs\/1404.5997"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"e_1_3_2_1_35_1","volume-title":"Fast Algorithms for Convolutional Neural Networks. CoRR abs\/1509.09308","author":"Lavin Andrew","year":"2015","unstructured":"Andrew Lavin. 2015. Fast Algorithms for Convolutional Neural Networks. CoRR abs\/1509.09308 (2015). http:\/\/arxiv.org\/abs\/1509.09308"},{"key":"e_1_3_2_1_36_1","unstructured":"Jiwei Li Michel Galley Chris Brockett Georgios P. Spithourakis Jianfeng Gao and Bill Dolan. 2016. A Persona-Based Neural Conversation Model. https:\/\/www.microsoft.com\/en-us\/research\/publication\/persona-based-neural-conversation-model\/"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2744769.2744900"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2133382.2133388"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2627369.2627625"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.32"},{"key":"e_1_3_2_1_42_1","volume-title":"2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 1--13","author":"Rhu M.","unstructured":"M. Rhu, N. Gimelshein, J. Clemons, A. Zulfiqar, and S. W. Keckler. 2016. vDNN: Virtualized deep neural networks for scalable, memory-efficient neural network design. In 2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 1--13."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_44_1","volume-title":"Interspeech","author":"Seide Frank","year":"2014","unstructured":"Frank Seide, Hao Fu, Jasha Droppo, Gang Li, and Dong Yu. 2014. 1-Bit Stochastic Gradient Descent and Application to Data-Parallel Distributed Training of Speech DNNs, In Interspeech 2014."},{"key":"e_1_3_2_1_45_1","volume-title":"Overfeat: Integrated recognition, localization and detection using convolutional networks","author":"Sermanet Pierre","year":"2013","unstructured":"Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus, and Yann Lecun. 2013. Overfeat: Integrated recognition, localization and detection using convolutional networks. http:\/\/arxiv.org\/abs\/1312.6229 (2013)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.12"},{"key":"e_1_3_2_1_47_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR abs\/1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR abs\/1409.1556 (2014). http:\/\/arxiv.org\/abs\/1409.1556"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540710"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/2627369.2627613"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.5555\/3060832.3060950"},{"key":"e_1_3_2_1_53_1","volume-title":"Text Understanding from Scratch. CoRR abs\/1502.01710","author":"Zhang Xiang","year":"2015","unstructured":"Xiang Zhang and Yann LeCun. 2015. Text Understanding from Scratch. CoRR abs\/1502.01710 (2015). http:\/\/arxiv.org\/abs\/1502.01710"},{"key":"e_1_3_2_1_54_1","volume-title":"Dally","author":"Zhu Chenzhuo","year":"2016","unstructured":"Chenzhuo Zhu, Song Han, Huizi Mao, and William J. Dally. 2016. Trained Ternary Quantization. CoRR abs\/1612.01064 (2016). http:\/\/arxiv.org\/abs\/1612.01064"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2016.119"}],"event":{"name":"ISCA '17: The 44th Annual International Symposium on Computer Architecture","location":"Toronto ON Canada","acronym":"ISCA '17","sponsor":["IEEE IEEE Computer Society Technical Committee on Design Automation","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 44th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3079856.3080244","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3079856.3080244","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:37:15Z","timestamp":1750203435000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3079856.3080244"}},"subtitle":["A Scalable Compute Architecture for Learning and Evaluating Deep Networks"],"short-title":[],"issued":{"date-parts":[[2017,6,24]]},"references-count":55,"alternative-id":["10.1145\/3079856.3080244","10.1145\/3079856"],"URL":"https:\/\/doi.org\/10.1145\/3079856.3080244","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/3140659.3080244","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2017,6,24]]},"assertion":[{"value":"2017-06-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}