{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T05:17:13Z","timestamp":1770527833269,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,12]],"date-time":"2019-10-12T00:00:00Z","timestamp":1570838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-1718834, 1751064"],"award-info":[{"award-number":["CNS-1718834, 1751064"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,12]]},"DOI":"10.1145\/3352460.3358316","type":"proceedings-article","created":{"date-parts":[[2019,10,11]],"date-time":"2019-10-11T11:16:45Z","timestamp":1570792605000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":27,"title":["Wire-Aware Architecture and Dataflow for CNN Accelerators"],"prefix":"10.1145","author":[{"given":"Sumanth","family":"Gudaparthi","sequence":"first","affiliation":[{"name":"University of Utah, Salt Lake City, Utah"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Surya","family":"Narayanan","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, Utah"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rajeev","family":"Balasubramonian","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, Utah"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Edouard","family":"Giacomin","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, Utah"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hari","family":"Kambalasubramanyam","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, Utah"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pierre-Emmanuel","family":"Gaillardon","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, Utah"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,10,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2018. NVIDIA DGX-1. https:\/\/www.nvidia.com\/en-us\/data-center\/dgx-1\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2018. NVIDIA HGX-2. https:\/\/www.nvidia.com\/en-us\/data-center\/hgx\/."},{"key":"e_1_3_2_1_3_1","volume-title":"Compute Caches. In Proceedings of HPCA-23","author":"Aga Shaizeen","year":"2017","unstructured":"Shaizeen Aga, Supreet Jeloka, Arun Subramaniyan, Satish Narayanasamy, David Blaauw, and Reetuparna Das. 2017. Compute Caches. In Proceedings of HPCA-23."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of ISCA-43","author":"Albericio Jorge","year":"2016","unstructured":"Jorge Albericio, Patrick Judd, Tayler Hetherington, Tor Aamodt, Natalie Jerger, and Andreas Moshovos. 2016. Cnvlutin: Zero-Neuron-Free Deep Convolutional Neural Network Computing. In Proceedings of ISCA-43."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080231"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"R. Balasubramonian A.B. Kahng N. Muralimanohar A. Shafiee and V. Srinivas. 2017. CACTI 7: New Tools for Interconnect Exploration in Innovative Off-Chip Memories. ACM TACO 14(2) (2017).","DOI":"10.1145\/3085572"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2009.45"},{"key":"e_1_3_2_1_8_1","unstructured":"Cerebras. 2019. Cerebras Wafer Scale Engine: An Introduction. https:\/\/www.cerebras.net\/wp-content\/uploads\/2019\/08\/Cerebras-Wafer-Scale-Engine-Whitepaper.pdf."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Y. Chen T. Yang J. Emer and V. Sze. 2019. Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices. IEEE Journal on Emerging and Selected Topics in Circuits and Systems (2019).","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001140"},{"key":"e_1_3_2_1_13_1","unstructured":"M. Courbariaux and Y. Bengio. 2016. BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1. arXiv preprint 1602.02830."},{"key":"e_1_3_2_1_14_1","volume-title":"Binarized neural networks: Training deep neural networks with weights and activations constrained to+ 1 or-1. arXiv preprint arXiv:1602.02830","author":"Courbariaux Matthieu","year":"2016","unstructured":"Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, and Yoshua Bengio. 2016. Binarized neural networks: Training deep neural networks with weights and activations constrained to+ 1 or-1. arXiv preprint arXiv:1602.02830 (2016)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00040"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIC.2018.8502276"},{"key":"e_1_3_2_1_18_1","unstructured":"Graphcore. 2017. Intelligence Processing Unit. https:\/\/cdn2.hubspot.net\/hubfs\/729091\/NIPS2017\/NIPS%2017%20-%20IPU.pdf."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of ICML-32)","author":"Gupta Suyog","year":"2015","unstructured":"Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, and Pritish Narayanan. 2015. Deep Learning with Limited Numerical Precision. In Proceedings of ICML-32)."},{"key":"e_1_3_2_1_20_1","volume-title":"Deep Residual Learning for Image Recognition. arXiv preprint arXiv:1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arXiv preprint arXiv:1512.03385 (2015)."},{"key":"e_1_3_2_1_21_1","unstructured":"R. Ho. 2003. On-Chip Wires: Scaling and Efficiency. Ph.D. Dissertation. Stanford University."},{"key":"e_1_3_2_1_22_1","volume-title":"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv preprint arXiv:1704.04861 (2017)."},{"key":"e_1_3_2_1_23_1","volume-title":"Quantized neural networks: Training neural networks with low precision weights and activations. arXiv preprint arXiv:1609.07061","author":"Hubara Itay","year":"2016","unstructured":"Itay Hubara, Matthieu Courbariaux, Daniel Soudry, Ran El-Yaniv, and Yoshua Bengio. 2016. Quantized neural networks: Training neural networks with low precision weights and activations. arXiv preprint arXiv:1609.07061 (2016)."},{"key":"e_1_3_2_1_24_1","unstructured":"Norman P Jouppi Cliff Young Nishant Patil David Patterson Gaurav Agrawal Raminder Bajwa Sarah Bates Suresh Bhatia Nan Boden Al Borchers et al. 2017. In-Datacenter Performance Analysis of a Tensor Processing Unit. (2017)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783722"},{"key":"e_1_3_2_1_26_1","unstructured":"S. Keckler. 2011. Life After Dennard and How I Learned to Love the Picojoule. Keynote at MICRO."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"S.W. Keckler W.J. Dally B. Khailany M. Garland and D. Glasco. 2011. GPUs and the Future of Parallel Computing. IEEE Micro 5 (2011).","DOI":"10.1109\/MM.2011.89"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001178"},{"key":"e_1_3_2_1_29_1","volume-title":"Flexpoint: An Adaptive Numerical Format for Efficient Training of Deep Neural Networks. arXiv preprint arXiv:1711.02213","author":"Koster U.","year":"2017","unstructured":"U. Koster, T. Webb, X. Wang, M. Nassar, A. Bansal, W. Constable, O. Elibol, S. Gray, S. Hall, L. Hornof, A. Khosrowshahi, C. Kloass, R. Pai, and N. Rao. 2017. Flexpoint: An Adaptive Numerical Format for Efficient Training of Deep Neural Networks. arXiv preprint arXiv:1711.02213 (2017)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of NIPS.","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. In Proceedings of NIPS."},{"key":"e_1_3_2_1_31_1","volume-title":"Ternary weight networks. arXiv preprint arXiv:1605.04711","author":"Li Fengfu","year":"2016","unstructured":"Fengfu Li, Bo Zhang, and Bin Liu. 2016. Ternary weight networks. arXiv preprint arXiv:1605.04711 (2016)."},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of ISCA.","author":"Malladi K. T.","unstructured":"K. T. Malladi, F. A. Nothaft, K. Periyathambi, B. C. Lee, C. Kozyrakis, and M. Horowitz. 2012. Towards Energy-Proportional Datacenter Memory with Mobile DRAM. In Proceedings of ISCA."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124534"},{"key":"e_1_3_2_1_34_1","unstructured":"Naveen Muralimanohar et al. 2007. CACTI 6.0: A Tool to Understand Large Caches. Technical Report. University of Utah."},{"key":"e_1_3_2_1_35_1","volume-title":"Newton: Gravitating Towards the Physical Limits of Crossbar Acceleration","author":"Nag A.","year":"2018","unstructured":"A. Nag, R. Balasubramonian, V. Srikumar, R. Walker, A. Shafiee, J. Strachan, and N. Muralimanohar. 2018. Newton: Gravitating Towards the Physical Limits of Crossbar Acceleration. IEEE Micro Special Issue on Memristor-Based Computing (2018)."},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of MICRO.","author":"O'Connor M.","unstructured":"M. O'Connor, N. Chatterjee, D. Lee, J. Wilson, A. Agrawal, S. Keckler, and W. Dally. 2017. Fine-Grained DRAM: Energy-Efficient DRAM for Extreme Bandwidth Systems. In Proceedings of MICRO."},{"key":"e_1_3_2_1_37_1","volume":"201","author":"Parashar A.","unstructured":"A. Parashar, M. Rhu, A. Mukkara, A. Puglielli, R. Venkatesan, B. Khailany, J. Emer, S.W. Keckler, and W.J. Dally. 2017. SCNN: An Accelerator for Compressed-Sparse Convolutional Neural Networks. (2017).","journal-title":"J. Dally."},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of ISCA.","author":"Shafiee A.","unstructured":"A. Shafiee, A. Nag, N. Muralimanohar, R. Balasubramonian, J. Strachan, M. Hu, R.S. Williams, and V. Srikumar. 2016. ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In Proceedings of ISCA."},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of ISCA.","author":"Han S.","unstructured":"S.Han, X. Liu, H. Mao, J. Pu, A. Pedram, M. Horowitz, and W. Dally. 2016. EIE: Efficient Inference Engine on Compressed Deep Neural Network. In Proceedings of ISCA."},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of ICLR.","author":"Han S.","unstructured":"S.Han, H. Mao, and W. Dally. 2016. Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization, and Huffman Coding. In Proceedings of ICLR."},{"key":"e_1_3_2_1_41_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_42_1","unstructured":"Tesla. 2019. Tesla Autonomy Day. https:\/\/www.youtube.com\/watch?v=Ucp0TTmvqOE."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080244"}],"event":{"name":"MICRO '52: The 52nd Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Columbus OH USA","acronym":"MICRO '52","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE CS"]},"container-title":["Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3352460.3358316","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3352460.3358316","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3352460.3358316","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T22:24:47Z","timestamp":1753827887000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3352460.3358316"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,12]]},"references-count":43,"alternative-id":["10.1145\/3352460.3358316","10.1145\/3352460"],"URL":"https:\/\/doi.org\/10.1145\/3352460.3358316","relation":{},"subject":[],"published":{"date-parts":[[2019,10,12]]},"assertion":[{"value":"2019-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}