{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T22:10:39Z","timestamp":1775686239371,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,14]],"date-time":"2017-10-14T00:00:00Z","timestamp":1507939200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSERC"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,10,14]]},"DOI":"10.1145\/3123939.3123982","type":"proceedings-article","created":{"date-parts":[[2017,11,20]],"date-time":"2017-11-20T14:31:12Z","timestamp":1511188272000},"page":"382-394","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":178,"title":["Bit-pragmatic deep neural network computing"],"prefix":"10.1145","author":[{"given":"Jorge","family":"Albericio","sequence":"first","affiliation":[{"name":"Univ. of Toronto NVIDIA"}]},{"given":"Alberto","family":"Delm\u00e1s","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Patrick","family":"Judd","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Sayeh","family":"Sharify","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Gerard","family":"O'Leary","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Roman","family":"Genov","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Andreas","family":"Moshovos","sequence":"additional","affiliation":[{"name":"University of Toronto"}]}],"member":"320","published-online":{"date-parts":[[2017,10,14]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"\"How to Quantize Neural Networks with TensorFlow.\" {Online}. Available: https:\/\/www.tensorflow.org\/performance\/quantization  \"How to Quantize Neural Networks with TensorFlow.\" {Online}. Available: https:\/\/www.tensorflow.org\/performance\/quantization"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.11"},{"key":"e_1_3_2_1_3_1","first-page":"00222","article-title":"Ternary neural networks for resource-efficient AI applications","volume":"1609","author":"Alemdar H.","year":"2016","unstructured":"H. Alemdar , N. Caldwell , V. Leroy , A. Prost-Boucle , and F. P\u00e9trot , \" Ternary neural networks for resource-efficient AI applications ,\" CoRR , vol. abs\/ 1609 . 00222 , 2016 . {Online}. Available: http:\/\/arxiv.org\/abs\/1609.00222 H. Alemdar, N. Caldwell, V. Leroy, A. Prost-Boucle, and F. P\u00e9trot, \"Ternary neural networks for resource-efficient AI applications,\" CoRR, vol. abs\/1609.00222, 2016. {Online}. Available: http:\/\/arxiv.org\/abs\/1609.00222","journal-title":"CoRR"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1093\/qjmam\/4.2.236"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_6_1","first-page":"262","volume-title":"ISSCC 2016","year":"2016","unstructured":"Chen, Yu-Hsin and Krishna, Tushar and Emer, Joel and Sze, Vivienne, \"Eyeriss : An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks,\" in IEEE International Solid-State Circuits Conference , ISSCC 2016 , Digest of Technical Papers , 2016 , pp. 262 -- 263 . Chen, Yu-Hsin and Krishna, Tushar and Emer, Joel and Sze, Vivienne, \"Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks,\" in IEEE International Solid-State Circuits Conference, ISSCC 2016, Digest of Technical Papers, 2016, pp. 262--263."},{"key":"e_1_3_2_1_7_1","volume-title":"Nov.","author":"Courbariaux M.","year":"2015","unstructured":"M. Courbariaux , Y. Bengio , and J.-P. David , \"BinaryConnect : Training Deep Neural Networks with binary weights during propagations,\" ArXiv e-prints , Nov. 2015 . M. Courbariaux, Y. Bengio, and J.-P. David, \"BinaryConnect: Training Deep Neural Networks with binary weights during propagations,\" ArXiv e-prints, Nov. 2015."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000108"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/4.535411"},{"key":"e_1_3_2_1_11_1","unstructured":"Google \"Low-precision matrix multiplication \" https:\/\/github.com\/google\/gemmlowp 2016.  Google \"Low-precision matrix multiplication \" https:\/\/github.com\/google\/gemmlowp 2016."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"key":"e_1_3_2_1_13_1","volume-title":"Oct.","author":"Han S.","year":"2015","unstructured":"S. Han , H. Mao , and W. J. Dally , \" Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding,\" arXiv:1510.00149 {cs} , Oct. 2015 , arXiv: 1510.00149. {Online}. Available: http:\/\/arxiv.org\/abs\/1510.00149 S. Han, H. Mao, and W. J. Dally, \"Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding,\" arXiv:1510.00149 {cs}, Oct. 2015, arXiv: 1510.00149. {Online}. Available: http:\/\/arxiv.org\/abs\/1510.00149"},{"key":"e_1_3_2_1_14_1","first-page":"5567","article-title":"Deep speech: Scaling up end-to-end speech recognition","volume":"1412","author":"Hannun A. Y.","year":"2014","unstructured":"A. Y. Hannun , C. Case , J. Casper , B. C. Catanzaro , G. Diamos , E. Elsen , R. Prenger , S. Satheesh , S. Sengupta , A. Coates , and A. Y. Ng , \" Deep speech: Scaling up end-to-end speech recognition ,\" CoRR , vol. abs\/ 1412 . 5567 , 2014 . A. Y. Hannun, C. Case, J. Casper, B. C. Catanzaro, G. Diamos, E. Elsen, R. Prenger, S. Satheesh, S. Sengupta, A. Coates, and A. Y. Ng, \"Deep speech: Scaling up end-to-end speech recognition,\" CoRR, vol. abs\/1412.5567, 2014.","journal-title":"CoRR"},{"key":"e_1_3_2_1_15_1","first-page":"07360","article-title":"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and &lt;1mb model size","volume":"1602","author":"Iandola F. N.","year":"2016","unstructured":"F. N. Iandola , M. W. Moskewicz , K. Ashraf , S. Han , W. J. Dally , and K. Keutzer , \" Squeezenet: Alexnet-level accuracy with 50x fewer parameters and &lt;1mb model size ,\" CoRR , vol. abs\/ 1602 . 07360 , 2016 . {Online}. Available: http:\/\/arxiv.org\/abs\/1602.07360 F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J. Dally, and K. Keutzer, \"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and &lt;1mb model size,\" CoRR, vol. abs\/1602.07360, 2016. {Online}. Available: http:\/\/arxiv.org\/abs\/1602.07360","journal-title":"CoRR"},{"key":"e_1_3_2_1_16_1","volume-title":"Proteus: Exploiting numerical precision variability in deep neural networks,\" in Workshop On Approximate Computing (WAPCO)","author":"Judd P.","year":"2016","unstructured":"P. Judd , J. Albericio , T. Hetherington , T. Aamodt , N. Enright Jerger , and A. Moshovos , \" Proteus: Exploiting numerical precision variability in deep neural networks,\" in Workshop On Approximate Computing (WAPCO) , 2016 . P. Judd, J. Albericio, T. Hetherington, T. Aamodt, N. Enright Jerger, and A. Moshovos, \"Proteus: Exploiting numerical precision variability in deep neural networks,\" in Workshop On Approximate Computing (WAPCO), 2016."},{"key":"e_1_3_2_1_17_1","volume-title":"Reduced-Precision Strategies for Bounded Memory in Deep Neural Nets, arXiv:1511.05236v4 {cs.LG},\" arXiv.org","author":"Judd P.","year":"2015","unstructured":"P. Judd , J. Albericio , T. Hetherington , T. Aamodt , N. E. Jerger , R. Urtasun , and A. Moshovos , \" Reduced-Precision Strategies for Bounded Memory in Deep Neural Nets, arXiv:1511.05236v4 {cs.LG},\" arXiv.org , 2015 . P. Judd, J. Albericio, T. Hetherington, T. Aamodt, N. E. Jerger, R. Urtasun, and A. Moshovos, \"Reduced-Precision Strategies for Bounded Memory in Deep Neural Nets, arXiv:1511.05236v4 {cs.LG},\" arXiv.org, 2015."},{"key":"e_1_3_2_1_18_1","volume-title":"MICRO-49","author":"Judd P.","year":"2016","unstructured":"P. Judd , J. Albericio , T. Hetherington , T. Aamodt , and A. Moshovos , \" Stripes: Bit-serial Deep Neural Network Computing,\" in Proceedings of the 49th Annual IEEE\/ACM International Symposium on Microarchitecture, ser . MICRO-49 , 2016 . P. Judd, J. Albericio, T. Hetherington, T. Aamodt, and A. Moshovos, \"Stripes: Bit-serial Deep Neural Network Computing,\" in Proceedings of the 49th Annual IEEE\/ACM International Symposium on Microarchitecture, ser. MICRO-49, 2016."},{"key":"e_1_3_2_1_19_1","article-title":"Stripes: Bit-serial Deep Neural Network Computing","author":"Judd P.","year":"2016","unstructured":"P. Judd , J. Albericio , and A. Moshovos , \" Stripes: Bit-serial Deep Neural Network Computing ,\" Computer Architecture Letters , 2016 . P. Judd, J. Albericio, and A. Moshovos, \"Stripes: Bit-serial Deep Neural Network Computing,\" Computer Architecture Letters, 2016.","journal-title":"Computer Architecture Letters"},{"key":"e_1_3_2_1_20_1","first-page":"7510","volume-title":"Speech and Signal Processing (ICASSP)","author":"Kim J.","year":"2014","unstructured":"J. Kim , K. Hwang , and W. Sung , \" X1000 real-time phoneme recognition VLSI using feed-forward deep neural networks,\" in 2014 IEEE International Conference on Acoustics , Speech and Signal Processing (ICASSP) , May 2014 , pp. 7510 -- 7514 . J. Kim, K. Hwang, and W. Sung, \"X1000 real-time phoneme recognition VLSI using feed-forward deep neural networks,\" in 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), May 2014, pp. 7510--7514."},{"key":"e_1_3_2_1_21_1","first-page":"293","volume-title":"Et2: A metric for time and energy efficiency of computation,\" in Power aware computing","author":"Martin A. J.","year":"2002","unstructured":"A. J. Martin , M. Nystr\u00f6m , and P. I. P\u00e9nzes , \" Et2: A metric for time and energy efficiency of computation,\" in Power aware computing . Springer , 2002 , pp. 293 -- 315 . A. J. Martin, M. Nystr\u00f6m, and P. I. P\u00e9nzes, \"Et2: A metric for time and energy efficiency of computation,\" in Power aware computing. Springer, 2002, pp. 293--315."},{"key":"e_1_3_2_1_22_1","unstructured":"N. Muralimanohar and R. Balasubramonian \"Cacti 6.0: A tool to understand large caches.\"  N. Muralimanohar and R. Balasubramonian \"Cacti 6.0: A tool to understand large caches.\""},{"key":"e_1_3_2_1_23_1","first-page":"807","volume-title":"Rectified linear units improve restricted boltzmann machines,\" in Proceedings of the 27th International Conference on Machine Learning (ICML-10)","author":"Nair V.","year":"2010","unstructured":"V. Nair and G. E. Hinton , \" Rectified linear units improve restricted boltzmann machines,\" in Proceedings of the 27th International Conference on Machine Learning (ICML-10) , 2010 , pp. 807 -- 814 . V. Nair and G. E. Hinton, \"Rectified linear units improve restricted boltzmann machines,\" in Proceedings of the 27th International Conference on Machine Learning (ICML-10), 2010, pp. 807--814."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/2755753.2757168"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.32"},{"key":"e_1_3_2_1_27_1","unstructured":"Synopsys \"Design Compiler \" http:\/\/www.synopsys.com\/Tools\/Implementation\/RTLSynthesis\/DesignCompiler\/Pages.  Synopsys \"Design Compiler \" http:\/\/www.synopsys.com\/Tools\/Implementation\/RTLSynthesis\/DesignCompiler\/Pages."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/PGEC.1964.263830"},{"key":"e_1_3_2_1_29_1","unstructured":"P. Warden \"Low-precision matrix multiplication \" https:\/\/petewarden.com 2016.  P. Warden \"Low-precision matrix multiplication \" https:\/\/petewarden.com 2016."},{"key":"e_1_3_2_1_30_1","first-page":"359","volume-title":"Systems and Computers","volume":"1","author":"Yao H. H.","year":"1993","unstructured":"H. H. Yao and E. E. Swartzlander , \" Serial-parallel multipliers,\" in Proceedings of 27th Asilomar Conference on Signals , Systems and Computers , Nov. 1993 , pp. 359 -- 363 vol. 1 . H. H. Yao and E. E. Swartzlander, \"Serial-parallel multipliers,\" in Proceedings of 27th Asilomar Conference on Signals, Systems and Computers, Nov. 1993, pp. 359--363 vol.1."}],"event":{"name":"MICRO-50: The 50th Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Cambridge Massachusetts","acronym":"MICRO-50","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE-CS\\DATC IEEE Computer Society"]},"container-title":["Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123939.3123982","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3123939.3123982","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:30:31Z","timestamp":1750217431000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123939.3123982"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10,14]]},"references-count":30,"alternative-id":["10.1145\/3123939.3123982","10.1145\/3123939"],"URL":"https:\/\/doi.org\/10.1145\/3123939.3123982","relation":{},"subject":[],"published":{"date-parts":[[2017,10,14]]},"assertion":[{"value":"2017-10-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}