{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:52:30Z","timestamp":1773193950855,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,1,18]],"date-time":"2021-01-18T00:00:00Z","timestamp":1610928000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,1,18]]},"DOI":"10.1145\/3394885.3431554","type":"proceedings-article","created":{"date-parts":[[2021,1,29]],"date-time":"2021-01-29T11:32:46Z","timestamp":1611919966000},"page":"372-377","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":33,"title":["Mixed Precision Quantization for ReRAM-based DNN Inference Accelerators"],"prefix":"10.1145","author":[{"given":"Sitao","family":"Huang","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana-Champaign, USA"}]},{"given":"Aayush","family":"Ankit","sequence":"additional","affiliation":[{"name":"Purdue University, USA"}]},{"given":"Plinio","family":"Silveira","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, Brazil"}]},{"given":"Rodrigo","family":"Antunes","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, Brazil"}]},{"given":"Sai Rahul","family":"Chalamalasetti","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA"}]},{"given":"Izzat","family":"El Hajj","sequence":"additional","affiliation":[{"name":"American University of Beirut, Lebanon"}]},{"given":"Dong Eun","family":"Kim","sequence":"additional","affiliation":[{"name":"Purdue University, USA"}]},{"given":"Glaucimar","family":"Aguiar","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, Brazil"}]},{"given":"Pedro","family":"Bruel","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA and University of S\u00e3o Paulo, Brazil"}]},{"given":"Sergey","family":"Serebryakov","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA"}]},{"given":"Cong","family":"Xu","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA"}]},{"given":"Can","family":"Li","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA"}]},{"given":"Paolo","family":"Faraboschi","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA"}]},{"given":"John Paul","family":"Strachan","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA"}]},{"given":"Deming","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, USA"}]},{"given":"Kaushik","family":"Roy","sequence":"additional","affiliation":[{"name":"Purdue University, USA"}]},{"given":"Wen-mei","family":"Hwu","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, USA"}]},{"given":"Dejan","family":"Milojicic","sequence":"additional","affiliation":[{"name":"Hewlett Packard Enterprise, USA"}]}],"member":"320","published-online":{"date-parts":[[2021,1,29]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Reduced-precision strategies for bounded memory in deep neural nets. arXiv preprint arXiv:1511.05236","author":"Patrick Judd","year":"2015","unstructured":"Patrick Judd et al. Reduced-precision strategies for bounded memory in deep neural nets. arXiv preprint arXiv:1511.05236 , 2015 . Patrick Judd et al. Reduced-precision strategies for bounded memory in deep neural nets. arXiv preprint arXiv:1511.05236, 2015."},{"key":"e_1_3_2_1_2_1","first-page":"1","volume-title":"WACV","author":"Bert","year":"2016","unstructured":"Bert Moons et al. Energy-efficient convnets through approximate computing . In WACV , pages 1 -- 8 . IEEE, 2016 . Bert Moons et al. Energy-efficient convnets through approximate computing. In WACV, pages 1--8. IEEE, 2016."},{"key":"e_1_3_2_1_3_1","first-page":"2849","volume-title":"ICML","author":"Darryl","year":"2016","unstructured":"Darryl Lin et al. Fixed point quantization of deep convolutional networks . In ICML , pages 2849 -- 2858 , 2016 . Darryl Lin et al. Fixed point quantization of deep convolutional networks. In ICML, pages 2849--2858, 2016."},{"key":"e_1_3_2_1_4_1","first-page":"3007","volume-title":"ICML","author":"Charbel","year":"2017","unstructured":"Charbel Sakr et al. Analytical guarantees on numerical precision of deep neural networks . In ICML , pages 3007 -- 3016 . JMLR. org, 2017 . Charbel Sakr et al. Analytical guarantees on numerical precision of deep neural networks. In ICML, pages 3007--3016. JMLR. org, 2017."},{"key":"e_1_3_2_1_5_1","volume-title":"Loss-aware weight quantization of deep networks. arXiv preprint arXiv:1802.08635","author":"Lu Hou","year":"2018","unstructured":"Lu Hou et al. Loss-aware weight quantization of deep networks. arXiv preprint arXiv:1802.08635 , 2018 . Lu Hou et al. Loss-aware weight quantization of deep networks. arXiv preprint arXiv:1802.08635, 2018."},{"key":"e_1_3_2_1_6_1","first-page":"8612","volume-title":"CVPR","author":"Kuan","year":"2019","unstructured":"Kuan Wang et al. HAQ: Hardware-aware automated quantization with mixed precision . In CVPR , pages 8612 -- 8620 , 2019 . Kuan Wang et al. HAQ: Hardware-aware automated quantization with mixed precision. In CVPR, pages 8612--8620, 2019."},{"key":"e_1_3_2_1_7_1","first-page":"163","volume-title":"FPL","author":"Junsong","year":"2018","unstructured":"Junsong Wang et al. Design flow of accelerating hybrid extremely low bit-width neural network in embedded FPGA . In FPL , pages 163 -- 169 , 2018 . Junsong Wang et al. Design flow of accelerating hybrid extremely low bit-width neural network in embedded FPGA. In FPL, pages 163--169, 2018."},{"key":"e_1_3_2_1_8_1","volume-title":"DAC","author":"Cong","year":"2019","unstructured":"Cong Hao et al. Fpga\/dnn co-design: An efficient design methodology for iot intelligence on the edge . In DAC , New York, NY, USA , 2019 . Cong Hao et al. Fpga\/dnn co-design: An efficient design methodology for iot intelligence on the edge. In DAC, New York, NY, USA, 2019."},{"key":"e_1_3_2_1_9_1","volume-title":"Edd: Efficient differentiable dnn architecture and implementation co-search for embedded ai solutions","author":"Yuhong Li","year":"2020","unstructured":"Yuhong Li et al. Edd: Efficient differentiable dnn architecture and implementation co-search for embedded ai solutions . In DAC. IEEE Press , 2020 . Yuhong Li et al. Edd: Efficient differentiable dnn architecture and implementation co-search for embedded ai solutions. In DAC. IEEE Press, 2020."},{"key":"e_1_3_2_1_10_1","article-title":"VecQ: Minimal loss dnn model compression with vectorized weight quantization","author":"Cheng Gong","unstructured":"Cheng Gong et al . VecQ: Minimal loss dnn model compression with vectorized weight quantization . IEEE Transactions on Computers, (01):1--1, may 5555. Cheng Gong et al. VecQ: Minimal loss dnn model compression with vectorized weight quantization. IEEE Transactions on Computers, (01):1--1, may 5555.","journal-title":"IEEE Transactions on Computers, (01):1--1, may 5555."},{"key":"e_1_3_2_1_11_1","first-page":"1","volume-title":"DAC","author":"Xiaoxiao","year":"2015","unstructured":"Xiaoxiao Liu et al. RENO: A high-efficient reconfigurable neuromorphic computing accelerator design . In DAC , pages 1 -- 6 . IEEE, 2015 . Xiaoxiao Liu et al. RENO: A high-efficient reconfigurable neuromorphic computing accelerator design. In DAC, pages 1--6. IEEE, 2015."},{"key":"e_1_3_2_1_12_1","volume-title":"ISCA","author":"Ping","year":"2016","unstructured":"Ping Chi et al. PRIME: A novel processing-in-memory architecture for neural network computation in ReRAM-based main memory . In ISCA , 2016 . Ping Chi et al. PRIME: A novel processing-in-memory architecture for neural network computation in ReRAM-based main memory. In ISCA, 2016."},{"key":"e_1_3_2_1_13_1","first-page":"14","volume-title":"ISCA'16","author":"Ali","year":"2016","unstructured":"Ali Shafiee et al. ISAAC: A convolutional neural network accelerator with in-situ analog arithmetic in crossbars. In ISCA , ISCA'16 , pages 14 -- 26 . IEEE Press , 2016 . Ali Shafiee et al. ISAAC: A convolutional neural network accelerator with in-situ analog arithmetic in crossbars. In ISCA, ISCA'16, pages 14--26. IEEE Press, 2016."},{"key":"e_1_3_2_1_14_1","first-page":"52","volume-title":"HPCA","author":"Ben","year":"2018","unstructured":"Ben Feinberg et al. Making memristive neural network accelerators reliable . In HPCA , pages 52 -- 65 . IEEE, 2018 . Ben Feinberg et al. Making memristive neural network accelerators reliable. In HPCA, pages 52--65. IEEE, 2018."},{"key":"e_1_3_2_1_15_1","first-page":"715","volume-title":"ASPLOS","author":"Aayush","year":"2019","unstructured":"Aayush Ankit et al. PUMA: A programmable ultra-efficient memristor-based accelerator for machine learning inference . In ASPLOS , pages 715 -- 731 , 2019 . Aayush Ankit et al. PUMA: A programmable ultra-efficient memristor-based accelerator for machine learning inference. In ASPLOS, pages 715--731, 2019."},{"key":"e_1_3_2_1_16_1","volume-title":"DAC","author":"Indranil","year":"2020","unstructured":"Indranil Chakraborty et al. GENIEx: A Generalized Approach to Emulating Non-Idealities in Memristive X-bars Using Neural Networks . In DAC , 2020 . Indranil Chakraborty et al. GENIEx: A Generalized Approach to Emulating Non-Idealities in Memristive X-bars Using Neural Networks. In DAC, 2020."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2018.2796379"},{"key":"e_1_3_2_1_18_1","volume-title":"June","author":"Neta Zmora","year":"2018","unstructured":"Neta Zmora et al. Neural network distiller , June 2018 . Neta Zmora et al. Neural network distiller, June 2018."},{"key":"e_1_3_2_1_19_1","unstructured":"TensorFlow. Model optimization toolkit.  TensorFlow. Model optimization toolkit."},{"key":"e_1_3_2_1_20_1","volume-title":"ASPLOS","author":"Ao","year":"2019","unstructured":"Ao Ren et al. ADMM-NN: An algorithm-hardware co-design framework of dnns using alternating direction methods of multipliers . In ASPLOS , 2019 . Ao Ren et al. ADMM-NN: An algorithm-hardware co-design framework of dnns using alternating direction methods of multipliers. In ASPLOS, 2019."},{"key":"e_1_3_2_1_21_1","first-page":"1","volume-title":"DAC","author":"Brandon","year":"2018","unstructured":"Brandon Reagen et al. Ares: A framework for quantifying the resilience of deep neural networks . In DAC , pages 1 -- 6 . IEEE, 2018 . Brandon Reagen et al. Ares: A framework for quantifying the resilience of deep neural networks. In DAC, pages 1--6. IEEE, 2018."},{"key":"e_1_3_2_1_22_1","volume-title":"Learning low precision deep neural networks through regularization. arXiv preprint arXiv:1809.00095","author":"Yoojin Choi","year":"2018","unstructured":"Yoojin Choi et al. Learning low precision deep neural networks through regularization. arXiv preprint arXiv:1809.00095 , 2018 . Yoojin Choi et al. Learning low precision deep neural networks through regularization. arXiv preprint arXiv:1809.00095, 2018."},{"key":"e_1_3_2_1_23_1","volume-title":"ICLR","author":"Charbel","year":"2019","unstructured":"Charbel Sakr et al. Per-tensor fixed-point quantization of the back-propagation algorithm . In ICLR , 2019 . Charbel Sakr et al. Per-tensor fixed-point quantization of the back-propagation algorithm. In ICLR, 2019."},{"key":"e_1_3_2_1_24_1","first-page":"764","volume-title":"ISCA","author":"Hardik","year":"2018","unstructured":"Hardik Sharma et al. Bit fusion: Bit-level dynamically composable architecture for accelerating deep neural network . In ISCA , pages 764 -- 775 , 2018 . Hardik Sharma et al. Bit fusion: Bit-level dynamically composable architecture for accelerating deep neural network. In ISCA, pages 764--775, 2018."},{"key":"e_1_3_2_1_25_1","first-page":"218","volume-title":"ISSCC","author":"Jinmook","year":"2018","unstructured":"Jinmook Lee et al. UNPU: A 50.6 TOPS\/W unified deep neural network accelerator with 1b-to-16b fully-variable weight bit-precision . In ISSCC , pages 218 -- 220 , 2018 . Jinmook Lee et al. UNPU: A 50.6 TOPS\/W unified deep neural network accelerator with 1b-to-16b fully-variable weight bit-precision. In ISSCC, pages 218--220, 2018."},{"key":"e_1_3_2_1_26_1","first-page":"688","volume-title":"ISCA","author":"Eunhyeok","year":"2018","unstructured":"Eunhyeok Park et al. Energy-efficient neural network accelerator based on outlier-aware low-precision computation . In ISCA , pages 688 -- 698 . IEEE, 2018 . Eunhyeok Park et al. Energy-efficient neural network accelerator based on outlier-aware low-precision computation. In ISCA, pages 688--698. IEEE, 2018."},{"key":"e_1_3_2_1_27_1","first-page":"33","volume-title":"FPGA","author":"Caiwen","year":"2019","unstructured":"Caiwen Ding et al. REQ-YOLO: A resource-aware, efficient quantization framework for object detection on fpgas . In FPGA , pages 33 -- 42 , 2019 . Caiwen Ding et al. REQ-YOLO: A resource-aware, efficient quantization framework for object detection on fpgas. In FPGA, pages 33--42, 2019."},{"key":"e_1_3_2_1_28_1","first-page":"65","volume-title":"FPGA","author":"Yaman","year":"2017","unstructured":"Yaman Umuroglu et al. FINN: A framework for fast, scalable binarized neural network inference . In FPGA , pages 65 -- 74 , 2017 . Yaman Umuroglu et al. FINN: A framework for fast, scalable binarized neural network inference. In FPGA, pages 65--74, 2017."},{"key":"e_1_3_2_1_29_1","first-page":"1","volume-title":"DAC","author":"Zhenhua","year":"2019","unstructured":"Zhenhua Zhu et al. A configurable multi-precision cnn computing framework based on single bit rram . In DAC , pages 1 -- 6 . IEEE, 2019 . Zhenhua Zhu et al. A configurable multi-precision cnn computing framework based on single bit rram. In DAC, pages 1--6. IEEE, 2019."},{"key":"e_1_3_2_1_30_1","first-page":"1","volume-title":"DAC","author":"Wenqiang","year":"2019","unstructured":"Wenqiang Zhang et al. Design guidelines of rram based neural-processing-unit: A joint device-circuit-algorithm analysis . In DAC , pages 1 -- 6 . IEEE, 2019 . Wenqiang Zhang et al. Design guidelines of rram based neural-processing-unit: A joint device-circuit-algorithm analysis. In DAC, pages 1--6. IEEE, 2019."},{"key":"e_1_3_2_1_31_1","volume-title":"HPCA","author":"Nazm Mahdi","year":"2016","unstructured":"Mahdi Nazm Bojnordi et al. Memristive Boltzmann machine: A hardware accelerator for combinatorial optimization and deep learning . In HPCA , 2016 . Mahdi Nazm Bojnordi et al. Memristive Boltzmann machine: A hardware accelerator for combinatorial optimization and deep learning. In HPCA, 2016."},{"key":"e_1_3_2_1_32_1","volume-title":"ACM","author":"Ming","year":"2017","unstructured":"Ming Cheng et al. Time: A training-in-memory architecture for memristor-based deep neural networks. In DAC, page 26 . ACM , 2017 . Ming Cheng et al. Time: A training-in-memory architecture for memristor-based deep neural networks. In DAC, page 26. ACM, 2017."},{"key":"e_1_3_2_1_33_1","first-page":"541","volume-title":"HPCA","author":"Linghao","year":"2017","unstructured":"Linghao Song et al. Pipelayer: A pipelined ReRAM-based accelerator for deep learning . In HPCA , pages 541 -- 552 . IEEE, 2017 . Linghao Song et al. Pipelayer: A pipelined ReRAM-based accelerator for deep learning. In HPCA, pages 541--552. IEEE, 2017."},{"key":"e_1_3_2_1_34_1","first-page":"178","volume-title":"ASP-DAC","author":"Fan","year":"2018","unstructured":"Fan Chen et al. ReGAN: A pipelined ReRAM-based accelerator for generative adversarial networks . In ASP-DAC , pages 178 -- 183 . IEEE, 2018 . Fan Chen et al. ReGAN: A pipelined ReRAM-based accelerator for generative adversarial networks. In ASP-DAC, pages 178--183. IEEE, 2018."},{"key":"e_1_3_2_1_35_1","article-title":"PANTHER: A programmable architecture for neural network training harnessing energy-efficient reram","author":"Aayush Ankit","year":"2020","unstructured":"Aayush Ankit et al . PANTHER: A programmable architecture for neural network training harnessing energy-efficient reram . IEEE Transactions on Computers , 2020 . Aayush Ankit et al. PANTHER: A programmable architecture for neural network training harnessing energy-efficient reram. IEEE Transactions on Computers, 2020.","journal-title":"IEEE Transactions on Computers"},{"key":"e_1_3_2_1_36_1","volume-title":"MICRO, page 21","author":"Yu Ji","year":"2016","unstructured":"Yu Ji et al. NEUTRAMS: Neural network transformation and co-design under neuromorphic hardware constraints . In MICRO, page 21 . IEEE Press , 2016 . Yu Ji et al. NEUTRAMS: Neural network transformation and co-design under neuromorphic hardware constraints. In MICRO, page 21. IEEE Press, 2016."},{"key":"e_1_3_2_1_37_1","first-page":"448","volume-title":"ASPLOS","author":"Yu","year":"2018","unstructured":"Yu Ji et al. Bridge the gap between neural networks and neuromorphic hardware with a neural network compiler . In ASPLOS , pages 448 -- 460 . ACM, 2018 . Yu Ji et al. Bridge the gap between neural networks and neuromorphic hardware with a neural network compiler. In ASPLOS, pages 448--460. ACM, 2018."},{"key":"e_1_3_2_1_38_1","volume-title":"ACM","author":"Yandan","year":"2017","unstructured":"Yandan Wang et al. Group Scissor: Scaling Neuromorphic Computing Design to Large Neural Networks. In DAC, page 85 . ACM , 2017 . Yandan Wang et al. Group Scissor: Scaling Neuromorphic Computing Design to Large Neural Networks. In DAC, page 85. ACM, 2017."}],"event":{"name":"ASPDAC '21: 26th Asia and South Pacific Design Automation Conference","location":"Tokyo Japan","acronym":"ASPDAC '21","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE CEDA"]},"container-title":["Proceedings of the 26th Asia and South Pacific Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394885.3431554","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394885.3431554","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:32:02Z","timestamp":1750195922000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394885.3431554"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,18]]},"references-count":38,"alternative-id":["10.1145\/3394885.3431554","10.1145\/3394885"],"URL":"https:\/\/doi.org\/10.1145\/3394885.3431554","relation":{},"subject":[],"published":{"date-parts":[[2021,1,18]]},"assertion":[{"value":"2021-01-29","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}