{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T22:10:37Z","timestamp":1775686237701,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":107,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,6,22]],"date-time":"2019-06-22T00:00:00Z","timestamp":1561161600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,6,22]]},"DOI":"10.1145\/3307650.3322255","type":"proceedings-article","created":{"date-parts":[[2019,6,14]],"date-time":"2019-06-14T12:42:33Z","timestamp":1560516153000},"page":"304-317","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":70,"title":["Laconic deep learning inference acceleration"],"prefix":"10.1145","author":[{"given":"Sayeh","family":"Sharify","sequence":"first","affiliation":[{"name":"University of Toronto"}]},{"given":"Alberto Delmas","family":"Lascorz","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Mostafa","family":"Mahmoud","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Milos","family":"Nikolic","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Kevin","family":"Siu","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Dylan Malone","family":"Stuart","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Zissis","family":"Poulos","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Andreas","family":"Moshovos","sequence":"additional","affiliation":[{"name":"University of Toronto"}]}],"member":"320","published-online":{"date-parts":[[2019,6,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/3294771.3294880"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00061"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123982"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001138"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783725"},{"key":"e_1_3_2_1_6_1","volume-title":"SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation","author":"Badrinarayanan Vijay","year":"2017","unstructured":"Vijay Badrinarayanan, Alex Kendall, and Roberto Cipolla. 2017. SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation. IEEE Trans. on Pattern Analysis and Machine Intelligence (2017)."},{"key":"e_1_3_2_1_7_1","unstructured":"Emmanuel Bengio Pierre-Luc Bacon Joelle Pineau and Doina Precup. 2015. Conditional Computation in Neural Networks for faster models. CoRR abs\/1511.06297."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2008.04.005"},{"key":"e_1_3_2_1_9_1","unstructured":"Cadence. 2019. Encounter RTL Compiler. (2019). https:\/\/www.cadence.com"},{"key":"e_1_3_2_1_10_1","volume-title":"ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware. In Intl' Conf. on Learning Representations. https:\/\/arxiv.org\/pdf\/1812","author":"Cai Han","year":"2019","unstructured":"Han Cai, Ligeng Zhu, and Song Han. 2019. ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware. In Intl' Conf. on Learning Representations. https:\/\/arxiv.org\/pdf\/1812.00332.pdf"},{"key":"e_1_3_2_1_11_1","volume-title":"Automation & Test in Europe Conf.","author":"Chen Chia-Yu","year":"2018","unstructured":"Chia-Yu Chen, Jungwook Choi, Kailash Gopalakrishnan, Viji Srinivasan, and Swagath Venkataramani. 2018. Exploiting approximate computing for deep learning acceleration. In Design, Automation & Test in Europe Conf."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2017.2749425"},{"key":"e_1_3_2_1_16_1","volume-title":"Towards the Limit of Network Quantization. CoRR abs\/1612.01543","author":"Choi Yoojin","year":"2016","unstructured":"Yoojin Choi, Mostafa El-Khamy, and Jungwon Lee. 2016. Towards the Limit of Network Quantization. CoRR abs\/1612.01543 (2016)."},{"key":"e_1_3_2_1_17_1","volume-title":"Low precision arithmetic for deep learning. CoRR abs\/1412.7024","author":"Courbariaux Matthieu","year":"2014","unstructured":"Matthieu Courbariaux, Yoshua Bengio, and Jean-Pierre David. 2014. Low precision arithmetic for deep learning. CoRR abs\/1412.7024 (2014)."},{"key":"e_1_3_2_1_18_1","unstructured":"M. Courbariaux Y. Bengio and J.-P. David. 2015. BinaryConnect: Training Deep Neural Networks with binary weights during propagations. CoPR abs\/1511.00363."},{"key":"e_1_3_2_1_19_1","volume-title":"Wipf","author":"Dai Bin","year":"2018","unstructured":"Bin Dai, Chen Zhu, and David P. Wipf. 2018. Compressing Neural Networks using the Variational Information Bottleneck. CoRR abs\/1802.10399 (2018)."},{"key":"e_1_3_2_1_20_1","volume-title":"Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context. CoRR abs\/1901.02860","author":"Dai Zihang","year":"2019","unstructured":"Zihang Dai, Zhilin Yang, Yiming Yang, Jaime G. Carbonell, Quoc V. Le, and Ruslan Salakhutdinov. 2019. Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context. CoRR abs\/1901.02860 (2019). http:\/\/arxiv.org\/abs\/1901.02860"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305478"},{"key":"e_1_3_2_1_22_1","volume-title":"Dynamic Stripes: Exploiting the Dynamic Precision Requirements of Activation Values in Neural Networks. CoRR abs\/1706.00504","author":"Delmas Alberto","year":"2017","unstructured":"Alberto Delmas, Patrick Judd, Sayeh Sharify, and Andreas Moshovos. 2017. Dynamic Stripes: Exploiting the Dynamic Precision Requirements of Activation Values in Neural Networks. CoRR abs\/1706.00504 (2017)."},{"key":"e_1_3_2_1_23_1","volume-title":"DPRed: Making Typical Activation Values Matter In Deep Learning Computing. CoRR abs\/1804.06732","author":"Delmas Alberto","year":"2018","unstructured":"Alberto Delmas, Sayeh Sharify, Patrick Judd, Milos Nikolic, and Andreas Moshovos. 2018. DPRed: Making Typical Activation Values Matter In Deep Learning Computing. CoRR abs\/1804.06732 (2018)."},{"key":"e_1_3_2_1_24_1","volume-title":"Intl' Conf. on Architectural Support for Programming Languages and Operating Systems.","author":"Lascorz Alberto Delmas","year":"2019","unstructured":"Alberto Delmas Lascorz, Patrick Judd, Dylan Malone Stuart, Zissis Poulos, Mostafa Mahmoud, Sayeh Sharify, Milos Nikolic, Kevin Siu, and Andreas Moshovos. 2019. Bit-Tactical: A Software\/Hardware Approach to Exploiting Value and Bit Sparsity in Neural Networks. In Intl' Conf. on Architectural Support for Programming Languages and Operating Systems."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.01.010"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124552"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000108"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"e_1_3_2_1_29_1","volume-title":"Hierarchical Neural Story Generation. CoRR abs\/1805.04833","author":"Fan Angela","year":"2018","unstructured":"Angela Fan, Mike Lewis, and Yann Dauphin. 2018. Hierarchical Neural Story Generation. CoRR abs\/1805.04833 (2018)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2980179.2982399"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157096.3157251"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045303"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3199847"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021745"},{"key":"e_1_3_2_1_37_1","volume-title":"EIE: Efficient Inference Engine on Compressed Deep Neural Network. In Intl' Symp. on Computer Architecture.","author":"Han Song","unstructured":"Song Han, Xingyu Liu, Huizi Mao, Jing Pu, Ardavan Pedram, Mark A. Horowitz, and William J. Dally. 2016. EIE: Efficient Inference Engine on Compressed Deep Neural Network. In Intl' Symp. on Computer Architecture."},{"key":"e_1_3_2_1_38_1","volume-title":"Dally","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J. Dally. 2015. Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding. CoPR abs\/1510.00149 (2015)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969239.2969366"},{"key":"e_1_3_2_1_40_1","volume-title":"Deep Residual Learning for Image Recognition. CoRR abs\/1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. CoRR abs\/1512.03385 (2015)."},{"key":"e_1_3_2_1_41_1","volume-title":"AMC: AutoML for Model Compression and Acceleration on Mobile Devices. In European Conf. on Computer Vision.","author":"He Yihui","year":"2018","unstructured":"Yihui He, Ji Lin, Zhijian Liu, Hanrui Wang, Li-Jia Li, and Song Han. 2018. AMC: AutoML for Model Compression and Acceleration on Mobile Devices. In European Conf. on Computer Vision."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.5555\/3122009.3242044"},{"key":"e_1_3_2_1_44_1","volume-title":"Convolutional Networks and the Models in-Between. CoRR abs\/1603.01250","author":"Ioannou Yani","year":"2016","unstructured":"Yani Ioannou, Duncan P. Robertson, Darko Zikic, Peter Kontschieder, Jamie Shotton, Matthew Brown, and Antonio Criminisi. 2016. Decision Forests, Convolutional Networks and the Models in-Between. CoRR abs\/1603.01250 (2016)."},{"key":"e_1_3_2_1_45_1","volume-title":"Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. CoRR abs\/1712.05877","author":"Jacob Benoit","year":"2017","unstructured":"Benoit Jacob, Skirmantas Kligys, Bo Chen, Menglong Zhu, Matthew Tang, Andrew G. Howard, Hartwig Adam, and Dmitry Kalenichenko. 2017. Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. CoRR abs\/1712.05877 (2017)."},{"key":"e_1_3_2_1_46_1","volume-title":"Proteus: Exploiting Numerical Precision Variability in Deep Neural Networks. In Workshop On Approximate Computing (WAPCO).","author":"Judd Patrick","year":"2016","unstructured":"Patrick Judd, Jorge Albericio, Tayler Hetherington, Tor Aamodt, Natalie Enright Jerger, and Andreas Moshovos. 2016. Proteus: Exploiting Numerical Precision Variability in Deep Neural Networks. In Workshop On Approximate Computing (WAPCO)."},{"key":"e_1_3_2_1_47_1","volume-title":"Raquel Urtasun, and Andreas Moshovos.","author":"Judd Patrick","year":"2015","unstructured":"Patrick Judd, Jorge Albericio, Tayler Hetherington, Tor Aamodt, Natalie Enright Jerger, Raquel Urtasun, and Andreas Moshovos. 2015. Reduced-Precision Strategies for Bounded Memory in Deep Neural Nets. CoPR abs\/1511.05236v4 (2015)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.5555\/3195638.3195661"},{"key":"e_1_3_2_1_49_1","volume-title":"Low Precision RNNs: Quantizing RNNs Without Losing Accuracy. CoRR abs\/1710.07706","author":"Kapur Supriya","year":"2017","unstructured":"Supriya Kapur, Asit K. Mishra, and Debbie Marr. 2017. Low Precision RNNs: Quantizing RNNs Without Losing Accuracy. CoRR abs\/1710.07706 (2017)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","unstructured":"Dongyoung Kim Junwhan Ahn and Sungjoo Yoo. 2017. A novel zero weight\/activation-aware hardware architecture of convolutional neural network. In Design Automation and Test Europe.","DOI":"10.5555\/3130379.3130723"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2017.2741463"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001178"},{"key":"e_1_3_2_1_53_1","volume-title":"Bitwise Neural Networks. CoRR abs\/1601.06071","author":"Kim Minje","year":"2016","unstructured":"Minje Kim and Paris Smaragdis. 2016. Bitwise Neural Networks. CoRR abs\/1601.06071 (2016)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","unstructured":"H.T. Kung Bradley McDanel and Sai Qian Zhang. 2019. Packing Sparse Convolutional Neural Networks for Efficient Systolic Array Implementations: Column Combining Under Joint Optimization. In Intl' Conf. on Architectural Support for Programming Languages and Operating Systems.","DOI":"10.1145\/3297858.3304028"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173176"},{"key":"e_1_3_2_1_56_1","volume-title":"Lempitsky","author":"Lebedev Vadim","year":"2016","unstructured":"Vadim Lebedev and Victor S. Lempitsky. 2016. Fast ConvNets Using Group-Wise Brain Damage. In Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3205289.3205295"},{"key":"e_1_3_2_1_58_1","volume-title":"Aamodt","author":"Lew Jonathan","year":"2018","unstructured":"Jonathan Lew, Deval Shah, Suchita Pati, Shaylin Cattell, Mengchi Zhang, Amruth Sandhupatla, Christopher Ng, Negar Goli, Matthew D. Sinclair, Timothy G. Rogers, and Tor M. Aamodt. 2018. Analyzing Machine Learning Workloads Using a Detailed GPU Simulator. CoRR abs\/1811.08933 (2018). arXiv:1811.08933 http:\/\/arxiv.org\/abs\/1811.08933"},{"key":"e_1_3_2_1_59_1","article-title":"Video Superresolution via Motion Compensation and Deep Residual Learning","author":"Li Dingyi","year":"2017","unstructured":"Dingyi Li and Zengfu Wang. 2017. Video Superresolution via Motion Compensation and Deep Residual Learning. IEEE Trans. on Computational Imaging.","journal-title":"IEEE Trans. on Computational Imaging."},{"key":"e_1_3_2_1_60_1","volume-title":"Ternary Weight Networks. CoRR abs\/1605.04711","author":"Li Fengfu","year":"2016","unstructured":"Fengfu Li and Bin Liu. 2016. Ternary Weight Networks. CoRR abs\/1605.04711 (2016)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.5555\/3045390.3045690"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.5555\/3294771.3294979"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2018.043191125"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.5555\/3294996.3295088"},{"key":"e_1_3_2_1_65_1","volume-title":"FlexFlow: A Flexible Dataflow Accelerator Architecture for Convolutional Neural Networks. In Intl' Symp. on High Performance Computer Architecture.","author":"Lu Wenyan","year":"2017","unstructured":"Wenyan Lu, Guihai Yan, Jiajun Li, Shijun Gong, Yinhe Han, and Xiaowei Li. 2017. FlexFlow: A Flexible Dataflow Accelerator Architecture for Convolutional Neural Networks. In Intl' Symp. on High Performance Computer Architecture."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2001.937655"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/2717764.2717783"},{"key":"e_1_3_2_1_68_1","volume-title":"GPU Technology Conf.","author":"Migacz Szymon","year":"2017","unstructured":"Szymon Migacz. 2017. 8-bit Inference with TensorRT. GPU Technology Conf."},{"key":"e_1_3_2_1_69_1","volume-title":"WRPN: Wide Reduced-Precision Networks. CoRR abs\/1709.01134(2017).","author":"Mishra Asit K.","year":"2017","unstructured":"Asit K. Mishra, Eriko Nurvitadhi, Jeffrey J. Cook, and Debbie Marr. 2017. WRPN: Wide Reduced-Precision Networks. CoRR abs\/1709.01134(2017)."},{"key":"e_1_3_2_1_70_1","unstructured":"Naveen Muralimanohar and Rajeev Balasubramonian. 2015. CACTI 6.0: A Tool to Understand Large Caches. (2015)."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_72_1","volume-title":"Weighted-Entropy-Based Quantization for Deep Neural Networks. In Conf. on Computer Vision and Pattern Recognition.","author":"Park Eunhyeok","year":"2017","unstructured":"Eunhyeok Park, Junwhan Ahn, and Sungjoo Yoo. 2017. Weighted-Entropy-Based Quantization for Deep Neural Networks. In Conf. on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00063"},{"key":"e_1_3_2_1_74_1","volume-title":"Value-Aware Quantization for Training and Inference of Neural Networks. In European Conf. on Computer Vision.","author":"Park Eunhyeok","year":"2018","unstructured":"Eunhyeok Park, Sungjoo Yoo, and Peter Vajda. 2018. Value-Aware Quantization for Training and Inference of Neural Networks. In European Conf. on Computer Vision."},{"key":"e_1_3_2_1_75_1","volume-title":"Faster CNNs with Direct Sparse Convolutions and Guided Pruning. In Intl' Conf. on Learning Representations.","author":"Park Jongsoo","year":"2017","unstructured":"Jongsoo Park, Sheng Li, Wei Wen, Ping Tak Peter Tang, Hai Li, Yiran Chen, and Pradeep Dubey. 2017. Faster CNNs with Direct Sparse Convolutions and Guided Pruning. In Intl' Conf. on Learning Representations."},{"key":"e_1_3_2_1_76_1","volume-title":"Going Deeper with Embedded FPGA Platform for Convolutional Neural Network. In Intl' Symp. on Field-Programmable Gate Arrays.","author":"Qiu Jiantao","year":"2016","unstructured":"Jiantao Qiu, Jie Wang, Song Yao, Kaiyuan Guo, Boxun Li, Erjin Zhou, Jincheng Yu, Tianqi Tang, Ningyi Xu, Sen Song, Yu Wang, and Huazhong Yang. 2016. Going Deeper with Embedded FPGA Platform for Convolutional Neural Network. In Intl' Symp. on Field-Programmable Gate Arrays."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.5555\/1866696.1866717"},{"key":"e_1_3_2_1_78_1","volume-title":"XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks. CoRR abs\/1603.05279","author":"Rastegari Mohammad","year":"2016","unstructured":"Mohammad Rastegari, Vicente Ordonez, Joseph Redmon, and Ali Farhadi. 2016. XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks. CoRR abs\/1603.05279 (2016)."},{"key":"e_1_3_2_1_79_1","volume-title":"Faster, Stronger. CoRR abs\/1612.08242","author":"Redmon Joseph","year":"2016","unstructured":"Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. CoRR abs\/1612.08242 (2016)."},{"key":"e_1_3_2_1_80_1","volume-title":"ImageNet Large Scale Visual Recognition Challenge. CoRR abs\/1409.0575 (Sept","author":"Russakovsky Olga","year":"2014","unstructured":"Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, Alexander C. Berg, and Li Fei-Fei. 2014. ImageNet Large Scale Visual Recognition Challenge. CoRR abs\/1409.0575 (Sept. 2014)."},{"key":"e_1_3_2_1_81_1","unstructured":"Mark Sandler Andrew G. Howard Menglong Zhu Andrey Zhmoginov and Liang-Chieh Chen. 2018. Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification Detection and Segmentation. CoRR abs\/1801.04381."},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196072"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"e_1_3_2_1_84_1","volume-title":"Fixed Point Performance Analysis of Recurrent Neural Networks. CoRR abs\/1512.01322","author":"Shin Sungho","year":"2015","unstructured":"Sungho Shin, Kyuyeon Hwang, and Wonyong Sung. 2015. Fixed Point Performance Analysis of Recurrent Neural Networks. CoRR abs\/1512.01322 (2015)."},{"key":"e_1_3_2_1_85_1","volume-title":"Memory Requirements for Convolutional Neural Network Hardware Accelerators. In IEEE Intl' Symp. on Workload Characterization.","author":"Siu Kevin","year":"2018","unstructured":"Kevin Siu, Dylan Malone Stuart, Mostafa Mahmoud, and Andreas Moshovos. 2018. Memory Requirements for Convolutional Neural Network Hardware Accelerators. In IEEE Intl' Symp. on Workload Characterization."},{"key":"e_1_3_2_1_86_1","unstructured":"Synopsys. 2019. Design Compiler. http:\/\/www.synopsys.com\/Tools\/Implementation\/RTLSynthesis\/DesignCompiler\/Pages. (2019)."},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_88_1","volume-title":"Soft Weight-Sharing for Neural Network Compression. CoRR abs\/1702.04008","author":"Ullrich Karen","year":"2017","unstructured":"Karen Ullrich, Edward Meeds, and Max Welling. 2017. Soft Weight-Sharing for Neural Network Compression. CoRR abs\/1702.04008 (2017)."},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964299"},{"key":"e_1_3_2_1_90_1","unstructured":"Peter Warden. 2016. Low-precision matrix multiplication. https:\/\/petewarden.com."},{"key":"e_1_3_2_1_91_1","unstructured":"Pete Warden. 2017. How to Quantize Neural Networks with TensorFlow. https:\/\/www.tensorflow.org\/performance\/quantization. (2017)."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062207"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157096.3157329"},{"key":"e_1_3_2_1_94_1","unstructured":"Neil HE Weste David Harris and Ayan Banerjee. 2010. CMOS VLSI design. Pearson India."},{"key":"e_1_3_2_1_95_1","volume-title":"CoRR abs\/1809.04070","author":"Yang Xuan","year":"2018","unstructured":"Xuan Yang, Mingyu Gao, Jing Pu, Ankita Nayak, Qiaoyi Liu, Steven Bell, Jeff Setter, Kaidi Cao, Heonjae Ha, Christos Kozyrakis, and Mark Horowitz. 2018. DNN Dataflow Choice Is Overrated. CoRR abs\/1809.04070 (2018)."},{"key":"e_1_3_2_1_96_1","volume-title":"Nikhil Bhagdikar, Stephen Richardson, Shahar Kvatinsky, Jonathan Ragan-Kelley, Ardavan Pedram, and Mark Horowitz.","author":"Yang Xuan","year":"2016","unstructured":"Xuan Yang, Jing Pu, Blaine Burton Rister, Nikhil Bhagdikar, Stephen Richardson, Shahar Kvatinsky, Jonathan Ragan-Kelley, Ardavan Pedram, and Mark Horowitz. 2016. A Systematic Approach to Blocking Convolutional Neural Networks. CoRR abs\/1606.04209 (2016)."},{"key":"e_1_3_2_1_97_1","volume-title":"IEEE Conf. on Computer Vision and Pattern Recognition.","author":"Chen Tien-Ju","year":"2017","unstructured":"Yang, Tien-Ju and Chen, Yu-Hsin and Sze, Vivienne. 2017. Designing Energy-Efficient Convolutional Neural Networks using Energy-Aware Pruning. In IEEE Conf. on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080215"},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"e_1_3_2_1_101_1","volume-title":"Cambricon-X: An Accelerator for Sparse Neural Networks. In Intl' Symp. on Microarchitecture.","author":"Zhang Shijin","year":"2016","unstructured":"Shijin Zhang, Zidong Du, Lei Zhang, Huiying Lan, Shaoli Liu, Ling Li, Qi Guo, Tianshi Chen, and Yunji Chen. 2016. Cambricon-X: An Accelerator for Sparse Neural Networks. In Intl' Symp. on Microarchitecture."},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.1109\/78.229903"},{"key":"e_1_3_2_1_103_1","volume-title":"Incremental Network Quantization: Towards Lossless CNNs with Low-Precision Weights. CoRR abs\/1702.03044","author":"Zhou Aojun","year":"2017","unstructured":"Aojun Zhou, Anbang Yao, Yiwen Guo, Lin Xu, and Yurong Chen. 2017. Incremental Network Quantization: Towards Lossless CNNs with Low-Precision Weights. CoRR abs\/1702.03044 (2017)."},{"key":"e_1_3_2_1_104_1","volume-title":"DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients. CoRR abs\/1606.06160","author":"Zhou Shuchang","year":"2016","unstructured":"Shuchang Zhou, Zekun Ni, Xinyu Zhou, He Wen, Yuxin Wu, and Yuheng Zou. 2016. DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients. CoRR abs\/1606.06160 (2016)."},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"},{"key":"e_1_3_2_1_106_1","volume-title":"Dally","author":"Zhu Chenzhuo","year":"2016","unstructured":"Chenzhuo Zhu, Song Han, Huizi Mao, and William J. Dally. 2016. Trained Ternary Quantization. CoRR abs\/1612.01064 (2016)."},{"key":"e_1_3_2_1_107_1","volume-title":"Le","author":"Zoph Barret","year":"2017","unstructured":"Barret Zoph, Vijay Vasudevan, Jonathon Shlens, and Quoc V. Le. 2017. Learning Transferable Architectures for Scalable Image Recognition. CoRR abs\/1707.07012 (2017). arXiv:1707.07012 http:\/\/arxiv.org\/abs\/1707.07012"}],"event":{"name":"ISCA '19: The 46th Annual International Symposium on Computer Architecture","location":"Phoenix Arizona","acronym":"ISCA '19","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE-CS\\DATC IEEE Computer Society"]},"container-title":["Proceedings of the 46th International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3307650.3322255","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3307650.3322255","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:54:06Z","timestamp":1750204446000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3307650.3322255"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,22]]},"references-count":107,"alternative-id":["10.1145\/3307650.3322255","10.1145\/3307650"],"URL":"https:\/\/doi.org\/10.1145\/3307650.3322255","relation":{},"subject":[],"published":{"date-parts":[[2019,6,22]]},"assertion":[{"value":"2019-06-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}