{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T17:52:50Z","timestamp":1776275570577,"version":"3.50.1"},"reference-count":308,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Science Foundation","award":["1725447"],"award-info":[{"award-number":["1725447"]}]},{"DOI":"10.13039\/501100004147","name":"Beijing Academy of Artificial Intelligence (BAAI), Tsinghua University Initiative Scientific Research Program","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004147","name":"Institute for Guo Qiang, Tsinghua University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Proc. 
IEEE"],"published-print":{"date-parts":[[2020,4]]},"DOI":"10.1109\/jproc.2020.2976475","type":"journal-article","created":{"date-parts":[[2020,3,20]],"date-time":"2020-03-20T19:48:43Z","timestamp":1584733723000},"page":"485-532","source":"Crossref","is-referenced-by-count":824,"title":["Model Compression and Hardware Acceleration for Neural Networks: A Comprehensive Survey"],"prefix":"10.1109","volume":"108","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5172-9411","authenticated-orcid":false,"given":"Lei","family":"Deng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8994-431X","authenticated-orcid":false,"given":"Guoqi","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4186-7618","authenticated-orcid":false,"given":"Song","family":"Han","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9829-2202","authenticated-orcid":false,"given":"Luping","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Yuan","family":"Xie","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref275","doi-asserted-by":"publisher","DOI":"10.1109\/CoolChips.2018.8373076"},{"key":"ref274","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2017.2682138"},{"key":"ref277","doi-asserted-by":"publisher","DOI":"10.1145\/3194554.3194634"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2857019"},{"key":"ref271","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00062"},{"key":"ref270","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00040"},{"key":"ref273","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2017.2778702"},{"key":"ref170","article-title":"WRPN: Wide reduced-precision networks","author":"mishra","year":"2017","journal-title":"arXiv 1709 01134"},{"key":"ref272","doi-asserted-by":"publisher","DOI":"10.1109\/IEDM.2017.8268341"},{"key":"ref172","article-title":"Incremental network quantization: 
Towards lossless CNNs with low-precision weights","author":"zhou","year":"2017","journal-title":"arXiv 1702 03044"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.430"},{"key":"ref174","article-title":"Training compact neural networks with binary weights and low precision activations","author":"zhuang","year":"2018","journal-title":"arXiv 1808 02631"},{"key":"ref173","article-title":"Learning low precision deep neural networks through regularization","author":"choi","year":"2018","journal-title":"arXiv 1809 00095"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00881"},{"key":"ref175","article-title":"UNIQ: Uniform noise injection for non-uniform quantization of neural networks","author":"baskin","year":"2018","journal-title":"arXiv 1804 10969"},{"key":"ref178","first-page":"1742","article-title":"Flexpoint: An adaptive numerical format for efficient training of deep neural networks","author":"k\u00f6ster","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref177","article-title":"Mixed precision training","author":"micikevicius","year":"2017","journal-title":"arXiv 1710 03740"},{"key":"ref168","first-page":"5811","article-title":"Training quantized nets: A deeper understanding","author":"li","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-017-1750-y"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.469"},{"key":"ref38","first-page":"5199","article-title":"ChannelNets: Compact and efficient convolutional neural networks via channel-wise convolutions","author":"gao","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref32","article-title":"Wide residual networks","author":"zagoruyko","year":"2016","journal-title":"arXiv 1605 
07146"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref267","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358302"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"ref268","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001140"},{"key":"ref36","first-page":"11","article-title":"CondenseNet: An efficient densenet using learned group convolutions","volume":"3","author":"huang","year":"2017","journal-title":"Group"},{"key":"ref269","article-title":"High-throughput in-memory computing for binary deep neural networks with monolithically integrated RRAM and 90nm CMOS","author":"yin","year":"2019","journal-title":"arXiv 1909 07514"},{"key":"ref35","article-title":"Network in network","author":"lin","year":"2013","journal-title":"arXiv 1312 4400"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"},{"key":"ref288","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00062"},{"key":"ref287","doi-asserted-by":"publisher","DOI":"10.1109\/ASSCC.2017.8240260"},{"key":"ref286","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"ref285","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2852335"},{"key":"ref284","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001165"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.06.058"},{"key":"ref283","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001138"},{"key":"ref180","first-page":"5145","article-title":"Scalable methods for 8-bit training of neural networks","author":"banner","year":"2018","journal-title":"Proc Adv Neural Inf Process 
Syst"},{"key":"ref282","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00024"},{"key":"ref281","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"ref280","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-78890-6_3"},{"key":"ref185","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and Huffman coding","author":"han","year":"2015","journal-title":"arXiv 1510 00149 [cs]"},{"key":"ref184","first-page":"7685","article-title":"Training deep neural networks with 8-bit floating point numbers","author":"wang","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref183","article-title":"Per-tensor fixed-point quantization of the back-propagation algorithm","author":"sakr","year":"2018","journal-title":"arXiv 1812 11732"},{"key":"ref182","article-title":"Training and inference with integers in deep neural networks","author":"wu","year":"2018","journal-title":"arXiv 1802 04680"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2774288"},{"key":"ref188","article-title":"Recurrent neural networks with limited numerical precision","author":"ott","year":"2016","journal-title":"arXiv 1608 06902 [cs]"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/SiPS.2014.6986082"},{"key":"ref186","article-title":"Deep k-means: Re-training and parameter sharing with harder cluster assignments for compressing deep convolutions","author":"wu","year":"2018","journal-title":"arXiv 1806 09228"},{"key":"ref28","article-title":"Highway networks","author":"kumar srivastava","year":"2015","journal-title":"arXiv 1505 00387"},{"key":"ref27","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref179","article-title":"Mixed precision training of convolutional neural networks using integer 
operations","author":"das","year":"2018","journal-title":"arXiv 1802 00930"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref20","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref21","first-page":"4898","article-title":"Understanding the effective receptive field in deep convolutional neural networks","author":"luo","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-75988-8_28"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00951"},{"key":"ref278","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783722"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.89"},{"key":"ref279","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"ref25","article-title":"Multi-scale context aggregation by dilated convolutions","author":"yu","year":"2016","journal-title":"Proc ICLR"},{"key":"ref293","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123977"},{"key":"ref50","article-title":"Simple recurrent units for highly parallelizable recurrence","author":"tao lei","year":"2018","journal-title":"Proc EMNLP"},{"key":"ref292","doi-asserted-by":"publisher","DOI":"10.1109\/ASPDAC.2017.7858419"},{"key":"ref51","first-page":"1120","article-title":"Unitary evolution recurrent neural networks","author":"arjovsky","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref295","first-page":"1","article-title":"Parallelizing SRAM arrays with customized bit-cell for binary neural networks","author":"liu","year":"2018","journal-title":"Proc Design Autom Conf"},{"key":"ref294","first-page":"222","article-title":"An always-on $3.8~\\mu\\text{J}$ \/86% CIFAR-10 
mixed-signal binary CNN processor with all memory on chip in 28-nm CMOS","author":"bankman","year":"2018","journal-title":"IEEE Int Solid-State Circuits Conf (ISSCC) Dig Tech Papers"},{"key":"ref297","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196116"},{"key":"ref296","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322271"},{"key":"ref299","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2018.8342009"},{"key":"ref298","doi-asserted-by":"publisher","DOI":"10.1145\/3287624.3287715"},{"key":"ref154","article-title":"Understanding straight-through estimator in training activation quantized neural nets","author":"yin","year":"2019","journal-title":"arXiv 1903 05662"},{"key":"ref153","article-title":"Effective quantization methods for recurrent neural networks","author":"he","year":"2016","journal-title":"arXiv 1611 10176"},{"key":"ref156","article-title":"Neural networks with few multiplications","author":"lin","year":"2015","journal-title":"arXiv 1510 03009 [cs]"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/425"},{"key":"ref150","first-page":"3123","article-title":"BinaryConnect: Training deep neural networks with binary weights during propagations","author":"courbariaux","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref291","article-title":"RxNN: A framework for evaluating deep neural networks on resistive crossbars","author":"jain","year":"2018","journal-title":"arXiv 1809 00072"},{"key":"ref152","first-page":"6869","article-title":"Quantized neural networks: Training neural networks with low precision weights and activations","volume":"18","author":"hubara","year":"2017","journal-title":"J Mach Learn 
Res"},{"key":"ref290","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2019.2917852"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.01.010"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00448"},{"key":"ref147","article-title":"DoReFa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients","author":"zhou","year":"2016","journal-title":"arXiv 1606 06160 [cs]"},{"key":"ref148","article-title":"Convolutional neural networks using logarithmic data representation","author":"miyashita","year":"2016","journal-title":"arXiv 1603 01025"},{"key":"ref149","article-title":"Discovering low-precision networks close to full-precision networks for efficient embedded inference","author":"mckinstry","year":"2018","journal-title":"arXiv 1809 04191"},{"key":"ref289","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00063"},{"key":"ref59","article-title":"Efficient neural architecture search via parameter sharing","author":"pham","year":"2018","journal-title":"arXiv 1802 03268"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00907"},{"key":"ref57","first-page":"1","article-title":"Wider and deeper, cheaper and faster: Tensorized LSTMs for sequence learning","volume":"2017","author":"he","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref56","article-title":"Grid long short-term memory","author":"kalchbrenner","year":"2015","journal-title":"arXiv 1507 01526"},{"key":"ref55","first-page":"1822","article-title":"Architectural complexity measures of recurrent neural networks","author":"zhang","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref54","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016","journal-title":"arXiv 1609 08144"},{"key":"ref53","article-title":"Factorization tricks for LSTM 
networks","author":"kuchaiev","year":"2017","journal-title":"arXiv 1703 10722"},{"key":"ref52","article-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling","author":"sak","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-016-1006-2"},{"key":"ref167","first-page":"1509","article-title":"Terngrad: Ternary gradients to reduce communication in distributed deep learning","author":"wen","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref166","first-page":"0","article-title":"Tbn: Convolutional neural network with ternary inputs and binary weights","volume":"1","author":"wan","year":"2018","journal-title":"Matrix"},{"key":"ref165","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11713","article-title":"Extremely low bit neural network: Squeeze the last bit out with admm","author":"leng","year":"2018","journal-title":"Proc AAAI"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1137\/18M1166134"},{"key":"ref163","article-title":"Variational network quantization","author":"achterhold","year":"2018","journal-title":"Proc ICLR"},{"key":"ref162","article-title":"Trained ternary quantization","author":"zhu","year":"2016","journal-title":"arXiv 1612 01064"},{"key":"ref161","article-title":"Ternary weight networks","author":"li","year":"2016","journal-title":"arXiv 1605 04711 [cs]"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.574"},{"key":"ref4","first-page":"442","article-title":"Tensorizing neural networks","author":"novikov","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref6","first-page":"1135","article-title":"Learning both weights and connections for efficient neural network","author":"han","year":"2015","journal-title":"Proc Adv Neural Inf Process 
Syst"},{"key":"ref5","article-title":"Binarized neural networks: Training deep neural networks with weights and activations constrained to +1 or -1","author":"courbariaux","year":"2016","journal-title":"arXiv 1602 02830 [cs]"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1604850113"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304076"},{"key":"ref49","article-title":"Quasi-recurrent neural networks","author":"bradbury","year":"2017","journal-title":"Proc ICLR"},{"key":"ref157","first-page":"525","article-title":"XNOR-Net: ImageNet classification using binary convolutional neural networks","author":"rastegari","year":"2016","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref9","first-page":"1","article-title":"In-datacenter performance analysis of a tensor processing unit","author":"jouppi","year":"2017","journal-title":"Proc ACM\/IEEE 44th Annu Int Symp Comput Archit (ISCA)"},{"key":"ref158","first-page":"2625","article-title":"How to train a compact binary neural network with high accuracy?","author":"tang","year":"2017","journal-title":"Proc AAAI"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015015"},{"key":"ref45","article-title":"Neural architecture search with reinforcement learning","author":"zoph","year":"2017","journal-title":"Proc ICLR"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00572"},{"key":"ref47","first-page":"2342","article-title":"An empirical exploration of recurrent network architectures","author":"jozefowicz","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref42","article-title":"The unreasonable effectiveness of the forget gate","author":"van der westhuizen","year":"2018","journal-title":"arXiv 1804 04849"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472657"},{"key":"ref44","article-title":"MobileNets: Efficient 
convolutional neural networks for mobile vision applications","author":"howard","year":"2017","journal-title":"arXiv 1704 04861"},{"key":"ref43","first-page":"3882","article-title":"Phased LSTM: Accelerating recurrent network training for long or event-based sequences","author":"neil","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref73","article-title":"SMASH: One-shot model architecture search through HyperNetworks","author":"brock","year":"2017","journal-title":"Arxiv 1708 05344"},{"key":"ref72","article-title":"Once for all: Train one network and specialize it for efficient deployment","author":"cai","year":"2020","journal-title":"Proc ICLR"},{"key":"ref71","article-title":"An analysis of deep neural network models for practical applications","author":"canziani","year":"2016","journal-title":"arXiv 1605 07678"},{"key":"ref70","article-title":"Efficient multi-objective neural architecture search via lamarckian evolution","author":"elsken","year":"2018","journal-title":"arXiv 1804 09081"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638949"},{"key":"ref77","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1109\/TNNLS.2015.2496964","article-title":"$L_{1}$ -norm low-rank matrix decomposition by neural networks and mollifiers","volume":"27","author":"liu","year":"2016","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"ref74","article-title":"ProxylessNAS: Direct neural architecture search on target task and hardware","author":"cai","year":"2018","journal-title":"arXiv 1812 00332"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1980.1102314"},{"key":"ref78","article-title":"Convolutional neural networks with low-rank regularization","author":"tai","year":"2016","journal-title":"Proc ICLR"},{"key":"ref79","first-page":"2148","article-title":"Predicting parameters in deep learning","author":"denil","year":"2013","journal-title":"Proc Adv Neural Inf Process 
Syst"},{"key":"ref60","article-title":"DARTS: Differentiable architecture search","author":"liu","year":"2018","journal-title":"Arxiv 1806 09055"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014780"},{"key":"ref61","article-title":"The evolved transformer","author":"so","year":"2019","journal-title":"arXiv 1901 11117"},{"key":"ref305","article-title":"Attacking binarized neural networks","author":"galloway","year":"2017","journal-title":"arXiv 1711 00449"},{"key":"ref63","article-title":"Searching for activation functions","author":"ramachandran","year":"2017","journal-title":"arXiv 1710 05941"},{"key":"ref304","article-title":"DeepCloak: Masking deep neural network models for robustness against adversarial samples","author":"ji","year":"2017","journal-title":"arXiv 1702 06763"},{"key":"ref64","article-title":"Designing neural network architectures using reinforcement learning","author":"baker","year":"2017","journal-title":"Proc ICLR"},{"key":"ref307","article-title":"Quantizing deep convolutional networks for efficient inference: A whitepaper","author":"krishnamoorthi","year":"2018","journal-title":"arXiv 1806 08342"},{"key":"ref65","article-title":"Practical block-wise neural network architecture generation","author":"zhong","year":"2017","journal-title":"arXiv 1708 05552"},{"key":"ref306","article-title":"Tensor train decomposition on TensorFlow (T3F)","author":"novikov","year":"2018","journal-title":"arXiv 1801 01928"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_2"},{"key":"ref301","article-title":"Adversarial examples in the physical world","author":"kurakin","year":"2016","journal-title":"arXiv 1607 02533"},{"key":"ref67","article-title":"Hierarchical representations for efficient architecture search","author":"liu","year":"2017","journal-title":"arXiv 1711 
00436"},{"key":"ref300","doi-asserted-by":"publisher","DOI":"10.23919\/FPL.2017.8056850"},{"key":"ref68","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11709","article-title":"Efficient architecture search by network transformation","author":"cai","year":"2018","journal-title":"Proc AAAI"},{"key":"ref303","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3264699"},{"key":"ref69","article-title":"Path-level network transformation for efficient architecture search","author":"cai","year":"2018","journal-title":"arXiv 1806 02639"},{"key":"ref302","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2018.8437638"},{"key":"ref308","doi-asserted-by":"publisher","DOI":"10.1145\/3242897"},{"key":"ref197","article-title":"Sparsely-connected neural networks: Towards efficient VLSI implementation of deep neural networks","author":"ardakani","year":"2016","journal-title":"arXiv 1611 01427"},{"key":"ref198","article-title":"To prune, or not to prune: Exploring the efficacy of pruning for model compression","author":"zhu","year":"2017","journal-title":"arXiv 1710 01878"},{"key":"ref199","first-page":"1049","article-title":"Frequency-domain dynamic pruning for convolutional neural networks","author":"liu","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref193","article-title":"Highly scalable deep learning training system with mixed-precision: Training ImageNet in four minutes","author":"jia","year":"2018","journal-title":"arXiv 1807 11205"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2876179"},{"key":"ref195","first-page":"2160","article-title":"Norm matters: Efficient and accurate normalization schemes in deep networks","author":"hoffer","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref196","article-title":"CuDNN: Efficient primitives for deep learning","author":"chetlur","year":"2014","journal-title":"Arxiv 1410 0759"},{"key":"ref95","first-page":"684","article-title":"Deterministic 
CUR for improved large-scale data analysis: An empirical study","author":"thurau","year":"2013","journal-title":"Proc SIAM Int Conf Data Mining"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1007\/s00500-011-0755-7"},{"key":"ref190","article-title":"Low precision RNNs: Quantizing RNNs without losing accuracy","author":"kapur","year":"2017","journal-title":"arXiv 1710 07706"},{"key":"ref93","article-title":"Deep complex networks","author":"trabelsi","year":"2018","journal-title":"Proc ICLR"},{"key":"ref191","first-page":"601","article-title":"Hitnet: Hybrid ternary recurrent neural network","author":"wang","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1080\/03081087.2016.1267104"},{"key":"ref192","article-title":"ProxQuant: Quantized neural networks via proximal operators","author":"bai","year":"2018","journal-title":"arXiv 1810 00861"},{"key":"ref91","first-page":"5125","article-title":"GradiVeQ: Vector quantization for bandwidth-efficient gradient aggregation in distributed CNN training","author":"yu","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref90","first-page":"5463","article-title":"SVD-softmax: Fast softmax approximation on large vocabulary neural networks","author":"shim","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1137\/07070111X"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2013.2297439"},{"key":"ref96","first-page":"3977","article-title":"Revisiting the Nystr&#x00F6;m method for improved large-scale machine learning","volume":"17","author":"gittens","year":"2016","journal-title":"J Mach Learn 
Res"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1007\/s11045-017-0481-0"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICCIT.2009.170"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854828"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.7566\/JPSJ.86.024005"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.460"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CIT.2007.52"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2502579"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1016\/j.matpr.2017.09.222"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178833"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2016.2572736"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298809"},{"key":"ref200","article-title":"Pruning filters for efficient ConvNets","author":"li","year":"2016","journal-title":"arXiv 1608 08710"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2690379"},{"key":"ref100","article-title":"Compression of deep convolutional neural networks for fast and low power mobile applications","author":"kim","year":"2016","journal-title":"Proc ICLR"},{"key":"ref209","article-title":"Rethinking the smaller-norm-less-informative assumption in channel pruning of convolution layers","author":"ye","year":"2018","journal-title":"arXiv 1802 00124"},{"key":"ref203","article-title":"Deep gradient compression: Reducing the communication bandwidth for distributed training","author":"lin","year":"2017","journal-title":"arXiv 1712 01887"},{"key":"ref204","article-title":"Pruning convolutional neural networks for resource efficient inference","author":"molchanov","year":"2016","journal-title":"arXiv 1611 06440"},{"key":"ref201","first-page":"2074","article-title":"Learning structured sparsity in deep neural 
networks","author":"wen","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref202","first-page":"3299","article-title":"meprop: Sparsified back propagation for accelerated deep learning with reduced overfitting","volume":"70","author":"sun","year":"2017","journal-title":"Proc Int Conf Mach Learn (JMLR)"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.298"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00958"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.541"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.155"},{"key":"ref211","article-title":"2PFPCE: Two-phase filter pruning based on conditional entropy","author":"min","year":"2018","journal-title":"arXiv 1809 02220"},{"key":"ref210","article-title":"AutoPruner: An end-to-end trainable filter pruning method for efficient deep model inference","author":"luo","year":"2018","journal-title":"arXiv 1805 08941"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/309"},{"key":"ref213","article-title":"Layer-compensated pruning for resource-constrained convolutional neural networks","author":"chin","year":"2018","journal-title":"arXiv 1810 00518"},{"key":"ref214","first-page":"784","article-title":"AMC: AutoML for model compression and acceleration on mobile devices","author":"he","year":"2018","journal-title":"Proc Eur Conf Comput Vis (ECCV)"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00904"},{"key":"ref216","article-title":"Hybrid pruning: Thinner sparse networks for fast inference on edge devices","author":"xu","year":"2018","journal-title":"arXiv 1811 00482"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2019.2914438"},{"key":"ref218","article-title":"A novel channel pruning method for deep neural network compression","author":"hu","year":"2018","journal-title":"arXiv 1805 
11394"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2874823"},{"key":"ref220","first-page":"4115","article-title":"Tetris: Tile-matching the tremendous irregular sparsity","author":"ji","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref222","first-page":"10169","article-title":"Synaptic strength for convolutional neural network","author":"lin","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref221","article-title":"Compression of deep convolutional neural networks under joint sparsity constraints","author":"choi","year":"2018","journal-title":"arXiv 1805 08303"},{"key":"ref229","article-title":"Training behavior of sparse neural network topologies","author":"alford","year":"2018","journal-title":"arXiv 1810 00299"},{"key":"ref228","article-title":"Exploring sparsity in recurrent neural networks","author":"narang","year":"2017","journal-title":"arXiv 1704 05119"},{"key":"ref227","first-page":"164","article-title":"Second order derivatives for network pruning: Optimal brain surgeon","author":"hassibi","year":"1993","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref226","first-page":"598","article-title":"Optimal brain damage","volume":"1990","author":"lecun","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref225","article-title":"Progressive weight pruning of deep neural networks using ADMM","author":"ye","year":"2018","journal-title":"arXiv 1810 07378"},{"key":"ref224","article-title":"StructADMM: A systematic, high-efficiency framework of structured weight pruning for DNNs","author":"zhang","year":"2018","journal-title":"arXiv 1807 
11091"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_12"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2015.29"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2015.7351019"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1016\/0012-365X(94)00067-1"},{"key":"ref124","first-page":"3891","article-title":"Tensor-train recurrent neural networks for video classification","volume":"70","author":"yang","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref129","first-page":"1","article-title":"On the expressive power of deep learning: A tensor analysis","volume":"49","author":"cohen","year":"2016","journal-title":"J Mach Learn Res Workshop Conf Proc"},{"key":"ref128","first-page":"955","article-title":"Convolutional rectifier networks as generalized tensor decompositions","volume":"48","author":"cohen","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1137\/090752286"},{"key":"ref133","article-title":"Expressive power of recurrent neural networks","author":"khrulkov","year":"2018","journal-title":"Proc ICLR"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-16042-9_14"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1137\/15M1028479"},{"key":"ref132","doi-asserted-by":"crossref","first-page":"291","DOI":"10.2478\/cmam-2011-0016","article-title":"An introduction to hierarchical ( $\\mathcal{H}$ -) rank and TT-rank of tensors with examples","volume":"11","author":"grasedyck","year":"2011","journal-title":"Computing Methods in Applied Mathematics and Engineering"},{"key":"ref232","article-title":"Learning intrinsic sparse structures within long short-term memory","author":"wen","year":"2017","journal-title":"arXiv 1709 05027"},{"key":"ref233","article-title":"Structurally sparsified backward propagation for faster long short-term memory 
training","author":"zhu","year":"2018","journal-title":"arXiv 1806 00512"},{"key":"ref230","article-title":"Grow and prune compact, fast, and accurate LSTMs","author":"dai","year":"2018","journal-title":"arXiv 1805 11797"},{"key":"ref231","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.643"},{"key":"ref239","first-page":"1929","article-title":"Dropout: A simple way to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"J Mach Learn Res"},{"key":"ref238","article-title":"On the importance of single directions for generalization","author":"morcos","year":"2018","journal-title":"arXiv 1803 06959"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1145\/3005348"},{"key":"ref234","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080215"},{"key":"ref237","first-page":"2791","article-title":"Winner-take-all autoencoders","author":"makhzani","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref236","first-page":"806","article-title":"Sparse convolutional neural networks","author":"liu","year":"2015","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit (CVPR)"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1007\/s00791-014-0218-7"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1137\/090757861"},{"key":"ref138","article-title":"Long-term forecasting using higher order tensor RNNs","author":"yu","year":"2017","journal-title":"arXiv 1711 00073"},{"key":"ref137","article-title":"Tensor ring decomposition","author":"zhao","year":"2016","journal-title":"arXiv 1606 05535"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.78"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.602"},{"key":"ref141","author":"parhi","year":"1999","journal-title":"VLSI Digital Signal Processing Systems Design and Implementation"},{"key":"ref142","first-page":"345","article-title":"Towards accurate binary convolutional neural 
network","author":"lin","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_23"},{"key":"ref2","article-title":"Dynamical isometry and a mean field theory of CNNs: How to train 10,000-layer vanilla convolutional neural networks","author":"xiao","year":"2018","journal-title":"arXiv 1806 05393"},{"key":"ref144","article-title":"Alternating multi-bit quantization for recurrent neural networks","author":"xu","year":"2018","journal-title":"arXiv 1802 00150"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref145","article-title":"Model compression via distillation and quantization","author":"polino","year":"2018","journal-title":"arXiv 1802 05668"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098035"},{"key":"ref242","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00061"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2017.8050797"},{"key":"ref244","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00068"},{"key":"ref240","first-page":"3084","article-title":"Adaptive dropout for training deep neural networks","author":"ba","year":"2013","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref248","first-page":"2526","article-title":"A linear speedup analysis of distributed deep learning with sparse and quantized communication","author":"jiang","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref247","article-title":"Rethinking the value of network pruning","author":"liu","year":"2018","journal-title":"arXiv 1810 05270"},{"key":"ref246","article-title":"Dynamic sparse graph for efficient deep learning","author":"liu","year":"2018","journal-title":"arXiv 1810 
00859"},{"key":"ref245","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2018.2865006"},{"key":"ref249","first-page":"1379","article-title":"Dynamic network surgery for efficient DNNs","author":"guo","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1038\/ncomms13890"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.05.017"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/88"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/BIGCOMP.2017.7881725"},{"key":"ref105","article-title":"Speeding-up convolutional neural networks using fine-tuned CP-decomposition","author":"lebedev","year":"2015","journal-title":"Proc ICLR"},{"key":"ref104","article-title":"Beating the perils of non-convexity: Guaranteed training of neural networks using tensor methods","author":"janzamin","year":"2015","journal-title":"arXiv 1506 08473"},{"key":"ref103","article-title":"Tensor regression networks","author":"kossaifi","year":"2017","journal-title":"arXiv 1707 08308"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.243"},{"key":"ref111","article-title":"End-to-end learning of a convolutional neural network via deep tensor decomposition","author":"oymak","year":"2018","journal-title":"arXiv 1805 
06523"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00977"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2018.03.017"},{"key":"ref250","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00017"},{"key":"ref251","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00070"},{"key":"ref254","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196071"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021745"},{"key":"ref252","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-018-0180-5"},{"key":"ref253","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2824304"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2017.2778281"},{"key":"ref256","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2017.7870350"},{"key":"ref259","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.1990.137621"},{"key":"ref10","article-title":"Eyeriss v2: A flexible accelerator for emerging deep neural networks on mobile devices","author":"chen","year":"2018","journal-title":"arXiv 1807 07928"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304049"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322258"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2018.2865489"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2017.2761740"},{"key":"ref15","article-title":"A survey on methods and theories of quantized neural networks","author":"guo","year":"2018","journal-title":"arXiv 1808 04752"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2765695"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682231"},{"key":"ref17","first-page":"1","article-title":"Compression of convolutional neural networks: A short survey","author":"pilipovi\u0107","year":"2018","journal-title":"Proc Int Symp 
INFOTEH-JAHORINA (INFOTEH)"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1137\/090764189"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/TNANO.2017.2732698"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/INES.2011.5954721"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479896305696"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67946-4_1"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638350"},{"key":"ref120","article-title":"Tensorial neural networks: Generalization of neural networks and application to model compression","author":"su","year":"2018","journal-title":"arXiv 1805 10352"},{"key":"ref121","article-title":"Ultimate tensorization: Compressing convolutional and FC layers alike","author":"garipov","year":"2016","journal-title":"arXiv 1611 03214"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2017.7966420"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2018.8489213"},{"key":"ref260","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2011.5981829"},{"key":"ref261","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2016.2616357"},{"key":"ref262","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080254"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304028"},{"key":"ref264","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1424-8"},{"key":"ref265","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2020.2970709"},{"key":"ref266","doi-asserted-by":"publisher","DOI":"10.23919\/VLSIC.2019.8778056"}],"container-title":["Proceedings of the 
IEEE"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/5\/9063691\/9043731-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5\/9063691\/09043731.pdf?arnumber=9043731","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,19]],"date-time":"2022-10-19T10:59:24Z","timestamp":1666177164000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9043731\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4]]},"references-count":308,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/jproc.2020.2976475","relation":{},"ISSN":["0018-9219","1558-2256"],"issn-type":[{"value":"0018-9219","type":"print"},{"value":"1558-2256","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4]]}}}