{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T15:08:51Z","timestamp":1777129731032,"version":"3.51.4"},"reference-count":134,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2020,12,1]],"date-time":"2020-12-01T00:00:00Z","timestamp":1606780800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,12,1]],"date-time":"2020-12-01T00:00:00Z","timestamp":1606780800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,12,1]],"date-time":"2020-12-01T00:00:00Z","timestamp":1606780800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Proc. IEEE"],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1109\/jproc.2020.3029453","type":"journal-article","created":{"date-parts":[[2020,11,10]],"date-time":"2020-11-10T21:26:41Z","timestamp":1605043601000},"page":"2232-2250","source":"Crossref","is-referenced-by-count":61,"title":["Efficient AI System Design With Cross-Layer Approximate Computing"],"prefix":"10.1109","volume":"108","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0470-6364","authenticated-orcid":false,"given":"Swagath","family":"Venkataramani","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7664-0061","authenticated-orcid":false,"given":"Naigang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5542-7149","authenticated-orcid":false,"given":"Chia-Yu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3075-8694","authenticated-orcid":false,"given":"Jungwook","family":"Choi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingu","family":"Kang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4389-5911","authenticated-orcid":false,"given":"Ankur","family":"Agarwal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinwook","family":"Oh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2291-7712","authenticated-orcid":false,"given":"Shubham","family":"Jain","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tina","family":"Babinsky","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2786-9139","authenticated-orcid":false,"given":"Nianzheng","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2387-8521","authenticated-orcid":false,"given":"Thomas","family":"Fox","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bruce","family":"Fleischer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"George","family":"Gristede","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Guillorn","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Howard","family":"Haynie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8238-0371","authenticated-orcid":false,"given":"Hiroshi","family":"Inoue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kazuaki","family":"Ishizaki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Klaiber","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shih-Hsien","family":"Lo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gary","family":"Maier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Silvia","family":"Mueller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Scheuermann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eri","family":"Ogawa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marcel","family":"Schaal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mauricio","family":"Serrano","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joel","family":"Silberman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christos","family":"Vezyrtzis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fanchieh","family":"Yee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6909-0612","authenticated-orcid":false,"given":"Jintao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9259-7304","authenticated-orcid":false,"given":"Matthew","family":"Ziegler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ching","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Moriyoshi","family":"Ohara","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pong-Fei","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brian","family":"Curran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9268-4096","authenticated-orcid":false,"given":"Sunil","family":"Shukla","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vijayalakshmi","family":"Srinivasan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Leland","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kailash","family":"Gopalakrishnan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","first-page":"1","article-title":"Best-effort parallel execution framework for recognition and mining applications","author":"meng","year":"2009","journal-title":"Proc IEEE Int Symp Parallel Distrib Process"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.48"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/1594233.1594282"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CICC.2013.6658433"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/1837274.1837411"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/DSD.2005.58"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/2189750.2151008"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/1993498.1993518"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/DATE.2010.5457059"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/DATE.2010.5457181"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/313817.313834"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.2975695"},{"key":"ref29","first-page":"1","article-title":"Sustaining Moore&#x2019;s law in embedded computing through probabilistic and approximate design: Retrospects and prospects","author":"palem","year":"2009","journal-title":"Proc Int Conf Compilers Arch Synth Embed Syst (CASES)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"ref22","year":"0","journal-title":"Comput-Aid Des"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2877890"},{"key":"ref24","article-title":"Modeling order in neural word embeddings at scale","author":"trask","year":"2015","journal-title":"arXiv 1506 02338"},{"key":"ref23","article-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer","author":"shazeer","year":"2017","journal-title":"arXiv 1701 06538"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.3850\/9783981537079_0819"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRC.2016.7738674"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1145\/2968456.2968458"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2463209.2488873"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2017.39"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2019.2931584"},{"key":"ref59","year":"2016","journal-title":"Google Supercharges Machine Learning Tasks with TPU Custom Chip"},{"key":"ref58","first-page":"311","article-title":"BLEU: A method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proc 40th Ann Meeting Assoc for Computational Linguistics (ACL 02)"},{"key":"ref57","article-title":"A comprehensive survey on graph neural networks","volume":"abs 1901 596","author":"wu","year":"2019","journal-title":"CoRR"},{"key":"ref56","first-page":"1842","article-title":"Meta-learning with memory-augmented neural networks","volume":"48","author":"santoro","year":"2016","journal-title":"Proc Int Conf Int Conf Mach Learn (ICML)"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2019.00036"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC47752.2019.9042017"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CoolChips.2019.8721357"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2025113.2025133"},{"key":"ref4","article-title":"Deep residual learning for image recognition","volume":"abs 1512 3385","author":"he","year":"2015","journal-title":"CoRR"},{"key":"ref3","article-title":"Very deep convolutional networks for large-scale image recognition","volume":"abs 1409 1556","author":"simonyan","year":"2014","journal-title":"CoRR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref8","article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","volume":"abs 1704 4861","author":"howard","year":"2017","journal-title":"CoRR"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2019.00023"},{"key":"ref7","first-page":"21","article-title":"SSD: Single shot multibox detector","author":"liu","year":"2016","journal-title":"Proc ECCV"},{"key":"ref9","author":"iandola","year":"2016","journal-title":"Squeezenet Alexnet-level accuracy with 50x fewer parameters and< 0 5 mb model size"},{"key":"ref46","article-title":"Accurate and efficient 2-bit quantized neural networks","author":"choi","year":"2019","journal-title":"Proc 2nd SysML Conf"},{"key":"ref45","article-title":"Learning sparse low-precision neural networks with learnable regularization","author":"choi","year":"2018","journal-title":"arXiv 1809 00095"},{"key":"ref48","first-page":"35","article-title":"A scalable multi-teraops deep learning processor core for ai training and inference","author":"fleischer","year":"2018","journal-title":"Proc VLSI Symp"},{"key":"ref47","article-title":"Adacomp: Adaptive residual gradient compression for data-parallel distributed training","author":"chen","year":"2017","journal-title":"arXiv 1712 02679"},{"key":"ref42","first-page":"7675","article-title":"Training deep neural networks with 8-bit floating point numbers","author":"wang","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref41","article-title":"Deep learning with limited numerical precision","volume":"abs 1502 2551","author":"gupta","year":"2015","journal-title":"CoRR"},{"key":"ref44","first-page":"4901","article-title":"Hybrid 8-bit floating point (HFP8) training and inference for deep neural networks","author":"sun","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref43","article-title":"Accumulation bit-width scaling for ultra-low precision training of deep networks","author":"sakr","year":"2019","journal-title":"Proc Int Conf Learn Represent (ICLR)ICLR"},{"key":"ref127","first-page":"578","article-title":"TVM: An automated end-to-end optimizing compiler for deep learning","author":"chen","year":"2018","journal-title":"Proc OSDI"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062333"},{"key":"ref125","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1145\/1961296.1950391","article-title":"Flikker: Saving DRAM refresh-power through critical data partitioning","volume":"46","author":"liu","year":"2011","journal-title":"SIGPLAN Not"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED.2017.8009173"},{"key":"ref73","article-title":"Mixed precision training","author":"micikevicius","year":"2017","journal-title":"arXiv 1710 03740"},{"key":"ref72","article-title":"Flexpoint: An adaptive numerical format for efficient training of deep neural networks","volume":"abs 1711 2213","author":"k\u00f6ster","year":"2017","journal-title":"CoRR"},{"key":"ref129","article-title":"Glow: Graph lowering compiler techniques for neural networks","volume":"abs 1805 907","author":"rotem","year":"2018","journal-title":"CoRR"},{"key":"ref71","article-title":"Per-tensor fixed-point quantization of the back-propagation algorithm","author":"sakr","year":"2019","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref128","article-title":"Intel ngraph: An intermediate representation, compiler, and executor for deep learning","volume":"abs 1801 8058","author":"cyphers","year":"2018","journal-title":"CoRR"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00448"},{"key":"ref76","article-title":"1-bit stochastic gradient descent and its application to data-parallel distributed training of speech DNNs","author":"seide","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref130","year":"2020","journal-title":"Nvidia tensorrt"},{"key":"ref77","first-page":"1488","article-title":"Scalable distributed DNN training usingcommodity GPU cloud computing","author":"strom","year":"2015","journal-title":"Proc 16th Annu Conf Int Speech Commun Assoc (INTERSPEECH)"},{"key":"ref74","year":"2019","journal-title":"Bfloat16 The secret to high performance on cloud tpus"},{"key":"ref75","year":"2018","journal-title":"Nvidia Apex"},{"key":"ref133","article-title":"HAQ: Hardware-aware automated quantization","volume":"abs 1811 8886","author":"wang","year":"2018","journal-title":"CoRR"},{"key":"ref134","year":"2017","journal-title":"Google Ai Blog Eager Execution An Imperative Define-by-Run Interface to Tensorflow"},{"key":"ref131","article-title":"Device placement optimization with reinforcement learning","author":"mirhoseini","year":"2017","journal-title":"arXiv 1706 04972"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/MLHPC.2016.004"},{"key":"ref132","first-page":"1662","article-title":"Spotlight: Optimizing device placement for training deep neural networks","volume":"80","author":"gao","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref79","article-title":"Deep gradient compression: Reducing the communication bandwidth for distributed training","author":"lin","year":"2018","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref60","first-page":"1","article-title":"Spring hill (NNP-I 1000) Intel&#x2019;s data center inference chip","author":"wechsler","year":"2019","journal-title":"Proc IEEE Hot Chips Symp (HCS)"},{"key":"ref62","article-title":"Trained ternary quantization","author":"zhu","year":"2017","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref61","year":"2017","journal-title":"NVIDIA Blog"},{"key":"ref63","article-title":"Variational network quantization","author":"achterhold","year":"2018","journal-title":"Proc ICLR"},{"key":"ref64","article-title":"Model compression via distillation and quantization","author":"polino","year":"2018","journal-title":"Proc ICLR"},{"key":"ref65","article-title":"Loss-aware weight quantization of deep networks","author":"james kwok lu hou","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.761"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-017-1750-y"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.574"},{"key":"ref2","article-title":"OverFeat: Integrated recognition, localization and detection using convolutional networks","author":"sermanet","year":"2013","journal-title":"arXiv 1312 6229"},{"key":"ref69","article-title":"Pact: Parameterized clipping activation for quantized neural networks","author":"choi","year":"2018","journal-title":"arXiv 1805 06085"},{"key":"ref1","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001165"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317783"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196012"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2016.7418007"},{"key":"ref93","article-title":"Pruning filters for efficient ConvNets","author":"li","year":"2016","journal-title":"arXiv 1608 08710"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"ref92","article-title":"Pruning convolutional neural networks for resource efficient inference","author":"molchanov","year":"2016","journal-title":"arXiv 1611 06440"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2011.5981829"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080215"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815993"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.5244\/C.28.88"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00061"},{"key":"ref102","article-title":"BranchyNet: Fast inference via early exiting from deep neural networks","volume":"abs 1709 1686","author":"teerapittayanon","year":"2017","journal-title":"CoRR"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001178"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1145\/2133382.2133388"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001138"},{"key":"ref98","first-page":"571","article-title":"Project Adam: Building an efficient and scalable deep learning training system","author":"chilimbi","year":"2014","journal-title":"Proc of USENIX Symp on Operating Systems Design and Implementation (OSDI)"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CODESISSS.2015.7331375"},{"key":"ref96","first-page":"1232","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref97","article-title":"Staleness-aware ASYNC-SGD for distributed deep learning","volume":"abs 1511 5950","author":"zhang","year":"2016","journal-title":"CoRR"},{"key":"ref10","article-title":"Rethinking the inception architecture for computer vision","author":"szegedy","year":"2015","journal-title":"arXiv 1512 00567 [cs]"},{"key":"ref11","article-title":"Inception-v4, inception-resnet and the impact of residual connections on learning","author":"szegedy","year":"2016","journal-title":"arXiv 1602 07261"},{"key":"ref12","article-title":"Densely connected convolutional networks","volume":"abs 1608 6993","author":"huang","year":"2016","journal-title":"CoRR"},{"key":"ref13","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016","journal-title":"arXiv 1609 08144"},{"key":"ref14","first-page":"6000","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc 31st Int Conf Neural Inf Process Syst (NIPS)"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310397"},{"key":"ref16","article-title":"Text understanding from scratch","author":"zhang","year":"2015","journal-title":"arXiv 1502 01710"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIC.2018.8502421"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"ref81","first-page":"2827","article-title":"Adacomp: Adaptive residual gradient compression for data-parallel distributed training","author":"chen","year":"2018","journal-title":"Proc AAAI Conf Artif Intell (AAAI)"},{"key":"ref18","article-title":"Deep speech: Scaling up end-to-end speech recognition","author":"hannun","year":"2014","journal-title":"arXiv 1412 5567"},{"key":"ref84","first-page":"3007","article-title":"Analytical guarantees on numerical precision of deep neural networks","volume":"70","author":"sakr","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref119","article-title":"TiM-DNN: Ternary in-memory accelerator for deep neural networks","author":"jain","year":"2019","journal-title":"arXiv 1909 06892"},{"key":"ref19","article-title":"Neural architecture search with reinforcement learning","author":"zoph","year":"2017","journal-title":"arXiv 1611 01578"},{"key":"ref83","first-page":"7543","article-title":"Improving neural network quantization without retraining using outlier channel splitting","author":"zhao","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2018.2879434"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2017.2782087"},{"key":"ref80","first-page":"13144","article-title":"Communication-efficient distributed SGD with sketching","author":"ivkin","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2018.2790840"},{"key":"ref89","article-title":"Deep compression: Compressing deep neural network with pruning, trained quantization and Huffman coding","volume":"abs 1510 149","author":"han","year":"2015","journal-title":"CoRR"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2947011"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.22"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2019.8714872"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1145\/2627369.2627613"},{"key":"ref86","first-page":"4107","article-title":"Binarized neural networks","author":"hubara","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/SiPS.2014.6986082"},{"key":"ref88","author":"bengio","year":"2013","journal-title":"arXiv 1308 3432"}],"container-title":["Proceedings of the IEEE"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5\/9264766\/09253640.pdf?arnumber=9253640","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T13:39:58Z","timestamp":1651066798000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9253640\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12]]},"references-count":134,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/jproc.2020.3029453","relation":{},"ISSN":["0018-9219","1558-2256"],"issn-type":[{"value":"0018-9219","type":"print"},{"value":"1558-2256","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,12]]}}}