{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T21:26:39Z","timestamp":1740173199400,"version":"3.37.3"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"24","license":[{"start":{"date-parts":[[2024,12,15]],"date-time":"2024-12-15T00:00:00Z","timestamp":1734220800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,15]],"date-time":"2024-12-15T00:00:00Z","timestamp":1734220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,15]],"date-time":"2024-12-15T00:00:00Z","timestamp":1734220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Science and Technology Council, Taiwan","award":["NSTC 111-2221-E-A49-069-MY3"],"award-info":[{"award-number":["NSTC 111-2221-E-A49-069-MY3"]}]},{"name":"National Science and Technology Council, Taiwan","award":["NSTC 111-2221-E-A49-068-MY3"],"award-info":[{"award-number":["NSTC 111-2221-E-A49-068-MY3"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Internet Things J."],"published-print":{"date-parts":[[2024,12,15]]},"DOI":"10.1109\/jiot.2024.3456857","type":"journal-article","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T19:24:51Z","timestamp":1725996291000},"page":"40431-40447","source":"Crossref","is-referenced-by-count":0,"title":["Communication-Efficient Federated DNN Training: Convert, Compress, Correct"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8481-636X","authenticated-orcid":false,"given":"Zhong-Jing","family":"Chen","sequence":"first","affiliation":[{"name":"Institute of Communications Engineering, 
National Yang-Ming Chiao-Tung University, Hsinchu, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2812-5311","authenticated-orcid":false,"given":"Eduin E.","family":"Hernandez","sequence":"additional","affiliation":[{"name":"Department of Electronics and Electrical Engineering, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2135-1232","authenticated-orcid":false,"given":"Yu-Chih","family":"Huang","sequence":"additional","affiliation":[{"name":"Institute of Communications Engineering, National Yang-Ming Chiao-Tung University, Hsinchu, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1681-3316","authenticated-orcid":false,"given":"Stefano","family":"Rini","sequence":"additional","affiliation":[{"name":"Institute of Communications Engineering, National Yang-Ming Chiao-Tung University, Hsinchu, Taiwan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICC45855.2022.9838754"},{"volume-title":"Parallel and Distributed Computation: Numerical Methods","year":"2015","author":"Bertsekas","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1137\/090759574"},{"key":"ref4","first-page":"9850","article-title":"Atomo: Communication-efficient learning via atomic sparsification","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref5","first-page":"5973","article-title":"The convergence of sparsified gradient methods","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Alistarh"},{"key":"ref6","first-page":"560","article-title":"signSGD: Compressed optimization for nonconvex problems","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Bernstein"},{"key":"ref7","first-page":"7564","article-title":"cpSGD: Communication-efficient and differentially-private distributed SGD","volume-title":"Proc. 2nd Adv. Neural Inf. Process. Syst. 
(NIPS)","author":"Agarwal"},{"key":"ref8","article-title":"Privacy for free: Communication efficient learning with differential privacy using sketches","author":"Li","year":"2019","journal-title":"arxiv:1911.00972"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2021.3118428"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT44484.2020.9174245"},{"key":"ref11","first-page":"1","article-title":"QSGD: Communication-efficient SGD via gradient quantization and encoding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Alistarh"},{"key":"ref12","first-page":"1","article-title":"Gradient sparsification for communication-efficient distributed optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Wangni"},{"key":"ref13","first-page":"2197","article-title":"VQSGD: Vector quantized stochastic gradient descent","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Gandikota"},{"key":"ref14","article-title":"Lossy gradient compression: How much accuracy can one bit buy?","author":"Salehkalaibar","year":"2022","journal-title":"arXiv:2202.02812"},{"key":"ref15","article-title":"Federated learning: Strategies for improving communication efficiency","author":"Kone\u010dn\u00fd","year":"2016","journal-title":"arxiv:1610.05492v1"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-274"},{"key":"ref17","first-page":"4900","article-title":"Hybrid 8-bit floating point (HFP8) training and inference for deep neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Sun"},{"key":"ref18","first-page":"8253","article-title":"FetchSGD: Communication-efficient federated learning with sketching","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","volume":"119","author":"Rothchild"},{"key":"ref19","first-page":"3304","article-title":"Don\u2019t waste your bits! 
Squeeze activations and gradients for deep neural networks via tinyScript","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","volume":"119","author":"Fu"},{"key":"ref20","first-page":"3252","article-title":"Error feedback fixes signSGD and other gradient compression schemes","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Karimireddy"},{"key":"ref21","first-page":"1","article-title":"Sparsified SGD with memory","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Stich"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00907"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1602.05629"},{"key":"ref26","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. Artif. Intell. Stat.","author":"McMahan"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1806579115"},{"key":"ref28","article-title":"A mean-field limit for certain deep neural networks","author":"Ara\u00fajo","year":"2019","journal-title":"arXiv:1906.00193"},{"key":"ref29","article-title":"A rigorous framework for the mean field limit of multilayer neural networks","author":"Nguyen","year":"2020","journal-title":"arXiv:2001.11443"},{"key":"ref30","first-page":"1887","article-title":"Modeling from features: A mean-field framework for over-parameterized deep neural networks","volume-title":"Proc. Conf. Learn. Theory","author":"Fang"},{"key":"ref31","first-page":"2388","article-title":"Mean-field theory of two-layers neural networks: Dimension-free bounds and kernel limit","volume-title":"Proc. Conf. Learn. 
Theory","author":"Mei"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1503.02531"},{"key":"ref33","article-title":"Understanding top-k sparsification in distributed deep learning","author":"Shi","year":"2019","journal-title":"arXiv:1911.08772"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1978.1055934"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.1997.679123"},{"volume-title":"Elements of Information Theory","year":"2006","author":"Cover","key":"ref36"},{"key":"ref37","first-page":"3821","article-title":"An information-theoretic justification for model pruning","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Isik"},{"volume-title":"Optimal Transport: Old and New","year":"2008","author":"Villani","key":"ref38"},{"volume-title":"IEEE Standard for Floating-Point Arithmetic","year":"2019","key":"ref39"},{"key":"ref40","first-page":"1","article-title":"The error-feedback framework: Better rates for SGD with delayed gradients and compressed updates","volume":"21","author":"Stich","year":"2020","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518469"},{"key":"ref42","article-title":"An information-theoretic justification for model pruning","author":"Isik","year":"2021","journal-title":"arXiv:2102.08329"}],"container-title":["IEEE Internet of Things Journal"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6488907\/10786863\/10673995.pdf?arnumber=10673995","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,11]],"date-time":"2024-12-11T22:03:52Z","timestamp":1733954632000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10673995\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,15]]},"references-count":42,"journal-issue":{"issue":"24"},"URL":"https:\/\/doi.org\/10.1109\/jiot.2024.3456857","relation":{},"ISSN":["2327-4662","2372-2541"],"issn-type":[{"type":"electronic","value":"2327-4662"},{"type":"electronic","value":"2372-2541"}],"subject":[],"published":{"date-parts":[[2024,12,15]]}}}