{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T21:15:46Z","timestamp":1765228546196,"version":"3.46.0"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Ericsson Canada and by the Natural Sciences and Engineering Research Council of Canada"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. 
Signal Process."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tsp.2025.3624791","type":"journal-article","created":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T18:01:12Z","timestamp":1761242472000},"page":"4463-4478","source":"Crossref","is-referenced-by-count":1,"title":["Regularized Top-$ k $: A Bayesian Framework for Gradient Sparsification"],"prefix":"10.1109","volume":"73","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9565-6405","authenticated-orcid":false,"given":"Ali","family":"Bereyhi","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Toronto, Toronto, ON M5S 2E4, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1800-1322","authenticated-orcid":false,"given":"Ben","family":"Liang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Toronto, Toronto, ON M5S 2E4, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3539-9624","authenticated-orcid":false,"given":"Gary","family":"Boudreau","sequence":"additional","affiliation":[{"name":"Ericsson Canada, Ottawa, ON K2K 2V6, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5253-2429","authenticated-orcid":false,"given":"Ali","family":"Afana","sequence":"additional","affiliation":[{"name":"Ericsson Canada, Ottawa, ON K2K 2V6, Canada"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP58920.2024.10734719"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3377454"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref4","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. Artif. Intell. Statist.","author":"McMahan","year":"2017"},{"key":"ref5","first-page":"2575","article-title":"Convergence and accuracy trade-offs in federated learning and meta-learning","volume-title":"Proc. Int. Conf. Artif. 
Intell. Statist.","author":"Charles","year":"2021"},{"key":"ref6","first-page":"6692","article-title":"From local SGD to local fixed-point methods for federated learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Malinovskiy","year":"2020"},{"key":"ref7","first-page":"5132","article-title":"Scaffold: Stochastic controlled averaging for federated learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Karimireddy","year":"2020"},{"key":"ref8","first-page":"7057","article-title":"FedSplit: An algorithmic framework for fast federated optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Pathak","year":"2020"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2975749"},{"key":"ref10","first-page":"7611","article-title":"Tackling the objective inconsistency problem in heterogeneous federated optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Wang","year":"2020"},{"key":"ref11","first-page":"14606","article-title":"Linear convergence in federated learning: Tackling client heterogeneity and sparse gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Mitra","year":"2021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-274"},{"key":"ref13","article-title":"Synchronous multi-GPU deep learning with low-precision communication: An experimental study","volume-title":"Proc. 21st Int. Conf. Extending Database Technol.","author":"Grubic","year":"2018"},{"key":"ref14","first-page":"1709","article-title":"QSGD: Communication-efficient SGD via gradient quantization and encoding","volume-title":"Proc. Adv. Neural Inf. Process. Syst. 
(NeurIPS)","author":"Alistarh","year":"2017"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/tit.2022.3161620"},{"key":"ref16","first-page":"3370","article-title":"Communication-efficient distributed learning via lazily aggregated quantized gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Sun","year":"2019"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5706"},{"key":"ref18","first-page":"13144","article-title":"Communication-efficient distributed SGD with sketching","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"32","author":"Ivkin","year":"2019"},{"key":"ref19","first-page":"8253","article-title":"FetchSGD: Communication-efficient federated learning with sketching","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Rothchild","year":"2020"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2024.3475578"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-354"},{"key":"ref22","first-page":"1509","article-title":"TernGrad: Ternary gradients to reduce communication in distributed deep learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Wen","year":"2017"},{"key":"ref23","first-page":"5973","article-title":"The convergence of sparsified gradient methods","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Alistarh","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MLHPC.2016.004"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1045"},{"key":"ref26","first-page":"1","article-title":"Deep gradient compression: Reducing the communication bandwidth for distributed training","volume-title":"Proc. Int. Conf. Learn. 
Representations (ICLR)","author":"Lin","year":"2018"},{"key":"ref27","first-page":"8133","article-title":"Rethinking gradient sparsification as total error minimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Sahu","year":"2021"},{"article-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size","year":"2016","author":"Iandola","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref31","first-page":"6105","article-title":"EfficientNet: Rethinking model scaling for convolutional neural networks","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Tan","year":"2019"},{"article-title":"ImageNette","year":"2020","author":"Howard","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11728"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS47774.2020.00026"},{"key":"ref35","first-page":"9850","article-title":"ATOMO: Communication-efficient learning via atomic sparsification","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Wang","year":"2018"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3154387"},{"key":"ref37","first-page":"15216","article-title":"Efficient neural network training via forward and backward propagation sparsification","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Zhou","year":"2021"},{"key":"ref38","first-page":"13551","article-title":"ScaleCom: Scalable sparsified gradient compression for communication-efficient distributed training","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Chen","year":"2020"},{"key":"ref39","first-page":"297","article-title":"An efficient statistical-based gradient compression technique for distributed training systems","volume-title":"Proc. 
Mach. Learn. Syst. (MLSys)","volume":"3","author":"Abdelmoniem","year":"2021"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-03311-7"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/18.720546"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2025.3624791"},{"key":"ref43","first-page":"18599","article-title":"When adversarial training meets vision transformers: Recipes from training to architecture","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"35","author":"Mo","year":"2022"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref45","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"25","author":"Krizhevsky","year":"2012"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","year":"2017","author":"Howard","key":"ref47"},{"article-title":"Adam: A method for stochastic optimization","year":"2014","author":"Kingma","key":"ref48"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1002\/9781119196037"}],"container-title":["IEEE Transactions on Signal 
Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/78\/10807692\/11215837.pdf?arnumber=11215837","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T18:42:52Z","timestamp":1765219372000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11215837\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/tsp.2025.3624791","relation":{},"ISSN":["1053-587X","1941-0476"],"issn-type":[{"type":"print","value":"1053-587X"},{"type":"electronic","value":"1941-0476"}],"subject":[],"published":{"date-parts":[[2025]]}}}