{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T20:06:37Z","timestamp":1774555597711,"version":"3.50.1"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228782","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-10","source":"Crossref","is-referenced-by-count":0,"title":["On The Impact of Different Batch Sizes on Byzantine Robustness in Federated Learning"],"prefix":"10.1109","author":[{"given":"Xinjian","family":"Huang","sequence":"first","affiliation":[{"name":"Nanjing University of Science and Technology,School of Cyber Science and Engineering,Nanjing,China"}]},{"given":"Yuxin","family":"Wei","sequence":"additional","affiliation":[{"name":"Nanjing University of Science and Technology,School of Cyber Science and Engineering,Nanjing,China"}]},{"given":"Yunxuan","family":"Li","sequence":"additional","affiliation":[{"name":"Nanjing University of Science and Technology,School of Cyber Science and Engineering,Nanjing,China"}]},{"given":"Yishuo","family":"Zhao","sequence":"additional","affiliation":[{"name":"Nanjing University of Science and Technology,School of Cyber Science and Engineering,Nanjing,China"}]},{"given":"Bo","family":"Du","sequence":"additional","affiliation":[{"name":"Hubei Provincial Key Laboratory of Multimedia and Network Communication Engineering,Wuhan,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i11.29146"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3028013"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.5555\/2968826.2969010"},{"key":"ref4","article-title":"Accelerating stochastic gradient descent using predictive variance reduction","volume":"26","author":"Johnson","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref5","first-page":"2613","article-title":"SARAH: A novel method for machine learning problems using stochastic recursive gradient","volume-title":"International conference on machine learning","author":"Nguyen"},{"key":"ref6","first-page":"699","article-title":"Variance reduction for faster non-convex optimization","volume-title":"International conference on machine learning","author":"Allen-Zhu"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-016-1030-6"},{"issue":"1","key":"ref8","article-title":"Stochastic dual coordinate ascent methods for regularized loss minimization","volume":"14","author":"Shalev-Shwartz","year":"2013","journal-title":"Journal of Machine Learning Research"},{"issue":"221","key":"ref9","first-page":"1","article-title":"The first direct acceleration of stochastic gradient methods","volume":"18","author":"Allen-Zhu","year":"2018","journal-title":"Journal of Machine Learning Research"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3150\/14-BEJ645"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3154503"},{"key":"ref12","article-title":"Generalized byzantine-tolerant sgd","author":"Xie","year":"2018"},{"key":"ref13","first-page":"5650","article-title":"Byzantine-robust distributed learning: Towards optimal statistical rates","volume-title":"International conference on machine learning","author":"Yin"},{"key":"ref14","article-title":"Machine learning with adversaries: Byzantine tolerant gradient descent","volume":"30","author":"Blanchard","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2022.3153135"},{"key":"ref16","first-page":"2603","article-title":"On the strategy proofness of the geometric median","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"El-Mhamdi"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3390\/electronics12051190"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3383294"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2020.3012952"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-35305-5_3"},{"key":"ref21","article-title":"Variance Reduction is an Antidote to Byzantines: Better Rates, Weaker Assumptions and Communication Compression as a Cherry on the Top","volume-title":"The Eleventh International Conference on Learning Representations","author":"Gorbunov"},{"key":"ref22","article-title":"On the Effect of Batch Size in Byzantine-Robust Distributed Learning","volume-title":"The Twelfth International Conference on Learning Representations","author":"Yang"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-35305-5_3"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2022.10.120"},{"key":"ref25","article-title":"Variance reduction is an antidote to byzantines: Better rates, weaker assumptions and communication compression as a cherry on the top","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Gorbunov"},{"key":"ref26","article-title":"On the Effect of Batch Size in Byzantine-Robust Distributed Learning","volume-title":"International Conference on Learning Representations","author":"Yang"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TSIPN.2023.3265892"},{"key":"ref28","first-page":"1207","article-title":"Communication compression for byzantine robust learning: New efficient algorithms and improved rates","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Rammal"},{"key":"ref29","first-page":"1273","article-title":"Broadcast: Reducing both stochastic and compression noise to robustify communication-efficient federated learning","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Zhu"},{"key":"ref30","article-title":"signSGD with majority vote is communication efficient and fault tolerant","author":"Bernstein","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ITA50056.2020.9245017"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10383346"},{"key":"ref33","first-page":"1145","article-title":"Asynchronous Byzantine machine learning (the case of SGD)","volume-title":"International Conference on Machine Learning","author":"Damaskinos"},{"key":"ref34","first-page":"7879","article-title":"Distributed deep learning in open collaborations","volume":"34","author":"Diskin","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref35","article-title":"Permutation compressors for provably faster distributed nonconvex optimization","author":"Szlendak","year":"2021"},{"key":"ref36","article-title":"On the convergence of SGD with biased gradients","author":"Ajalloeian","year":"2020"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3154503"},{"key":"ref38","first-page":"5650","article-title":"Byzantine-robust distributed learning: Towards optimal statistical rates","volume-title":"International conference on machine learning","author":"Yin"},{"key":"ref39","article-title":"Machine learning with adversaries: Byzantine tolerant gradient descent","volume":"30","author":"Blanchard","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref40","first-page":"3788","article-title":"MARINA: Faster non-convex distributed learning with compression","volume-title":"International Conference on Machine Learning","author":"Gorbunov"},{"key":"ref41","first-page":"5311","article-title":"Learning from history for byzantine robust optimization","volume-title":"International Conference on Machine Learning","author":"Karimireddy"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2023.3266019"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOMWKSHPS50562.2020.9162672"},{"key":"ref44","article-title":"Robust Aggregation for Adaptive Privacy Preserving Federated Learning in Healthcare","author":"Grama"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00130"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.3233\/978-1-61499-098-7-870"},{"key":"ref47","first-page":"261","article-title":"Fall of empires: Breaking byzantine-tolerant sgd by inner product manipulation","author":"Xie","year":"2020","journal-title":"Uncertainty in Artificial Intelligence"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","location":"Rome, Italy","start":{"date-parts":[[2025,6,30]]},"end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228782.pdf?arnumber=11228782","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T19:46:43Z","timestamp":1774554403000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228782\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228782","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}