{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T03:39:08Z","timestamp":1780371548712,"version":"3.54.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2021,7,26]],"date-time":"2021-07-26T00:00:00Z","timestamp":1627257600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,7,26]],"date-time":"2021-07-26T00:00:00Z","timestamp":1627257600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Basic Research Program of China","doi-asserted-by":"publisher","award":["2018AAA0100704"],"award-info":[{"award-number":["2018AAA0100704"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005153","name":"China National Funds for Distinguished Young Scientists","doi-asserted-by":"publisher","award":["11825104"],"award-info":[{"award-number":["11825104"]}],"id":[{"id":"10.13039\/501100005153","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["11690013"],"award-info":[{"award-number":["11690013"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"CPC Central Committee","award":["National Youth Talent Support Program"],"award-info":[{"award-number":["National Youth Talent Support Program"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12001109"],"award-info":[{"award-number":["12001109"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["19YF1402800"],"award-info":[{"award-number":["19YF1402800"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010905","name":"Major Research Plan","doi-asserted-by":"publisher","award":["92046021"],"award-info":[{"award-number":["92046021"]}],"id":[{"id":"10.13039\/501100010905","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["20dz1200600"],"award-info":[{"award-number":["20dz1200600"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1007\/s10994-021-06001-x","type":"journal-article","created":{"date-parts":[[2021,7,26]],"date-time":"2021-07-26T21:02:41Z","timestamp":1627333361000},"page":"3773-3804","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Byzantine-robust distributed sparse learning for M-estimation"],"prefix":"10.1007","volume":"112","author":[{"given":"Jiyuan","family":"Tu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weidong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9362-508X","authenticated-orcid":false,"given":"Xiaojun","family":"Mao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,7,26]]},"reference":[{"key":"6001_CR1","doi-asserted-by":"crossref","unstructured":"Agarwal, A., & Duchi, J.C. (2012). Distributed delayed stochastic optimization. In 2012 IEEE 51st IEEE Conference on Decision and Control (CDC), pp. 5451\u20135452.","DOI":"10.1109\/CDC.2012.6426626"},{"key":"6001_CR2","unstructured":"Alistarh, D., Allen-Zhu, Z., & Li, J. (2018). Byzantine stochastic gradient descent. In Advances in Neural Information Processing Systems, Curran Associates, Inc., vol 31."},{"issue":"1","key":"6001_CR3","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1137\/080716542","volume":"2","author":"A Beck","year":"2009","unstructured":"Beck, A., & Teboulle, M. (2009). A fast iterative shrinkage-thresholding algorithm for linear inverse problems. SIAM Journal on Imaging Sciences, 2(1), 183\u2013202.","journal-title":"SIAM Journal on Imaging Sciences"},{"key":"6001_CR4","unstructured":"Blanchard, P., El Mhamdi, E. M., Guerraoui, R. & Stainer, J. (2017). Machine learning with adversaries: Byzantine tolerant gradient descent. In Advances in Neural Information Processing Systems, Curran Associates, Inc., Vol. 30."},{"issue":"1","key":"6001_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000016","volume":"3","author":"S Boyd","year":"2011","unstructured":"Boyd, S., Parikh, N., Chu, E., Peleato, B., & Eckstein, J. (2011). Distributed optimization and statistical learning via the alternating direction method of multipliers. Foundations and Trends in Machine Learning, 3(1), 1\u2013122.","journal-title":"Foundations and Trends in Machine Learning"},{"key":"6001_CR6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-20192-9","volume-title":"Statistics for high-dimensional data: Methods, theory and applications","author":"P B\u00fchlmann","year":"2011","unstructured":"B\u00fchlmann, P., & Van De Geer, S. (2011). Statistics for high-dimensional data: Methods, theory and applications. Berlin: Springer."},{"issue":"182","key":"6001_CR7","first-page":"1","volume":"21","author":"X Chen","year":"2020","unstructured":"Chen, X., Liu, W., Mao, X., & Yang, Z. (2020). Distributed high-dimensional regression under a quantile loss function. The Journal of Machine Learning Research, 21(182), 1\u201343.","journal-title":"The Journal of Machine Learning Research"},{"issue":"2","key":"6001_CR8","first-page":"1","volume":"1","author":"Y Chen","year":"2017","unstructured":"Chen, Y., Su, L., & Xu, J. (2017). Distributed statistical machine learning in adversarial settings. Proceedings of the ACM on Measurement and Analysis of Computing Systems, 1(2), 1\u201325.","journal-title":"Proceedings of the ACM on Measurement and Analysis of Computing Systems"},{"issue":"1","key":"6001_CR9","doi-asserted-by":"publisher","first-page":"324","DOI":"10.1214\/13-AOS1191","volume":"42","author":"J Fan","year":"2014","unstructured":"Fan, J., Fan, Y., & Barut, E. (2014). Adaptive robust variable selection. The Annals of Statistics, 42(1), 324\u2013351.","journal-title":"The Annals of Statistics"},{"key":"6001_CR10","unstructured":"Fan, J., Guo, Y., & Wang, K. (2019). Communication-efficient accurate statistical estimation. arXiv e-prints arXiv:1906.04870."},{"key":"6001_CR11","unstructured":"Feng, J., Xu, H. & Mannor, S. (2014). Distributed robust learning. arXiv e-prints arXiv:1409.5937."},{"key":"6001_CR12","doi-asserted-by":"publisher","DOI":"10.1201\/b18401","volume-title":"Statistical learning with sparsity: The Lasso and Generalizations","author":"T Hastie","year":"2015","unstructured":"Hastie, T., Tibshirani, R., & Wainwright, M. (2015). Statistical learning with sparsity: The Lasso and Generalizations. Cambridge: CRC Press."},{"issue":"5","key":"6001_CR13","doi-asserted-by":"publisher","first-page":"799","DOI":"10.1214\/aos\/1176342503","volume":"1","author":"PJ Huber","year":"1973","unstructured":"Huber, P. J. (1973). Robust regression: Asymptotics, conjectures and Monte Carlo. The Annals of Statistics, 1(5), 799\u2013821.","journal-title":"The Annals of Statistics"},{"key":"6001_CR14","volume-title":"Robust statistics","author":"PJ Huber","year":"2004","unstructured":"Huber, P. J. (2004). Robust statistics (Vol. 523). New York: Wiley."},{"issue":"526","key":"6001_CR15","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1080\/01621459.2018.1429274","volume":"114","author":"MI Jordan","year":"2019","unstructured":"Jordan, M. I., Lee, J. D., & Yang, Y. (2019). Communication-efficient distributed statistical inference. The Journal of the American Statistical Association, 114(526), 668\u2013681.","journal-title":"The Journal of the American Statistical Association"},{"key":"6001_CR16","doi-asserted-by":"crossref","unstructured":"Koenker, R. (2005). Quantile Regression (Econometric Society Monographs; No. 38). Cambridge university press","DOI":"10.1017\/CBO9780511754098"},{"issue":"4","key":"6001_CR17","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1257\/jep.15.4.143","volume":"15","author":"R Koenker","year":"2001","unstructured":"Koenker, R., & Hallock, K. F. (2001). Quantile regression. Journal of Economic Perspectives, 15(4), 143\u2013156.","journal-title":"Journal of Economic Perspectives"},{"issue":"3","key":"6001_CR18","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1145\/357172.357176","volume":"4","author":"L Lamport","year":"1982","unstructured":"Lamport, L., Shostak, R., & Pease, M. (1982). The byzantine generals problem. ACM Transactions on Programming Languages and Systems, 4(3), 382\u2013401.","journal-title":"ACM Transactions on Programming Languages and Systems"},{"issue":"2","key":"6001_CR19","doi-asserted-by":"publisher","first-page":"906","DOI":"10.1214\/19-AOS1828","volume":"48","author":"G Lecu\u00e9","year":"2020","unstructured":"Lecu\u00e9, G., & Lerasle, M. (2020). Robust machine learning by median-of-means: Theory and practice. The Annals of Statistics, 48(2), 906\u2013931.","journal-title":"The Annals of Statistics"},{"issue":"1","key":"6001_CR20","first-page":"559","volume":"16","author":"P-L Loh","year":"2015","unstructured":"Loh, P.-L., & Wainwright, M. J. (2015). Regularized m-estimators with nonconvexity: Statistical and algorithmic theory for local optima. The Journal of Machine Learning Research, 16(1), 559\u2013616.","journal-title":"The Journal of Machine Learning Research"},{"issue":"6","key":"6001_CR21","doi-asserted-by":"publisher","first-page":"2455","DOI":"10.1214\/16-AOS1530","volume":"45","author":"P-L Loh","year":"2017","unstructured":"Loh, P.-L., & Wainwright, M. J. (2017). Support recovery without incoherence: A case for nonconvex regularization. The Annals of Statistics, 45(6), 2455\u20132482.","journal-title":"The Annals of Statistics"},{"issue":"3","key":"6001_CR22","doi-asserted-by":"publisher","first-page":"2075","DOI":"10.3150\/18-BEJ1046","volume":"25","author":"G Lugosi","year":"2019","unstructured":"Lugosi, G., & Mendelson, S. (2019). Regularization, sparse recovery, and median-of-means tournaments. Bernoulli, 25(3), 2075\u20132106.","journal-title":"Bernoulli"},{"key":"6001_CR23","doi-asserted-by":"crossref","unstructured":"Ma, C., Wang, K., Chi, Y. & Chen, Y. (2019). Implicit regularization in nonconvex statistical estimation: Gradient descent converges linearly for phase retrieval, matrix completion, and blind deconvolution. Foundations of Computational Mathematics, 1\u2013182.","DOI":"10.1007\/s10208-019-09429-9"},{"key":"6001_CR24","doi-asserted-by":"crossref","unstructured":"Mansoori, F. & Wei, E. (2017). Superlinearly convergent asynchronous distributed network newton method. In 2017 IEEE 56th Annual Conference on Decision and Control (CDC), pp. 2874\u20132879.","DOI":"10.1109\/CDC.2017.8264076"},{"issue":"6A","key":"6001_CR25","doi-asserted-by":"publisher","first-page":"2747","DOI":"10.1214\/17-AOS1637","volume":"46","author":"S Mei","year":"2018","unstructured":"Mei, S., Bai, Yu., & Montanari, A. (2018). The landscape of empirical risk for nonconvex losses. The Annals of Statistics, 46(6A), 2747\u20132774.","journal-title":"The Annals of Statistics"},{"issue":"4","key":"6001_CR26","doi-asserted-by":"publisher","first-page":"2308","DOI":"10.3150\/14-BEJ645","volume":"21","author":"S Minsker","year":"2015","unstructured":"Minsker, S. (2015). Geometric median and robust estimation in banach spaces. Bernoulli, 21(4), 2308\u20132335.","journal-title":"Bernoulli"},{"issue":"2","key":"6001_CR27","doi-asserted-by":"publisher","first-page":"5213","DOI":"10.1214\/19-EJS1647","volume":"13","author":"S Minsker","year":"2019","unstructured":"Minsker, S. (2019). Distributed statistical estimation and rates of convergence in normal approximation. The Electronic Journal of Statistics, 13(2), 5213\u20135252.","journal-title":"The Electronic Journal of Statistics"},{"issue":"04","key":"6001_CR28","doi-asserted-by":"publisher","first-page":"5503","DOI":"10.1609\/aaai.v34i04.6001","volume":"34","author":"Z Ren","year":"2020","unstructured":"Ren, Z., Zhou, Z., Qiu, L., Deshpande, A., & Kalagnanam, J. (2020). Delay-adaptive distributed stochastic optimization. Proceedings of the AAAI Conference on Artificial Intelligence, 34(04), 5503\u20135510.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"6001_CR29","unstructured":"Shamir, O., Srebro, N. & Zhang, T. (2014). Communication efficient distributed optimization using an approximate newton-type method. In Proceedings of the 31st International Conference on Machine Learning, Vol. 32, pp. 1000\u20131008."},{"key":"6001_CR30","doi-asserted-by":"crossref","unstructured":"Su, L., & Xu, J. (2019). Securing distributed gradient descent in high dimensional statistical learning. Proceedings of the ACM on Measurement and Analysis of Computing Systems3(1).","DOI":"10.1145\/3322205.3311083"},{"issue":"1","key":"6001_CR31","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R. (1996). Regression shrinkage and selection via the lasso. The Journal of the Royal Statistical Society, Series B (Statistical Methodology), 58(1), 267\u2013288.","journal-title":"The Journal of the Royal Statistical Society, Series B (Statistical Methodology)"},{"issue":"5","key":"6001_CR32","doi-asserted-by":"publisher","first-page":"2183","DOI":"10.1109\/TIT.2009.2016018","volume":"55","author":"MJ Wainwright","year":"2009","unstructured":"Wainwright, M. J. (2009). Sharp thresholds for high-dimensional and noisy sparsity recovery using $$\\ell _1$$-constrained quadratic programming (lasso). IEEE Transactions on Information Theory, 55(5), 2183\u20132202.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"3","key":"6001_CR33","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1093\/biomet\/asm053","volume":"94","author":"H Wang","year":"2007","unstructured":"Wang, H., Li, R., & Tsai, C.-L. (2007). Tuning parameter selectors for the smoothly clipped absolute deviation method. Biometrika, 94(3), 553\u2013568.","journal-title":"Biometrika"},{"key":"6001_CR34","unstructured":"Wang, J., Kolar, M., Srebro, N. & Zhang, T. (2017). Efficient distributed learning with sparsity. In Proceedings of the 34th International Conference on Machine Learning, Vol. 70, pp. 3636\u20133645."},{"key":"6001_CR35","unstructured":"Xie, C., Koyejo, O. & Gupta, I. (2018). Generalized Byzantine-tolerant SGD. arXiv e-prints arXiv:1802.10116."},{"key":"6001_CR36","unstructured":"Xie, C., Koyejo, S. & Gupta, I.. (2019). Zeno: Distributed stochastic gradient descent with suspicion-based fault-tolerance. In Proceedings of the 36th International Conference on Machine Learning, Vol. 97, pp. 6893\u20136901."},{"key":"6001_CR37","unstructured":"Xie, C., Koyejo, S. & Gupta, I. (2020). Zeno++: Robust fully asynchronous SGD. In Proceedings of the 37th International Conference on Machine Learning, Vol. 119, pp. 10495\u201310503."},{"key":"6001_CR38","unstructured":"Yin, D., Chen, Y., Kannan, R. & Bartlett, P. (2018). Byzantine-robust distributed learning: Towards optimal statistical rates. In Proceedings of the 35th International Conference on Machine Learning, Vol.\u00a080, pp. 5650\u20135659."},{"key":"6001_CR39","unstructured":"Yin, D., Chen, Y., Kannan, R. & Bartlett, P. (2019). Defending against saddle point attack in Byzantine-robust distributed learning. In Proceedings of the 36th International Conference on Machine Learning, Vol.\u00a097, pp. 7074\u20137084."},{"issue":"90","key":"6001_CR40","first-page":"2541","volume":"7","author":"P Zhao","year":"2006","unstructured":"Zhao, P., & Yu, B. (2006). On model selection consistency of lasso. The Journal of Machine Learning Research, 7(90), 2541\u20132563.","journal-title":"The Journal of Machine Learning Research"},{"key":"6001_CR41","unstructured":"Zhou, Z., Mertikopoulos, P., Bambos, N., Glynn, P., Ye, Y., Li, L.-J. & Li, F.-F. (2018). Distributed asynchronous optimization with unbounded delays: How slow can you go?. In Proceedings of the 35th International Conference on Machine Learning, Vol. 80, pp. 5970\u20135979."},{"key":"6001_CR42","unstructured":"Zhu, X., Li, F. & Wang, H. (2019). Least squares approximation for a distributed system. arXiv e-prints arXiv:1908.04904."},{"issue":"5","key":"6001_CR43","doi-asserted-by":"publisher","first-page":"2173","DOI":"10.1214\/009053607000000127","volume":"35","author":"H Zou","year":"2007","unstructured":"Zou, H., Hastie, T., & Tibshirani, R. (2007). On the \u201cdegrees of freedom\u2019\u2019 of the lasso. The Annals of Statistics, 35(5), 2173\u20132192.","journal-title":"The Annals of Statistics"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06001-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-021-06001-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06001-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T03:00:07Z","timestamp":1725505207000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-021-06001-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,26]]},"references-count":43,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2023,10]]}},"alternative-id":["6001"],"URL":"https:\/\/doi.org\/10.1007\/s10994-021-06001-x","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,26]]},"assertion":[{"value":"6 June 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 March 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 May 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}