{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:35:37Z","timestamp":1743096937600,"version":"3.40.3"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434174"},{"type":"electronic","value":"9783031434181"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43418-1_39","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T09:02:26Z","timestamp":1694854946000},"page":"654-670","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Distributed Adaptive Optimization with\u00a0Divisible Communication"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7480-5010","authenticated-orcid":false,"given":"An","family":"Xu","sequence":"first","affiliation":[]},{"given":"Yang","family":"Bai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"key":"39_CR1","doi-asserted-by":"crossref","unstructured":"Aji, A.F., Heafield, K.: Sparse communication for distributed gradient descent. arXiv preprint arXiv:1704.05021 (2017)","DOI":"10.18653\/v1\/D17-1045"},{"key":"39_CR2","unstructured":"Alistarh, D., Grubic, D., Li, J., Tomioka, R., Vojnovic, M.: QSGD: communication-efficient SGD via gradient quantization and encoding. In: Advances in Neural Information Processing Systems, pp. 1709\u20131720 (2017)"},{"key":"39_CR3","unstructured":"Alistarh, D., Hoefler, T., Johansson, M., Konstantinov, N., Khirirat, S., Renggli, C.: The convergence of sparsified gradient methods. In: Advances in Neural Information Processing Systems, pp. 5973\u20135983 (2018)"},{"key":"39_CR4","doi-asserted-by":"crossref","unstructured":"Basu, D., Data, D., Karakus, C., Diggavi, S.: Qsparse-local-SGD: distributed SGD with quantization, sparsification, and local computations. arXiv preprint arXiv:1906.02367 (2019)","DOI":"10.1109\/JSAIT.2020.2985917"},{"key":"39_CR5","unstructured":"Bernstein, J., Wang, Y.X., Azizzadenesheli, K., Anandkumar, A.: signSGD: compressed optimisation for non-convex problems. In: International Conference on Machine Learning, pp. 560\u2013569. PMLR (2018)"},{"key":"39_CR6","unstructured":"Chen, C., Shen, L., Huang, H., Liu, W., Luo, Z.Q.: Efficient-adam: communication-efficient distributed adam with complexity analysis (2020)"},{"key":"39_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., Li, X., Li, P.: Toward communication efficient adaptive gradient method. In: Proceedings of the 2020 ACM-IMS on Foundations of Data Science Conference, pp. 119\u2013128 (2020)","DOI":"10.1145\/3412815.3416891"},{"key":"39_CR8","doi-asserted-by":"crossref","unstructured":"Gao, H., Xu, A., Huang, H.: On the convergence of communication-efficient local SGD for federated learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 7510\u20137518 (2021)","DOI":"10.1609\/aaai.v35i9.16920"},{"issue":"11","key":"39_CR9","doi-asserted-by":"publisher","first-page":"6103","DOI":"10.1109\/TNNLS.2021.3072238","volume":"33","author":"B Gu","year":"2021","unstructured":"Gu, B., Xu, A., Huo, Z., Deng, C., Huang, H.: Privacy-preserving asynchronous vertical federated learning algorithms for multiparty collaborative learning. IEEE Trans. Neural Netw. Learn. Syst. 33(11), 6103\u20136115 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"39_CR10","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/978-3-031-19803-8_26","volume-title":"Computer Vision","author":"P Guo","year":"2022","unstructured":"Guo, P., et al.: Auto-FedRL: federated hyperparameter optimization for multi-institutional medical image segmentation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, pp. 437\u2013455. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19803-8_26"},{"key":"39_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"39_CR12","unstructured":"Huang, Y., et al.: Tangram: bridging immutable and mutable abstractions for distributed data analytics. In: USENIX Annual Technical Conference, pp. 191\u2013206 (2019)"},{"key":"39_CR13","unstructured":"Karimireddy, S.P., Kale, S., Mohri, M., Reddi, S., Stich, S., Suresh, A.T.: Scaffold: stochastic controlled averaging for federated learning. In: International Conference on Machine Learning, pp. 5132\u20135143. PMLR (2020)"},{"key":"39_CR14","unstructured":"Karimireddy, S.P., Rebjock, Q., Stich, S., Jaggi, M.: Error feedback fixes SignSGD and other gradient compression schemes. In: International Conference on Machine Learning, pp. 3252\u20133261. PMLR (2019)"},{"key":"39_CR15","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"39_CR16","unstructured":"Kone\u010dn\u1ef3, J., McMahan, H.B., Yu, F.X., Richt\u00e1rik, P., Suresh, A.T., Bacon, D.: Federated learning: strategies for improving communication efficiency. arXiv preprint arXiv:1610.05492 (2016)"},{"key":"39_CR17","unstructured":"Krizhevsky, A., Hinton, G., et al.: Learning multiple layers of features from tiny images (2009)"},{"key":"39_CR18","doi-asserted-by":"crossref","unstructured":"Li, C., Awan, A.A., Tang, H., Rajbhandari, S., He, Y.: 1-bit lamb: communication efficient large-scale large-batch training with lamb\u2019s convergence speed. arXiv preprint arXiv:2104.06069 (2021)","DOI":"10.1109\/HiPC56025.2022.00044"},{"key":"39_CR19","doi-asserted-by":"crossref","unstructured":"Li, J., et al.: A general and efficient querying method for learning to hash. In: Proceedings of the 2018 International Conference on Management of Data, pp. 1333\u20131347 (2018)","DOI":"10.1145\/3183713.3183750"},{"key":"39_CR20","unstructured":"Lin, Y., Han, S., Mao, H., Wang, Y., Dally, W.J.: Deep gradient compression: reducing the communication bandwidth for distributed training. arXiv preprint arXiv:1712.01887 (2017)"},{"key":"39_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Y., Xu, A., Chen, Z.: Map-based deep imitation learning for obstacle avoidance. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 8644\u20138649. IEEE (2018)","DOI":"10.1109\/IROS.2018.8593683"},{"key":"39_CR22","unstructured":"Reddi, S., et al.: Adaptive federated optimization. arXiv preprint arXiv:2003.00295 (2020)"},{"key":"39_CR23","unstructured":"Reddi, S.J., Kale, S., Kumar, S.: On the convergence of adam and beyond. arXiv preprint arXiv:1904.09237 (2019)"},{"issue":"3","key":"39_CR24","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vision (IJCV) 115(3), 211\u2013252 (2015). https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int. J. Comput. Vision (IJCV)"},{"key":"39_CR25","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"39_CR26","unstructured":"Stich, S.U.: Local SGD converges fast and communicates little. In: International Conference on Learning Representations (2018)"},{"key":"39_CR27","unstructured":"Stich, S.U., Cordonnier, J.B., Jaggi, M.: Sparsified SGD with memory. In: Advances in Neural Information Processing Systems, pp. 4447\u20134458 (2018)"},{"key":"39_CR28","unstructured":"Tang, H., et al.: 1-bit adam: communication efficient large-scale training with adam\u2019s convergence speed. arXiv preprint arXiv:2102.02888 (2021)"},{"key":"39_CR29","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 6000\u20136010 (2017)"},{"key":"39_CR30","unstructured":"Vogels, T., Karimireddy, S.P., Jaggi, M.: Powersgd: practical low-rank gradient compression for distributed optimization. In: Advances in Neural Information Processing Systems, pp. 14259\u201314268 (2019)"},{"key":"39_CR31","unstructured":"Wen, W., et al.: Terngrad: ternary gradients to reduce communication in distributed deep learning. In: Advances in Neural Information Processing Systems, pp. 1509\u20131519 (2017)"},{"key":"39_CR32","unstructured":"Xie, C., Zheng, S., Koyejo, O.O., Gupta, I., Li, M., Lin, H.: CSER: communication-efficient SGD with error reset. In: Advances in Neural Information Processing Systems, vol. 33 (2020)"},{"key":"39_CR33","doi-asserted-by":"crossref","unstructured":"Xu, A., Huang, H.: Coordinating momenta for cross-silo federated learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 8735\u20138743 (2022)","DOI":"10.1609\/aaai.v36i8.20853"},{"key":"39_CR34","unstructured":"Xu, A., Huang, H.: Detached error feedback for distributed SGD with random sparsification. In: International Conference on Machine Learning, pp. 24550\u201324575. PMLR (2022)"},{"key":"39_CR35","doi-asserted-by":"crossref","unstructured":"Xu, A., Huo, Z., Huang, H.: On the acceleration of deep learning model parallelism with staleness. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2088\u20132097 (2020)","DOI":"10.1109\/CVPR42600.2020.00216"},{"key":"39_CR36","doi-asserted-by":"crossref","unstructured":"Xu, A., Huo, Z., Huang, H.: Step-ahead error feedback for distributed training with compressed gradient. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 10478\u201310486 (2021)","DOI":"10.1609\/aaai.v35i12.17254"},{"key":"39_CR37","doi-asserted-by":"crossref","unstructured":"Xu, A., et al.: Closing the generalization gap of cross-silo federated medical image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20866\u201320875 (2022)","DOI":"10.1109\/CVPR52688.2022.02020"},{"key":"39_CR38","unstructured":"You, Y., et al.: Large batch optimization for deep learning: training bert in 76 minutes. arXiv preprint arXiv:1904.00962 (2019)"},{"key":"39_CR39","unstructured":"Yu, H., Jin, R., Yang, S.: On the linear speedup analysis of communication efficient momentum SGD for distributed non-convex optimization. In: International Conference on Machine Learning, pp. 7184\u20137193. PMLR (2019)"},{"key":"39_CR40","unstructured":"Zheng, S., Huang, Z., Kwok, J.T.: Communication-efficient distributed blockwise momentum SGD with error-feedback. arXiv preprint arXiv:1905.10936 (2019)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43418-1_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,26]],"date-time":"2024-06-26T13:14:48Z","timestamp":1719407688000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43418-1_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434174","9783031434181"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43418-1_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Our work is to improve the communication efficiency of distributed adaptive optimization. A large part of the work focuses on the theoretical analysis and we do not identify any potential ethical issues.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}