{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:02:14Z","timestamp":1743120134323,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434174"},{"type":"electronic","value":"9783031434181"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43418-1_38","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T09:02:26Z","timestamp":1694854946000},"page":"637-653","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Cross Model Parallelism for\u00a0Faster Bidirectional Training of\u00a0Large Convolutional Neural Networks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7480-5010","authenticated-orcid":false,"given":"An","family":"Xu","sequence":"first","affiliation":[]},{"given":"Yang","family":"Bai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"key":"38_CR1","unstructured":"Abadi, M., et al.: Tensorflow: a system for large-scale machine learning. In: 12th $$\\{$$USENIX$$\\}$$ Symposium on Operating Systems Design and Implementation ($$\\{$$OSDI$$\\}$$ 2016), pp. 265\u2013283 (2016)"},{"key":"38_CR2","unstructured":"Belilovsky, E., Eickenberg, M., Oyallon, E.: Greedy layerwise learning can scale to imagenet. In: International Conference on Machine Learning, pp. 583\u2013593. PMLR (2019)"},{"key":"38_CR3","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/978-3-7908-2604-3_16","volume-title":"COMPSTAT 2010","author":"L Bottou","year":"2010","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Lechevallier, Y., Saporta, G. (eds.) COMPSTAT 2010, pp. 177\u2013186. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-7908-2604-3_16"},{"key":"38_CR4","unstructured":"Chen, T., Xu, B., Zhang, C., Guestrin, C.: Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174 (2016)"},{"issue":"2","key":"38_CR5","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1109\/MM.2017.37","volume":"37","author":"D Foley","year":"2017","unstructured":"Foley, D., Danskin, J.: Ultra-performance pascal GPU and NVLink interconnect. IEEE Micro 37(2), 7\u201317 (2017)","journal-title":"IEEE Micro"},{"key":"38_CR6","doi-asserted-by":"crossref","unstructured":"Gao, H., Xu, A., Huang, H.: On the convergence of communication-efficient local SGD for federated learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 7510\u20137518 (2021)","DOI":"10.1609\/aaai.v35i9.16920"},{"issue":"11","key":"38_CR7","doi-asserted-by":"publisher","first-page":"6103","DOI":"10.1109\/TNNLS.2021.3072238","volume":"33","author":"B Gu","year":"2021","unstructured":"Gu, B., Xu, A., Huo, Z., Deng, C., Huang, H.: Privacy-preserving asynchronous vertical federated learning algorithms for multiparty collaborative learning. IEEE Trans. Neural Netw. Learn. Syst. 33(11), 6103\u20136115 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"38_CR8","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/978-3-031-19803-8_26","volume-title":"ECCV 2022","author":"P Guo","year":"2022","unstructured":"Guo, P., et al.: Auto-FedRL: federated hyperparameter optimization for multi-institutional medical image segmentation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13681, pp. 437\u2013455. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19803-8_26"},{"key":"38_CR9","unstructured":"Han, S., Mao, H., Dally, W.J.: Deep compression: compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)"},{"key":"38_CR10","unstructured":"He, H., Huang, G., Yuan, Y.: Asymmetric valleys: beyond sharp and flat local minima. In: Advances in Neural Information Processing Systems, pp. 2553\u20132564 (2019)"},{"key":"38_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"38_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1007\/978-3-319-46493-0_38","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K He","year":"2016","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Identity mappings in deep residual networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 630\u2013645. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_38"},{"key":"38_CR13","unstructured":"Huang, Y., et al.: Tangram: bridging immutable and mutable abstractions for distributed data analytics. In: USENIX Annual Technical Conference, pp. 191\u2013206 (2019)"},{"key":"38_CR14","unstructured":"Huo, Z., Gu, B., Huang, H.: Training neural networks using features replay. In: Advances in Neural Information Processing Systems, pp. 6659\u20136668 (2018)"},{"key":"38_CR15","unstructured":"Huo, Z., Gu, B., Huang, H., et al.: Decoupled parallel backpropagation with convergence guarantee. In: International Conference on Machine Learning, pp. 2098\u20132106 (2018)"},{"key":"38_CR16","unstructured":"Izmailov, P., Podoprikhin, D., Garipov, T., Vetrov, D., Wilson, A.G.: Averaging weights leads to wider optima and better generalization. arXiv preprint arXiv:1803.05407 (2018)"},{"key":"38_CR17","unstructured":"Jaderberg, M., et al.: Decoupled neural interfaces using synthetic gradients. In: International Conference on Machine Learning, pp. 1627\u20131635. PMLR (2017)"},{"key":"38_CR18","unstructured":"Keskar, N.S., Mudigere, D., Nocedal, J., Smelyanskiy, M., Tang, P.T.P.: On large-batch training for deep learning: generalization gap and sharp minima. arXiv preprint arXiv:1609.04836 (2016)"},{"key":"38_CR19","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)"},{"key":"38_CR20","unstructured":"Krizhevsky, A.: One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997 (2014)"},{"key":"38_CR21","unstructured":"Krizhevsky, A., Hinton, G., et al.: Learning multiple layers of features from tiny images (2009)"},{"issue":"4","key":"38_CR22","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun, Y., et al.: Backpropagation applied to handwritten zip code recognition. Neural Comput. 1(4), 541\u2013551 (1989)","journal-title":"Neural Comput."},{"key":"38_CR23","unstructured":"Lee, S., Kim, J.K., Zheng, X., Ho, Q., Gibson, G.A., Xing, E.P.: On model parallelization and scheduling strategies for distributed machine learning. In: Advances in Neural Information Processing Systems, pp. 2834\u20132842 (2014)"},{"key":"38_CR24","doi-asserted-by":"crossref","unstructured":"Li, J., et al.: A general and efficient querying method for learning to hash. In: Proceedings of the 2018 International Conference on Management of Data, pp. 1333\u20131347 (2018)","DOI":"10.1145\/3183713.3183750"},{"key":"38_CR25","doi-asserted-by":"crossref","unstructured":"Li, M., Andersen, D.G., Smola, A.J., Yu, K.: Communication efficient distributed machine learning with the parameter server. In: Advances in Neural Information Processing Systems, pp. 19\u201327 (2014)","DOI":"10.1145\/2640087.2644155"},{"key":"38_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Y., Xu, A., Chen, Z.: Map-based deep imitation learning for obstacle avoidance. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 8644\u20138649. IEEE (2018)","DOI":"10.1109\/IROS.2018.8593683"},{"key":"38_CR27","doi-asserted-by":"crossref","unstructured":"Narayanan, D., et al.: Pipedream: generalized pipeline parallelism for DNN training. In: Proceedings of the 27th ACM Symposium on Operating Systems Principles, pp. 1\u201315 (2019)","DOI":"10.1145\/3341301.3359646"},{"key":"38_CR28","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019 Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 32, pp. 8024\u20138035. Curran Associates, Inc. (2019)"},{"issue":"6088","key":"38_CR29","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning representations by back-propagating errors. Nature 323(6088), 533\u2013536 (1986)","journal-title":"Nature"},{"issue":"3","key":"38_CR30","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vision (IJCV) 115(3), 211\u2013252 (2015). https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int. J. Comput. Vision (IJCV)"},{"key":"38_CR31","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"38_CR32","unstructured":"Stich, S.U.: Local SGD converges fast and communicates little. In: International Conference on Learning Representations (2018)"},{"issue":"8","key":"38_CR33","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1145\/79173.79181","volume":"33","author":"LG Valiant","year":"1990","unstructured":"Valiant, L.G.: A bridging model for parallel computation. Commun. ACM 33(8), 103\u2013111 (1990)","journal-title":"Commun. ACM"},{"key":"38_CR34","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1492\u20131500 (2017)","DOI":"10.1109\/CVPR.2017.634"},{"key":"38_CR35","doi-asserted-by":"crossref","unstructured":"Xu, A., Huang, H.: Coordinating momenta for cross-silo federated learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 8735\u20138743 (2022)","DOI":"10.1609\/aaai.v36i8.20853"},{"key":"38_CR36","unstructured":"Xu, A., Huang, H.: Detached error feedback for distributed SGD with random sparsification. In: International Conference on Machine Learning, pp. 24550\u201324575. PMLR (2022)"},{"key":"38_CR37","doi-asserted-by":"crossref","unstructured":"Xu, A., Huo, Z., Huang, H.: On the acceleration of deep learning model parallelism with staleness. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2088\u20132097 (2020)","DOI":"10.1109\/CVPR42600.2020.00216"},{"key":"38_CR38","doi-asserted-by":"crossref","unstructured":"Xu, A., Huo, Z., Huang, H.: Step-ahead error feedback for distributed training with compressed gradient. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 10478\u201310486 (2021)","DOI":"10.1609\/aaai.v35i12.17254"},{"key":"38_CR39","doi-asserted-by":"crossref","unstructured":"Xu, A., et al.: Closing the generalization gap of cross-silo federated medical image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20866\u201320875 (2022)","DOI":"10.1109\/CVPR52688.2022.02020"},{"key":"38_CR40","unstructured":"Yu, H., Jin, R., Yang, S.: On the linear speedup analysis of communication efficient momentum SGD for distributed non-convex optimization. In: International Conference on Machine Learning, pp. 7184\u20137193 (2019)"},{"key":"38_CR41","doi-asserted-by":"crossref","unstructured":"Zagoruyko, S., Komodakis, N.: Wide residual networks. arXiv preprint arXiv:1605.07146 (2016)","DOI":"10.5244\/C.30.87"},{"key":"38_CR42","unstructured":"Zhou, S., Wu, Y., Ni, Z., Zhou, X., Wen, H., Zou, Y.: Dorefa-net: training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160 (2016)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43418-1_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,26]],"date-time":"2024-06-26T13:14:20Z","timestamp":1719407660000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43418-1_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434174","9783031434181"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43418-1_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Our work focuses on improving the model parallelism for distributed training in data-centers. We do not see any potential ethical issues.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}