{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T14:17:55Z","timestamp":1742998675033,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030638320"},{"type":"electronic","value":"9783030638337"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-63833-7_37","type":"book-chapter","created":{"date-parts":[[2020,11,19]],"date-time":"2020-11-19T06:03:35Z","timestamp":1605765815000},"page":"440-451","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["HPSGD: Hierarchical Parallel SGD with Stale Gradients Featuring"],"prefix":"10.1007","author":[{"given":"Yuhao","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Qing","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Hailun","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jiancheng","family":"Lv","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,20]]},"reference":[{"key":"37_CR1","doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"37_CR2","unstructured":"Dean, J., Corrado, G.S., Monga, R., Kai, C., Ng, A.Y.: Large scale distributed deep networks. In: Advances in Neural Information Processing Systems (2012)"},{"key":"37_CR3","unstructured":"Haddadpour, F., Kamani, M.M., Mahdavi, M., Cadambe, V.: Local SGD with periodic averaging: tighter analysis and adaptive synchronization. In: Advances in Neural Information Processing Systems, pp. 11080\u201311092 (2019)"},{"key":"37_CR4","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"37_CR5","unstructured":"Hoffer, E., Hubara, I., Soudry, D.: Train longer, generalize better: closing the generalization gap in large batch training of neural networks. In: Advances in Neural Information Processing Systems, pp. 1731\u20131741 (2017)"},{"key":"37_CR6","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Weinberger, K., van der Maaten, L.: Densely connected convolutional networks. arXiv preprint arXiv:1608.06993 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"37_CR7","unstructured":"Jia, X., et al.: Highly scalable deep learning training system with mixed-precision: training imagenet in four minutes. arXiv preprint arXiv:1807.11205 (2018)"},{"key":"37_CR8","unstructured":"Karimireddy, S.P., Rebjock, Q., Stich, S.U., Jaggi, M.: Error feedback fixes SignSGD and other gradient compression schemes. arXiv preprint arXiv:1901.09847 (2019)"},{"key":"37_CR9","unstructured":"Krizhevsky, A., Hinton, G., et al.: Learning multiple layers of features from tiny images (2009)"},{"key":"37_CR10","doi-asserted-by":"crossref","unstructured":"Li, M., et al.: Scaling distributed machine learning with the parameter server. In: 11th $$\\{$$USENIX$$\\}$$ Symposium on Operating Systems Design and Implementation ($$\\{$$OSDI 2014$$\\}$$), pp. 583\u2013598 (2014)","DOI":"10.1145\/2640087.2644155"},{"key":"37_CR11","doi-asserted-by":"crossref","unstructured":"Li, M., Andersen, D.G., Smola, A.J., Yu, K.: Communication efficient distributed machine learning with the parameter server. In: Advances in Neural Information Processing Systems, pp. 19\u201327 (2014)","DOI":"10.1145\/2640087.2644155"},{"key":"37_CR12","unstructured":"Lian, X., Zhang, C., Zhang, H., Hsieh, C.J., Zhang, W., Liu, J.: Can decentralized algorithms outperform centralized algorithms? A case study for decentralized parallel stochastic gradient descent. In: Advances in Neural Information Processing Systems, pp. 5330\u20135340 (2017)"},{"key":"37_CR13","unstructured":"Lin, Y., Han, S., Mao, H., Wang, Y., Dally, W.J.: Deep gradient compression: reducing the communication bandwidth for distributed training. arXiv preprint arXiv:1712.01887 (2017)"},{"key":"37_CR14","doi-asserted-by":"crossref","unstructured":"Ouyang, S., Dong, D., Xu, Y., Xiao, L.: Communication optimization strategies for distributed deep learning: a survey. arXiv e-prints arXiv:2003.03009 (2020)","DOI":"10.1016\/j.jpdc.2020.11.005"},{"key":"37_CR15","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"37_CR16","unstructured":"Stich, S.U.: Local SGD converges fast and communicates little. arXiv preprint arXiv:1805.09767 (2018)"},{"key":"37_CR17","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"37_CR18","unstructured":"Haddadpour, F., Kamani, M.M., Mahdavi, M., Cadambe, V.: Trading redundancy for communication: speeding up distributed SGD for non-convex optimization. In: International Conference on Machine Learning, pp. 2545\u20132554 (2019)"},{"key":"37_CR19","unstructured":"Xu, H., et al.: Compressed communication for distributed deep learning: survey and quantitative evaluation. Technical report (2020)"},{"key":"37_CR20","doi-asserted-by":"crossref","unstructured":"Yu, H., Yang, S., Zhu, S.: Parallel restarted SGD with faster convergence and less communication: Demystifying why model averaging works for deep learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 5693\u20135700 (2019)","DOI":"10.1609\/aaai.v33i01.33015693"},{"key":"37_CR21","unstructured":"Yu, M., et al.: GradiVeQ: vector quantization for bandwidth-efficient gradient aggregation in distributed CNN training. In: Advances in Neural Information Processing Systems, pp. 5123\u20135133 (2018)"},{"key":"37_CR22","unstructured":"Zhang, W., Gupta, S., Lian, X., Liu, J.: Staleness-aware Async-SGD for distributed deep learning. arXiv preprint arXiv:1511.05950 (2015)"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-63833-7_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T11:02:19Z","timestamp":1710327739000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-63833-7_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030638320","9783030638337"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-63833-7_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"20 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bangkok","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Thailand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 November 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.apnns.org\/ICONIP2020","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"618","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"187","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"189","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.18","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.68","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to COVID-19 pandemic the conference was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}