{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:05:09Z","timestamp":1776888309846,"version":"3.51.2"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031723469","type":"print"},{"value":"9783031723476","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72347-6_13","type":"book-chapter","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T13:02:55Z","timestamp":1726491775000},"page":"186-201","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Layer-Wised Sparsification Based on\u00a0Hypernetwork for\u00a0Distributed NN Training"],"prefix":"10.1007","author":[{"given":"Yusen","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiaxun","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qing","family":"Ye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,17]]},"reference":[{"key":"13_CR1","unstructured":"Alistarh, D., Grubic, D., Li, J.Z., Tomioka, R., Vojnovic, M.: QSGD: communication-efficient SGD via gradient quantization and encoding. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 1707\u20131718. NIPS\u201917, Curran Associates Inc., Red Hook, NY, USA (2017)"},{"key":"13_CR2","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.inffus.2019.12.012","volume":"58","author":"A Barredo Arrieta","year":"2020","unstructured":"Barredo Arrieta, A., et al.: Explainable artificial intelligence (XAI): concepts, taxonomies, opportunities and challenges toward responsible AI. Inf. Fusion 58, 82\u2013115 (2020). https:\/\/doi.org\/10.1016\/j.inffus.2019.12.012","journal-title":"Inf. Fusion"},{"key":"13_CR3","unstructured":"Basu, D., Data, D., Karakus, C., Diggavi, S.: Qsparse-local-SGD: Distributed SGD with quantization, sparsification and local computations. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019 Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a032. Curran Associates, Inc. (2019). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/d202ed5bcfa858c15a9f383c3e386ab2-Paper.pdf"},{"key":"13_CR4","doi-asserted-by":"publisher","unstructured":"Ben-Nun, T., Hoefler, T.: Demystifying parallel and distributed deep learning: an in-depth concurrency analysis. ACM Comput. Surv. 52(4) (2019). https:\/\/doi.org\/10.1145\/3320060","DOI":"10.1145\/3320060"},{"key":"13_CR5","unstructured":"Bernstein, J., Wang, Y.X., Azizzadenesheli, K., Anandkumar, A.: signSGD: compressed optimisation for non-convex problems. In: Dy, J., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a080, pp. 560\u2013569. PMLR (10\u201315 Jul 2018).https:\/\/proceedings.mlr.press\/v80\/bernstein18a.html"},{"key":"13_CR6","unstructured":"Brock, A., Lim, T., Ritchie, J.M., Weston, N.: Smash: One-shot model architecture search through hypernetworks (2017)"},{"key":"13_CR7","unstructured":"Brown, T.B., et al.: Language models are few-shot learners. In: Proceedings of the 34th International Conference on Neural Information Processing Systems, NIPS\u201920, Curran Associates Inc., Red Hook, NY, USA (2020)"},{"key":"13_CR8","unstructured":"Das, D., et al.: Distributed deep learning using synchronous stochastic gradient descent (2016)"},{"key":"13_CR9","unstructured":"Dean, J., et al.: Large scale distributed deep networks. In: Proceedings of the 25th International Conference on Neural Information Processing Systems - Volume 1, pp. 1223\u20131231. NIPS\u201912, Curran Associates Inc., Red Hook, NY, USA (2012)"},{"key":"13_CR10","unstructured":"Frankle, J., Carbin, M.: The lottery ticket hypothesis: finding sparse, trainable neural networks (2019)"},{"key":"13_CR11","unstructured":"Ha, D., Dai, A., Le, Q.V.: Hypernetworks (2016)"},{"key":"13_CR12","unstructured":"Jia, X., De\u00a0Brabandere, B., Tuytelaars, T., Gool, L.V.: Dynamic filter networks. In: Lee, D., Sugiyama, M., Luxburg, U., Guyon, I., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a029. Curran Associates, Inc. (2016). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2016\/file\/8bf1211fd4b7b94528899de0a43b9fb3-Paper.pdf"},{"key":"13_CR13","doi-asserted-by":"publisher","unstructured":"Jiang, J., Fu, F., Yang, T., Cui, B.: SketchML: accelerating distributed machine learning with data sketches. In: Proceedings of the 2018 International Conference on Management of Data, pp. 1269\u20131284. SIGMOD \u201918, Association for Computing Machinery, New York, NY, USA (2018).https:\/\/doi.org\/10.1145\/3183713.3196894, https:\/\/doi.org\/10.1145\/3183713.3196894","DOI":"10.1145\/3183713.3196894"},{"key":"13_CR14","doi-asserted-by":"publisher","unstructured":"Khani, M., et al.: Sip-ml: high-bandwidth optical network interconnects for machine learning training. In: Proceedings of the 2021 ACM SIGCOMM 2021 Conference, pp. 657\u2013675. SIGCOMM \u201921, Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3452296.3472900","DOI":"10.1145\/3452296.3472900"},{"key":"13_CR15","doi-asserted-by":"publisher","unstructured":"Klocek, S., Maziarka, L., Wo\u0142czyk, M., Tabor, J., Nowak, J., undefinedmieja, M.: Hypernetwork functional image representation. In: Artificial Neural Networks and Machine Learning - ICANN 2019: Workshop and Special Sessions: 28th International Conference on Artificial Neural Networks, Munich, Germany, September 17-19, 2019, Proceedings, pp. 496\u2013510. Springer-Verlag, Berlin, Heidelberg (2019). https:\/\/doi.org\/10.1007\/978-3-030-30493-5_48","DOI":"10.1007\/978-3-030-30493-5_48"},{"key":"13_CR16","doi-asserted-by":"publisher","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Commun. ACM 60(6), 84\u201390 (2017). https:\/\/doi.org\/10.1145\/3065386","DOI":"10.1145\/3065386"},{"key":"13_CR17","unstructured":"Lin, Y., Han, S., Mao, H., Wang, Y., Dally, W.J.: Deep Gradient Compression: Reducing the Communication Bandwidth for Distributed Training. ICLR (2017)"},{"key":"13_CR18","doi-asserted-by":"publisher","unstructured":"Ma, X., Zhang, J., Guo, S., Xu, W.: Layer-wised model aggregation for personalized federated learning. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10082\u201310091 (2022).https:\/\/doi.org\/10.1109\/CVPR52688.2022.00985","DOI":"10.1109\/CVPR52688.2022.00985"},{"key":"13_CR19","unstructured":"Raghu, M., Gilmer, J., Yosinski, J., Sohl-Dickstein, J.: SVCCA: singular vector canonical correlation analysis for deep learning dynamics and interpretability. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 6078\u20136087. NIPS\u201917, Curran Associates Inc., Red Hook, NY, USA (2017)"},{"key":"13_CR20","doi-asserted-by":"publisher","unstructured":"Shi, S., et al.: A distributed synchronous SGD algorithm with global top-k sparsification for low bandwidth networks. In: 2019 IEEE 39th International Conference on Distributed Computing Systems (ICDCS), pp. 2238\u20132247 (2019).https:\/\/doi.org\/10.1109\/ICDCS.2019.00220","DOI":"10.1109\/ICDCS.2019.00220"},{"key":"13_CR21","unstructured":"Tang, Z., Shi, S., Chu, X., Wang, W., Li, B.: Communication-efficient distributed deep learning: a comprehensive survey. CoRR abs\/2003.06307 (2020). https:\/\/arxiv.org\/abs\/2003.06307"},{"key":"13_CR22","unstructured":"Vogels, T., Karimireddy, S.P., Jaggi, M.: PowerSGD: practical low-rank gradient compression for distributed optimization (2020)"},{"key":"13_CR23","unstructured":"Wangni, J., Wang, J., Liu, J., Zhang, T.: Gradient sparsification for communication-efficient distributed optimization. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, pp. 1306\u20131316. NIPS\u201918, Curran Associates Inc., Red Hook, NY, USA (2018)"},{"key":"13_CR24","unstructured":"Wen, W., et al.: TernGrad: ternary gradients to reduce communication in distributed deep learning. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 1508\u20131518. NIPS\u201917, Curran Associates Inc., Red Hook, NY, USA (2017)"},{"issue":"7","key":"13_CR25","doi-asserted-by":"publisher","first-page":"1753","DOI":"10.1109\/TPDS.2020.3046774","volume":"32","author":"Q Ye","year":"2021","unstructured":"Ye, Q., Sun, Y., Zhang, J., Lv, J.: A distributed framework for EA-based NAS. IEEE Trans. Parallel Distrib. Syst. 32(7), 1753\u20131764 (2021). https:\/\/doi.org\/10.1109\/TPDS.2020.3046774","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"13_CR26","doi-asserted-by":"publisher","unstructured":"Ye, Q., Zhou, Y., Shi, M., Lv, J.: FLSGD: free local SGD with parallel synchronization. J. Supercomput. 78(10), 12410\u201312433 (2022).https:\/\/doi.org\/10.1007\/s11227-021-04267-5","DOI":"10.1007\/s11227-021-04267-5"},{"key":"13_CR27","unstructured":"Ye, Q., Zhou, Y., Shi, M., Sun, Y., Lv, J.: DBS: dynamic batch size for distributed deep neural network training. CoRR abs\/2007.11831 (2020). https:\/\/arxiv.org\/abs\/2007.11831"},{"key":"13_CR28","unstructured":"Zhang, C., Bengio, S., Singer, Y.: Are all layers created equal? J. Mach. Learn. Res. 23(1) (2022)"},{"key":"13_CR29","unstructured":"Zhang, C., Ren, M., Urtasun, R.: Graph hypernetworks for neural architecture search. ArXiv abs\/1810.05749 (2018). https:\/\/api.semanticscholar.org\/CorpusID:53113128"},{"key":"13_CR30","doi-asserted-by":"publisher","unstructured":"Zhang, S., Zhang, C., You, Z., Zheng, R., Xu, B.: Asynchronous stochastic gradient descent for DNN training. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6660\u20136663 (2013).https:\/\/doi.org\/10.1109\/ICASSP.2013.6638950","DOI":"10.1109\/ICASSP.2013.6638950"},{"issue":"1","key":"13_CR31","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1109\/TPDS.2021.3090331","volume":"33","author":"Y Zhou","year":"2022","unstructured":"Zhou, Y., Ye, Q., Lv, J.: Communication-efficient federated learning with compensated overlap-fedAvg. IEEE Trans. Parallel Distrib. Syst. 33(1), 192\u2013205 (2022). https:\/\/doi.org\/10.1109\/TPDS.2021.3090331","journal-title":"IEEE Trans. Parallel Distrib. Syst."}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72347-6_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T13:17:06Z","timestamp":1726492626000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72347-6_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031723469","9783031723476"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72347-6_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"17 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lugano","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"33","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}