{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T15:51:41Z","timestamp":1778860301728,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,6,23]],"date-time":"2020-06-23T00:00:00Z","timestamp":1592870400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF","award":["1664142"],"award-info":[{"award-number":["1664142"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,6,23]]},"DOI":"10.1145\/3369583.3392681","type":"proceedings-article","created":{"date-parts":[[2020,6,22]],"date-time":"2020-06-22T03:27:27Z","timestamp":1592796447000},"page":"113-124","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["FFT-based Gradient Sparsification for the Distributed Training of Deep Neural Networks"],"prefix":"10.1145","author":[{"given":"Linnan","family":"Wang","sequence":"first","affiliation":[{"name":"Brown University, Providence, RI, USA"}]},{"given":"Wei","family":"Wu","sequence":"additional","affiliation":[{"name":"Los Alamos National Laboratory, Los Alamos, NM, USA"}]},{"given":"Junyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Minnesota, Twin Cities, Minneapolis, MN, USA"}]},{"given":"Hang","family":"Liu","sequence":"additional","affiliation":[{"name":"Stevens Institute of Technology, Hoboken, NJ, USA"}]},{"given":"George","family":"Bosilca","sequence":"additional","affiliation":[{"name":"University of Tennessee, Knoxville, TN, USA"}]},{"given":"Maurice","family":"Herlihy","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}]},{"given":"Rodrigo","family":"Fonseca","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,6,23]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Mart'in Abadi Paul Barham Jianmin Chen Zhifeng Chen Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Geoffrey Irving Michael Isard et al. 2016. TensorFlow: A System for Large-Scale Machine Learning.. In OSDI."},{"key":"e_1_3_2_2_2_1","volume-title":"Sparse communication for distributed gradient descent. arXiv preprint arXiv:1704.05021","author":"Aji Alham Fikri","year":"2017","unstructured":"Alham Fikri Aji and Kenneth Heafield. 2017. Sparse communication for distributed gradient descent. arXiv preprint arXiv:1704.05021 (2017)."},{"key":"e_1_3_2_2_3_1","first-page":"4","article-title":"Fast k-selection algorithms for graphics processing units","volume":"17","author":"Alabi Tolu","year":"2012","unstructured":"Tolu Alabi, Jeffrey D Blanchard, Bradley Gordon, and Russel Steinbach. 2012. Fast k-selection algorithms for graphics processing units. Journal of Experimental Algorithmics (JEA), Vol. 17 (2012), 4--2.","journal-title":"Journal of Experimental Algorithmics (JEA)"},{"key":"e_1_3_2_2_4_1","volume-title":"QSGD: Communication-Efficient SGD via Gradient Quantization and Encoding. In Advances in Neural Information Processing Systems.","author":"Alistarh Dan","year":"2017","unstructured":"Dan Alistarh, Demjan Grubic, Jerry Li, Ryota Tomioka, and Milan Vojnovic. 2017. QSGD: Communication-Efficient SGD via Gradient Quantization and Encoding. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_2_5_1","unstructured":"Dan Alistarh Torsten Hoefler Mikael Johansson Nikola Konstantinov Sarit Khirirat and C\u00e9dric Renggli. 2018. The convergence of sparsified gradient methods. In Advances in Neural Information Processing Systems. 5975--5985."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018769"},{"key":"e_1_3_2_2_7_1","volume-title":"Client-server architecture","author":"Berson Alex","unstructured":"Alex Berson. 1992. Client-server architecture. Number IEEE-802. McGraw-Hill."},{"key":"e_1_3_2_2_8_1","volume-title":"Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274","author":"Chen Tianqi","year":"2015","unstructured":"Tianqi Chen, Mu Li, Yutian Li, Min Lin, Naiyan Wang, Minjie Wang, Tianjun Xiao, Bing Xu, Chiyuan Zhang, and Zheng Zhang. 2015. Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274 (2015)."},{"key":"e_1_3_2_2_9_1","unstructured":"Christopher M De Sa Ce Zhang Kunle Olukotun and Christopher R\u00e9. 2015. Taming the wild: A unified analysis of hogwild-style algorithms. In Advances in neural information processing systems. 2674--2682."},{"key":"e_1_3_2_2_10_1","unstructured":"Jeffrey Dean Greg Corrado Rajat Monga Kai Chen Matthieu Devin Mark Mao Andrew Senior Paul Tucker Ke Yang Quoc V Le et al. 2012. Large scale distributed deep networks. In Advances in neural information processing systems."},{"key":"e_1_3_2_2_11_1","volume-title":"Fast Error-Bounded Lossy HPC Data Compression with SZ. In 2016 IEEE International Parallel and Distributed Processing Symposium (IPDPS).","author":"Di S.","unstructured":"S. Di and F. Cappello. 2016. Fast Error-Bounded Lossy HPC Data Compression with SZ. In 2016 IEEE International Parallel and Distributed Processing Symposium (IPDPS)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30218-6_19"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1137\/120880811"},{"key":"e_1_3_2_2_14_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3326608"},{"key":"e_1_3_2_2_18_1","first-page":"I","article-title":"Flexpoint: An Adaptive Numerical Format for Efficient Training of Deep Neural Networks","volume":"30","author":"K\u00f6ster Urs","year":"2017","unstructured":"Urs K\u00f6ster, Tristan Webb, Xin Wang, Marcel Nassar, Arjun K Bansal, William Constable, Oguz Elibol, Scott Gray, Stewart Hall, Luke Hornof, Amir Khosrowshahi, Carey Kloss, Ruby J Pai, and Naveen Rao. 2017. Flexpoint: An Adaptive Numerical Format for Efficient Training of Deep Neural Networks. In Advances in Neural Information Processing Systems 30, I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.). Curran Associates, Inc., 1742--1752.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_19_1","volume-title":"Alexander J Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J Shekita, and Bor-Yiing Su.","author":"Li Mu","year":"2014","unstructured":"Mu Li, David G Andersen, Jun Woo Park, Alexander J Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J Shekita, and Bor-Yiing Su. 2014. Scaling Distributed Machine Learning with the Parameter Server.. In OSDI."},{"key":"e_1_3_2_2_20_1","volume-title":"Deep gradient compression: Reducing the communication bandwidth for distributed training. arXiv preprint arXiv:1712.01887","author":"Lin Yujun","year":"2017","unstructured":"Yujun Lin, Song Han, Huizi Mao, Yu Wang, and William J Dally. 2017. Deep gradient compression: Reducing the communication bandwidth for distributed training. arXiv preprint arXiv:1712.01887 (2017)."},{"key":"e_1_3_2_2_21_1","series-title":"SIAM Journal on optimization","volume-title":"Robust stochastic approximation approach to stochastic programming","author":"Nemirovski Arkadi","year":"2009","unstructured":"Arkadi Nemirovski, Anatoli Juditsky, Guanghui Lan, and Alexander Shapiro. 2009. Robust stochastic approximation approach to stochastic programming. SIAM Journal on optimization, Vol. 19, 4 (2009), 1574--1609."},{"key":"e_1_3_2_2_22_1","volume-title":"SparCML: High-Performance Sparse Communication for Machine Learning. CoRR","author":"Renggli C\u00e9dric","year":"2018","unstructured":"C\u00e9dric Renggli, Dan Alistarh, and Torsten Hoefler. 2018. SparCML: High-Performance Sparse Communication for Machine Learning. CoRR, Vol. abs\/1802.08021 (2018)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-274"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","unstructured":"Christian Szegedy Sergey Ioffe Vincent Vanhoucke and Alexander A Alemi. 2017. Inception-v4 inception-resnet and the impact of residual connections on learning.. In AAAI.","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2017.06.003"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178491"},{"key":"e_1_3_2_2_28_1","unstructured":"Jianqiao Wangni Jialei Wang Ji Liu and Tong Zhang. 2018. Gradient sparsification for communication-efficient distributed optimization. In Advances in Neural Information Processing Systems. 1306--1316."},{"key":"e_1_3_2_2_29_1","volume-title":"Terngrad: Ternary gradients to reduce communication in distributed deep learning. In Advances in Neural Information Processing Systems.","author":"Wen Wei","year":"2017","unstructured":"Wei Wen, Cong Xu, Feng Yan, Chunpeng Wu, Yandan Wang, Yiran Chen, and Hai Li. 2017. Terngrad: Ternary gradients to reduce communication in distributed deep learning. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126686.3126749"}],"event":{"name":"HPDC '20: The 29th International Symposium on High-Performance Parallel and Distributed Computing","location":"Stockholm Sweden","acronym":"HPDC '20","sponsor":["University of Arizona University of Arizona","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 29th International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3369583.3392681","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3369583.3392681","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3369583.3392681","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:58Z","timestamp":1750203898000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3369583.3392681"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,23]]},"references-count":30,"alternative-id":["10.1145\/3369583.3392681","10.1145\/3369583"],"URL":"https:\/\/doi.org\/10.1145\/3369583.3392681","relation":{},"subject":[],"published":{"date-parts":[[2020,6,23]]},"assertion":[{"value":"2020-06-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}