{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T13:14:58Z","timestamp":1773926098469,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,2,16]],"date-time":"2019-02-16T00:00:00Z","timestamp":1550275200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,2,16]]},"DOI":"10.1145\/3293883.3295710","type":"proceedings-article","created":{"date-parts":[[2019,2,5]],"date-time":"2019-02-05T20:44:12Z","timestamp":1549399452000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":49,"title":["Beyond human-level accuracy"],"prefix":"10.1145","author":[{"given":"Joel","family":"Hestness","sequence":"first","affiliation":[{"name":"Baidu Research"}]},{"given":"Newsha","family":"Ardalani","sequence":"additional","affiliation":[{"name":"Baidu Research"}]},{"given":"Gregory","family":"Diamos","sequence":"additional","affiliation":[{"name":"Baidu Research"}]}],"member":"320","published-online":{"date-parts":[[2019,2,16]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2018. AI and Compute. https:\/\/blog.openai.com\/ai-and-compute  2018. AI and Compute. https:\/\/blog.openai.com\/ai-and-compute"},{"key":"e_1_3_2_1_2_1","unstructured":"2018. DAWNBench. https:\/\/dawn.cs.stanford.edu\/benchmark\/  2018. DAWNBench. https:\/\/dawn.cs.stanford.edu\/benchmark\/"},{"key":"e_1_3_2_1_3_1","unstructured":"2018. DeepBench. https:\/\/github.com\/baidu-research\/DeepBench  2018. DeepBench. https:\/\/github.com\/baidu-research\/DeepBench"},{"key":"e_1_3_2_1_4_1","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/  Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/"},{"key":"e_1_3_2_1_5_1","volume-title":"QSGD: Communication-Efficient SGD via Gradient Quantization and Encoding. In Advances in Neural Information Processing Systems (NIPS). 1709--1720.","author":"Alistarh Dan","year":"2017"},{"key":"e_1_3_2_1_6_1","volume-title":"The International Conference on Machine Learning (ICML). 173--182","author":"Amodei Dario","year":"2016"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073012.1073017"},{"key":"e_1_3_2_1_8_1","volume-title":"Exploring Neural Transducers for End-to-end Speech Recognition. In IEEE Automatic Speech Recognition and Understanding Workshop. 206--213","author":"Battenberg Eric","year":"2017"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2765695"},{"key":"e_1_3_2_1_10_1","volume-title":"cuDNN: Efficient Primitives for Deep Learning. arXiv preprint arXiv:1410.0759","author":"Chetlur Sharan","year":"2014"},{"key":"e_1_3_2_1_11_1","volume-title":"Deep Learning with COTS HPC Systems. In International Conference on Machine Learning (ICML). 1337--1345","author":"Coates Adam","year":"2013"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/223428.207162"},{"key":"e_1_3_2_1_13_1","unstructured":"Jeff Dean et al. 2017. Machine Learning for Systems and Systems for Machine Learning. Presentation at ML Systems Workshop with Neural Information Processing Systems (NIPS) Conference. http:\/\/learningsys.org\/nips17\/assets\/slides\/dean-nips17.pdf  Jeff Dean et al. 2017. Machine Learning for Systems and Systems for Machine Learning. Presentation at ML Systems Workshop with Neural Information Processing Systems (NIPS) Conference. http:\/\/learningsys.org\/nips17\/assets\/slides\/dean-nips17.pdf"},{"key":"e_1_3_2_1_14_1","volume-title":"Large Minibatch SGD: Training ImageNet in 1 Hour","author":"Goyal Priya","year":"2017"},{"key":"e_1_3_2_1_15_1","volume-title":"Deep Speech: Scaling Up End-to-End Speech Recognition. arXiv preprint arXiv:1412.5567","author":"Hannun Awni","year":"2014"},{"key":"e_1_3_2_1_16_1","volume-title":"Nearly-tight VC-dimension and Pseudodimension Bounds for Piecewise Linear Neural Networks. In The Conference on Learning Theory (COLT)","volume":"65","author":"Harvey Nick","year":"2017"},{"key":"e_1_3_2_1_17_1","volume-title":"Deep Residual Learning for Image Recognition. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 770--778","author":"He Kaiming","year":"2016"},{"key":"e_1_3_2_1_18_1","volume-title":"Yang Yang, and Yanqi Zhou.","author":"Hestness Joel","year":"2017"},{"key":"e_1_3_2_1_19_1","volume-title":"Exploring the Limits of Language Modeling. arXiv preprint arXiv:1602.02410v2","author":"Jozefowicz Rafal","year":"2016"},{"key":"e_1_3_2_1_20_1","volume-title":"One Weird Trick for Parallelizing Convolutional Neural Networks. arXiv preprint arXiv:1404.5997","author":"Krizhevsky Alex","year":"2014"},{"key":"e_1_3_2_1_21_1","volume-title":"Deep Gradient Compression: Reducing the Communication Bandwidth for Distributed Training. In The International Conference on Learning Representations (ICLR).","author":"Lin Yujun"},{"key":"e_1_3_2_1_22_1","volume-title":"The Conference on Empirical Methods in Natural Language Processing (EMNLP). 1412--1421","author":"Luong Thang"},{"key":"e_1_3_2_1_23_1","volume-title":"Semantics-Preserving Parallelization of Stochastic Gradient Descent. In IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE, 224--233","author":"Maleki Saeed","year":"2018"},{"key":"e_1_3_2_1_24_1","unstructured":"MLPerf. 2018. MLPerf: A Broad ML Benchmark Suite for Measuring Performance of ML Software Frameworks ML Hardware Accelerators and ML Cloud Platforms. https:\/\/mlperf.org\/  MLPerf. 2018. MLPerf: A Broad ML Benchmark Suite for Measuring Performance of ML Software Frameworks ML Hardware Accelerators and ML Cloud Platforms. https:\/\/mlperf.org\/"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2008.09.002"},{"key":"e_1_3_2_1_26_1","volume-title":"Large Scale Language Modeling: Converging on 40GB of Text in Four Hours. arXiv preprint arXiv:1808.01371","author":"Puri Raul","year":"2018"},{"key":"e_1_3_2_1_27_1","volume-title":"Paleo: A Performance Model for Deep Neural Networks. In The International Conference on Learning Representations (ICLR).","author":"Qi Hang","year":"2017"},{"key":"e_1_3_2_1_28_1","volume-title":"Hogwild: A Lock-free Approach to Parallelizing Stochastic Gradient Descent. In Advances in Neural Information Processing Systems (NIPS). 693--701.","author":"Recht Benjamin","year":"2011"},{"key":"e_1_3_2_1_29_1","volume-title":"ImageNet Large Scale Visual Recognition Challenge. arXiv preprint arXiv:1409.0575 (January","author":"Russakovsky Olga","year":"2015"},{"key":"e_1_3_2_1_30_1","volume-title":"Long Short-Term Memory Based Recurrent Neural Network Architectures for Large Vocabulary Speech Recognition. arXiv preprint arXiv:1402.1128","author":"Sak Hasim","year":"2014"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1951.tb01366.x"},{"key":"e_1_3_2_1_32_1","volume-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. arXiv preprint arXiv:1701.06538v1","author":"Shazeer Noam","year":"2017"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2017.47"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080221"},{"key":"e_1_3_2_1_35_1","volume-title":"Le","author":"Smith Samuel L.","year":"2017"},{"key":"e_1_3_2_1_36_1","volume-title":"Revisiting Unreasonable Effectiveness of Data in Deep Learning Era. In The International Conference on Computer Vision (ICCV).","author":"Sun Chen","year":"2017"},{"key":"e_1_3_2_1_37_1","unstructured":"Wei Wen Cong Xu Feng Yan Chunpeng Wu Yandan Wang Yiran Chen and Hai Li. 2017. TernGrad: Ternary Gradients to Reduce Communication in Distributed Deep Learning. In Advances in Neural Information Processing Systems (NIPS).   Wei Wen Cong Xu Feng Yan Chunpeng Wu Yandan Wang Yiran Chen and Hai Li. 2017. TernGrad: Ternary Gradients to Reduce Communication in Distributed Deep Learning. In Advances in Neural Information Processing Systems (NIPS) ."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3225058.3225069"},{"key":"e_1_3_2_1_41_1","volume-title":"Recurrent Highway Networks. In The International Conference on Machine Learning (ICML).","author":"Zilly Julian Georg","year":"2017"}],"event":{"name":"PPoPP '19: 24th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","location":"Washington District of Columbia","acronym":"PPoPP '19","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3293883.3295710","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3293883.3295710","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:01:47Z","timestamp":1750208507000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3293883.3295710"}},"subtitle":["computational challenges in deep learning"],"short-title":[],"issued":{"date-parts":[[2019,2,16]]},"references-count":40,"alternative-id":["10.1145\/3293883.3295710","10.1145\/3293883"],"URL":"https:\/\/doi.org\/10.1145\/3293883.3295710","relation":{},"subject":[],"published":{"date-parts":[[2019,2,16]]},"assertion":[{"value":"2019-02-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}