{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T15:22:20Z","timestamp":1772205740426,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,11,12]],"date-time":"2017-11-12T00:00:00Z","timestamp":1510444800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,11,12]]},"DOI":"10.1145\/3126908.3126916","type":"proceedings-article","created":{"date-parts":[[2017,11,8]],"date-time":"2017-11-08T21:02:30Z","timestamp":1510174950000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":47,"title":["Deep learning at 15PF"],"prefix":"10.1145","author":[{"given":"Thorsten","family":"Kurth","sequence":"first","affiliation":[{"name":"NERSC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Zhang","sequence":"additional","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nadathur","family":"Satish","sequence":"additional","affiliation":[{"name":"Intel Corporation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Evan","family":"Racah","sequence":"additional","affiliation":[{"name":"NERSC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ioannis","family":"Mitliagkas","sequence":"additional","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Md. Mostofa Ali","family":"Patwary","sequence":"additional","affiliation":[{"name":"Intel Corporation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tareq","family":"Malas","sequence":"additional","affiliation":[{"name":"Intel Corporation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Narayanan","family":"Sundaram","sequence":"additional","affiliation":[{"name":"Intel Corporation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wahid","family":"Bhimji","sequence":"additional","affiliation":[{"name":"NERSC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mikhail","family":"Smorkalov","sequence":"additional","affiliation":[{"name":"Intel Corporation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jack","family":"Deslippe","sequence":"additional","affiliation":[{"name":"NERSC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mikhail","family":"Shiryaev","sequence":"additional","affiliation":[{"name":"Intel Corporation, Nizhny Novgorod, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Srinivas","family":"Sridharan","sequence":"additional","affiliation":[{"name":"Intel Corporation, Bangalore, Karnataka, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"family":"Prabhat","sequence":"additional","affiliation":[{"name":"NERSC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pradeep","family":"Dubey","sequence":"additional","affiliation":[{"name":"Intel Corporation"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2017,11,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/0168-9002(89)91300-4"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/0010-4655(88)90004-5"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/JHEP07(2016)069"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/JHEP01(2017)110"},{"key":"e_1_3_2_1_5_1","unstructured":"The ATLAS collaboration \"Search for massive supersymmetric particles in multi-jet final states produced in pp collisions at {EQUATION} = 13 TeV using the ATLAS detector at the LHC \" ATLAS-CONF-2016-057 2016.  The ATLAS collaboration \"Search for massive supersymmetric particles in multi-jet final states produced in pp collisions at {EQUATION} = 13 TeV using the ATLAS detector at the LHC \" ATLAS-CONF-2016-057 2016."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2008.01.036"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/JHEP02(2014)057"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-012-1896-2"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1175\/JCLI-D-14-00311.1"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1038\/ngeo779"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1029\/2012JD018027"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1175\/BAMS-D-11-00154.1"},{"key":"e_1_3_2_1_13_1","first-page":"01156","article-title":"Application of deep convolutional neural networks for detecting extreme weather in climate datasets","volume":"1605","author":"Liu Y.","year":"2016","journal-title":"CoRR"},{"key":"e_1_3_2_1_14_1","unstructured":"S. Chetlur C. Woolley P. Vandermersch J. Cohen J. Tran B. Catanzaro and E. Shelhamer \"cuDNN: Efficient primitives for deep learning \" CoRR vol. abs\/1410.0759 2014.  S. Chetlur C. Woolley P. Vandermersch J. Cohen J. Tran B. Catanzaro and E. Shelhamer \"cuDNN: Efficient primitives for deep learning \" CoRR vol. abs\/1410.0759 2014."},{"key":"e_1_3_2_1_15_1","unstructured":"\"Introducing DNN primitives in Intel\u00ae Math Kernel Library \" https:\/\/software.intel.com\/en-us\/articles\/introducing-dnn-primitives-in-intelr-mkl 2017.  \"Introducing DNN primitives in Intel \u00ae Math Kernel Library \" https:\/\/software.intel.com\/en-us\/articles\/introducing-dnn-primitives-in-intelr-mkl 2017."},{"key":"e_1_3_2_1_16_1","first-page":"1","volume-title":"IEEE Press","author":"Heinecke A.","year":"2016"},{"key":"e_1_3_2_1_17_1","unstructured":"\"Deepbench \" github.com\/baidu-research\/DeepBench 2017.  \"Deepbench \" github.com\/baidu-research\/DeepBench 2017."},{"key":"e_1_3_2_1_18_1","first-page":"173","article-title":"Deep speech 2 : End-to-end speech recognition in english and mandarin","author":"Amodei D.","year":"2016","journal-title":"Proceedings of ICML)"},{"key":"e_1_3_2_1_19_1","first-page":"1223","article-title":"Large scale distributed deep networks","author":"Dean J.","year":"2012","journal-title":"NIPS"},{"key":"e_1_3_2_1_20_1","first-page":"00175","article-title":"Firecaffe: near-linear acceleration of deep neural network training on compute clusters","volume":"1511","author":"Iandola F. N.","year":"2015","journal-title":"CoRR"},{"key":"e_1_3_2_1_21_1","first-page":"06709","article-title":"Distributed deep learning using synchronous stochastic gradient descent","volume":"1602","author":"Das D.","year":"2016","journal-title":"CoRR"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3041021.3051103"},{"key":"e_1_3_2_1_23_1","unstructured":"\"Scaling Deep Learning on 18 000 GPUs \" https:\/\/www.nextplatform.com\/2017\/03\/28\/scaling-deep-learning-beyond-18000-gpus\/ 2017.  \"Scaling Deep Learning on 18 000 GPUs \" https:\/\/www.nextplatform.com\/2017\/03\/28\/scaling-deep-learning-beyond-18000-gpus\/ 2017."},{"key":"e_1_3_2_1_24_1","unstructured":"A. Anandkumar. Deep Learning at Scale on AWS. {Online}. Available: https:\/\/ml-days-prd.s3.amazonaws.com\/slides\/speakers\/slides\/3\/Anima-EPFL2017.pdf  A. Anandkumar. Deep Learning at Scale on AWS. {Online}. Available: https:\/\/ml-days-prd.s3.amazonaws.com\/slides\/speakers\/slides\/3\/Anima-EPFL2017.pdf"},{"key":"e_1_3_2_1_25_1","unstructured":"S. Hadjis C. Zhang I. Mitliagkas D. Iter and C. R\u00e9 \"Omnivore: An optimizer for multi-device deep learning on cpus and gpus \" arXiv:1606.04487 2016.  S. Hadjis C. Zhang I. Mitliagkas D. Iter and C. R\u00e9 \"Omnivore: An optimizer for multi-device deep learning on cpus and gpus \" arXiv:1606.04487 2016."},{"key":"e_1_3_2_1_26_1","unstructured":"N. S. Keskar D. Mudigere J. Nocedal M. Smelyanskiy and P. T. P. Tang \"On large-batch training for deep learning: Generalization gap and sharp minima \" arXiv:1609.04836 2016.  N. S. Keskar D. Mudigere J. Nocedal M. Smelyanskiy and P. T. P. Tang \"On large-batch training for deep learning: Generalization gap and sharp minima \" arXiv:1609.04836 2016."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"J. Tsitsiklis D. Bertsekas and M. Athans \"Distributed asynchronous deterministic and stochastic gradient optimization algorithms \" IEEE transactions on automatic control vol. 31 no. 9 pp. 803--812 1986.  J. Tsitsiklis D. Bertsekas and M. Athans \"Distributed asynchronous deterministic and stochastic gradient optimization algorithms \" IEEE transactions on automatic control vol. 31 no. 9 pp. 803--812 1986.","DOI":"10.1109\/TAC.1986.1104412"},{"key":"e_1_3_2_1_28_1","first-page":"693","article-title":"Hogwild: A lock-free approach to parallelizing stochastic gradient descent","author":"Niu F.","year":"2011","journal-title":"NIPS"},{"key":"e_1_3_2_1_29_1","first-page":"1223","article-title":"Large scale distributed deep networks","author":"Dean J.","year":"2012","journal-title":"NIPS"},{"key":"e_1_3_2_1_30_1","unstructured":"T. Chilimbi Y. Suzue J. Apacible and K. Kalyanaraman \"Project adam: Building an efficient and scalable deep learning training system \" in 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14) 2014 pp. 571--582.   T. Chilimbi Y. Suzue J. Apacible and K. Kalyanaraman \"Project adam: Building an efficient and scalable deep learning training system \" in 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14) 2014 pp. 571--582."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"I. Mitliagkas C. Zhang S. Hadjis and C. R\u00e9 \"Asynchrony begets momentum with an application to deep learning \" arXiv:1605.09774 2016.  I. Mitliagkas C. Zhang S. Hadjis and C. R\u00e9 \"Asynchrony begets momentum with an application to deep learning \" arXiv:1605.09774 2016.","DOI":"10.1109\/ALLERTON.2016.7852343"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.14778\/2732977.2733001"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1038\/35016072"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"e_1_3_2_1_35_1","unstructured":"D. Kingma and J. Ba \"Adam: A method for stochastic optimization \" arXiv:1412.6980 2014.  D. Kingma and J. Ba \"Adam: A method for stochastic optimization \" arXiv:1412.6980 2014."},{"key":"e_1_3_2_1_36_1","unstructured":"E. Racah C. Beckham T. Maharaj C. Pal et al. \"Semi-supervised detection of extreme weather events in large climate datasets \" arXiv:1612.02095 2016.  E. Racah C. Beckham T. Maharaj C. Pal et al. \"Semi-supervised detection of extreme weather events in large climate datasets \" arXiv:1612.02095 2016."},{"key":"e_1_3_2_1_37_1","first-page":"779","article-title":"You only look once: Unified, real-time object detection","author":"Redmon J.","year":"2016","journal-title":"CVPR"},{"key":"e_1_3_2_1_38_1","first-page":"21","volume-title":"Springer","author":"Liu W.","year":"2016"},{"key":"e_1_3_2_1_39_1","first-page":"91","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"Ren S.","year":"2015","journal-title":"NIPS"},{"key":"e_1_3_2_1_40_1","unstructured":"\"Intel\u00ae distribution of Caffe* \" https:\/\/github.com\/intel\/caffe 2017.  \"Intel \u00ae distribution of Caffe* \" https:\/\/github.com\/intel\/caffe 2017."},{"key":"e_1_3_2_1_41_1","unstructured":"\"Intel\u00ae Machine Learning Scaling Library for Linux* OS \" https:\/\/github.com\/01org\/MLSL 2017.  \"Intel \u00ae Machine Learning Scaling Library for Linux* OS \" https:\/\/github.com\/01org\/MLSL 2017."},{"key":"e_1_3_2_1_42_1","unstructured":"\"Intel\u00ae Software Development Emulator \" https:\/\/software.intel.com\/en-us\/articles\/intel-software-development-emulator 2017.  \"Intel \u00ae Software Development Emulator \" https:\/\/software.intel.com\/en-us\/articles\/intel-software-development-emulator 2017."},{"key":"e_1_3_2_1_43_1","first-page":"09308","article-title":"Fast algorithms for convolutional neural networks","volume":"1509","author":"Lavin A.","year":"2015","journal-title":"CoRR"},{"key":"e_1_3_2_1_44_1","first-page":"07061","article-title":"Quantized neural networks: Training neural networks with low precision weights and activations","volume":"1609","author":"Hubara I.","year":"2016","journal-title":"CoRR"},{"key":"e_1_3_2_1_45_1","first-page":"7024","article-title":"Training deep neural networks with low precision multiplications","volume":"1412","author":"Courbariaux M.","year":"2014","journal-title":"CoRR"},{"key":"e_1_3_2_1_46_1","first-page":"02551","article-title":"Deep learning with limited numerical precision","volume":"1502","author":"Gupta S.","year":"2015","journal-title":"CoRR"},{"key":"e_1_3_2_1_47_1","first-page":"03168","article-title":"Hardware-oriented approximation of convolutional neural networks","volume":"1604","author":"Gysel P.","year":"2016","journal-title":"CoRR"},{"key":"e_1_3_2_1_48_1","unstructured":"J. Zhang I. Mitliagkas and C. R\u00e9 \"Yellowfin and the art of momentum tuning \" arXiv preprint arXiv:1706.03471 2017.  J. Zhang I. Mitliagkas and C. R\u00e9 \"Yellowfin and the art of momentum tuning \" arXiv preprint arXiv:1706.03471 2017."},{"key":"e_1_3_2_1_49_1","first-page":"2951","article-title":"Practical bayesian optimization of machine learning algorithms","author":"Snoek J.","year":"2012","journal-title":"NIPS"},{"key":"e_1_3_2_1_50_1","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He K.","year":"2016","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015015"}],"event":{"name":"SC '17: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver Colorado","acronym":"SC '17","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3126908.3126916","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3126908.3126916","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:11:08Z","timestamp":1750212668000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3126908.3126916"}},"subtitle":["supervised and semi-supervised classification for scientific data"],"short-title":[],"issued":{"date-parts":[[2017,11,12]]},"references-count":52,"alternative-id":["10.1145\/3126908.3126916","10.1145\/3126908"],"URL":"https:\/\/doi.org\/10.1145\/3126908.3126916","relation":{},"subject":[],"published":{"date-parts":[[2017,11,12]]},"assertion":[{"value":"2017-11-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}