{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:59:47Z","timestamp":1774022387986,"version":"3.50.1"},"reference-count":64,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science","doi-asserted-by":"publisher","award":["Research Fellow of Japan Society for the Promotion"],"award-info":[{"award-number":["Research Fellow of Japan Society for the Promotion"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["U.S. DOE ECP ExaLearn Project"],"award-info":[{"award-number":["U.S. DOE ECP ExaLearn Project"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/tpds.2020.3047974","type":"journal-article","created":{"date-parts":[[2020,12,30]],"date-time":"2020-12-30T20:40:07Z","timestamp":1609360807000},"page":"1-1","source":"Crossref","is-referenced-by-count":22,"title":["The Case for Strong Scaling in Deep Learning: Training Large 3D CNNs with Hybrid Parallelism"],"prefix":"10.1109","author":[{"given":"Yosuke","family":"Oyama","sequence":"first","affiliation":[]},{"given":"Naoya","family":"Maruyama","sequence":"additional","affiliation":[]},{"given":"Nikoli","family":"Dryden","sequence":"additional","affiliation":[]},{"given":"Erin","family":"Mccarthy","sequence":"additional","affiliation":[]},{"given":"Peter","family":"Harrington","sequence":"additional","affiliation":[]},{"given":"Jan","family":"Balewski","sequence":"additional","affiliation":[]},{"given":"Satoshi","family":"Matsuoka","sequence":"additional","affiliation":[]},{"given":"Peter","family":"Nugent","sequence":"additional","affiliation":[]},{"given":"Brian","family":"Van Essen","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3210377.3210394"},{"key":"ref38","first-page":"iii-1337","article-title":"Deep learning with COTS HPC systems","author":"coates","year":"2013","journal-title":"Proc 30th Int Conf Int Conf Mach Learn"},{"key":"ref33","article-title":"Horovod: Fast and easy distributed deep learning in TensorFlow","author":"sergeev","year":"2018"},{"key":"ref32","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"abadi","year":"2015"},{"key":"ref31","first-page":"1","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref30","first-page":"1","article-title":"Rectifier nonlinearities improve neural network acoustic models","author":"maas","year":"2013","journal-title":"Proc ICML Workshop Deep Learn Audio Speech Lang Process"},{"key":"ref37","article-title":"One weird trick for parallelizing convolutional neural networks","author":"krizhevsky","year":"2014"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3320060"},{"key":"ref35","article-title":"Tfrecord and tf.train.example","year":"2020"},{"key":"ref34","first-page":"10414","article-title":"Mesh-TensorFlow: Deep learning for supercomputers","author":"shazeer","year":"2018","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/1048935.1050189"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2016.79"},{"key":"ref61","first-page":"1","article-title":"Tuning HDF5 for lustre file systems","author":"howison","year":"2010","journal-title":"Proc Workshop Interfaces Abstractions Sci Data Storage"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref28","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Proc 32nd Int Conf Int Conf Mach Learn"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2016.7840590"},{"key":"ref29","article-title":"Estimating cosmological parameters from the dark matter distribution","author":"ravanbakhsh","year":"2016","journal-title":"Proc 33rd Int Conf Int Conf Mach Learn"},{"key":"ref2","first-page":"424","article-title":"3D U-Net: Learning dense volumetric segmentation from sparse annotation","author":"\u00e7i\u00e7ek","year":"2016","journal-title":"Proc Int Conf Med Image Comput Comput -Assisted Intervention"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00068"},{"key":"ref20","article-title":"The liver tumor segmentation benchmark (LiTS)","volume":"abs 1901 4056","author":"joskowicz","year":"2019","journal-title":"CoRR"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356207"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00031"},{"key":"ref24","article-title":"Conduit","year":"2019"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2019.8891012"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"ref25","article-title":"cuDNN: Efficient primitives for deep learning","author":"cohen","year":"2014"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"ref51","first-page":"1","article-title":"Training deeper models by GPU memory optimization on TensorFlow","author":"meng","year":"2017","journal-title":"Proc ML Syst Workshop Neural Inf Process Syst"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/FMPC.1999.750599"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337902"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2018.00023"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/PDSW-DISCS.2018.00011"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC-SmartCity-DSS.2017.29"},{"key":"ref54","first-page":"497","article-title":"Checkmate: Breaking the memory wall with optimal tensor rematerialization","author":"jain","year":"2020","journal-title":"Proc Conf Mach Learn Syst"},{"key":"ref53","article-title":"Training deep nets with sublinear memory cost","author":"chen","year":"2016"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178491"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2834892.2834897"},{"key":"ref11","article-title":"LBANN","year":"2020"},{"key":"ref40","first-page":"1","article-title":"Beyond data and model parallelism for deep neural networks","author":"jia","year":"2019","journal-title":"Proc MLSys Conf"},{"key":"ref12","article-title":"Accurate, large minibatch SGD: Training ImageNet in 1 hour","author":"noordhuis","year":"2017"},{"key":"ref13","article-title":"PFDet: 2nd place solution to open images challenge 2018 object detection track","author":"akiba","year":"2018"},{"key":"ref14","article-title":"Yet another accelerated SGD: ResNet-50 training on ImageNet in 74.7 seconds","author":"honda","year":"2019"},{"key":"ref15","article-title":"Exploring deep learning for science benchmarks on DOE supercomputers","author":"tsaris","year":"2020","journal-title":"Proc ECP Annu Meeting"},{"key":"ref16","article-title":"CosmoFlow datasets","year":"2019"},{"key":"ref17","first-page":"234","article-title":"U-Net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"2015","journal-title":"Proc Int Conf Med Image Comput Comput -Assisted Intervention"},{"key":"ref18","article-title":"High resolution medical image analysis with spatial partitioning","author":"hou","year":"2019"},{"key":"ref19","article-title":"MLPerf inference benchmark suite","year":"2020"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.2514\/6.2015-1284"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00054"},{"key":"ref6","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in English and Mandarin","author":"amodei","year":"2016","journal-title":"Proc 33rd Int Conf Int Conf Mach Learn"},{"key":"ref5","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref8","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics Human Lang Technol"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2017.8257926"},{"key":"ref9","first-page":"1","article-title":"On large-batch training for deep learning: Generalization gap and sharp minima","author":"keskar","year":"2017","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref46","first-page":"103","article-title":"GPipe: Efficient training of giant neural networks using pipeline parallelism","author":"huang","year":"2019","journal-title":"Proc Conf Neural Inf Process Syst"},{"key":"ref45","first-page":"8056","article-title":"Pipe-SGD: A decentralized pipelined SGD framework for distributed deep net training","author":"li","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2018.00058"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref42","first-page":"571","article-title":"Project Adam: Building an efficient and scalable deep learning training system","author":"chilimbi","year":"2014","journal-title":"Proc 11th USENIX Conf Operating Syst Des Implementation"},{"key":"ref41","first-page":"1223","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"Proc 25th Int Conf Neural Inf Process Syst"},{"key":"ref44","first-page":"1","article-title":"Pipelined back-propagation for context-dependent deep neural networks","author":"chen","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref43","article-title":"TF-Replicator: Distributed machine learning for researchers","author":"buchlovsky","year":"2019"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/4359390\/09311438.pdf?arnumber=9311438","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:50:31Z","timestamp":1652194231000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9311438\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1109\/tpds.2020.3047974","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}