{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:28:30Z","timestamp":1750220910220,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,6,23]],"date-time":"2020-06-23T00:00:00Z","timestamp":1592870400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,6,23]]},"DOI":"10.1145\/3369583.3392687","type":"proceedings-article","created":{"date-parts":[[2020,6,22]],"date-time":"2020-06-22T03:27:27Z","timestamp":1592796447000},"page":"203-207","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["An Efficient Technique for Large Mini-batch Challenge of DNNs Training on Large Scale Cluster"],"prefix":"10.1145","author":[{"given":"Akihiko","family":"Kasagi","sequence":"first","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Akihiro","family":"Tabuchi","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Masafumi","family":"Yamazaki","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Takumi","family":"Honda","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Masahiro","family":"Miwa","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Naoto","family":"Fukumoto","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Tsuguchika","family":"Tabaru","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Atsushi","family":"Ike","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]},{"given":"Kohta","family":"Nakashima","sequence":"additional","affiliation":[{"name":"Fujitsu Laboratories ltd., Kawasaki, Japan"}]}],"member":"320","published-online":{"date-parts":[[2020,6,23]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"T. Akiba S. Suzuki and K. Fukuda. 2017. Extremely Large Minibatch SGD: Train- ing ResNet-50 on ImageNet in 15 Minutes. arXiv:1711.04325 (2017)."},{"key":"e_1_3_2_2_2_1","unstructured":"T. Chen M. Li Y. Li M. Lin N. Wang M. Wang T. Xiao B. Xu C. Zhang and Z. Zhang. 2015. MXNet: A Flexible and Efficient Machine Learning Li- brary for Heterogeneous Distributed Systems. CoRR arXiv:1512.01274 (2015). arXiv:1512.01274 http:\/\/arxiv.org\/abs\/1512.01274"},{"key":"e_1_3_2_2_3_1","volume-title":"Proceedings of the 30th International Con- ference on Machine Learning","author":"Coates A.","year":"2013","unstructured":"A. Coates, B. Huval, T. Wang, D. J. Wu, A. Y. Ng, and B. Catanzaro. 2013. Deep learning with COTS HPC systems. In Proceedings of the 30th International Con- ference on Machine Learning (2013)."},{"key":"e_1_3_2_2_4_1","unstructured":"J. Dean G. Corrado R. Monga K. Chen M. Devin M. Mao M. Ranzato A. Senior P. Tucker K. Yang Q. V. Le and A. Y. Ng. 2012. Large Scale Distributed Deep Networks. Neural Information Processing Systems (2012)."},{"key":"e_1_3_2_2_5_1","volume-title":"Y. Jia, and K. He.","author":"Goyal P.","year":"2017","unstructured":"P. Goyal, P. Dollar, R. Girshick, P. Noordhuis, L. Wesolowski, A. Kyrola, A. Tul- loch, Y. Jia, and K. He. 2017. Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour. ArXiv:1706.02677 (2017)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2010.5586368"},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"He K.","year":"2016","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun. 2016. Deep Residual Learning for Image Recognition. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2016)."},{"key":"e_1_3_2_2_8_1","unstructured":"Z. He L. Xie X. Chen Y. Zhang Y.Wang and Q. Tian. 2019. Data Augmentation Revisited: Rethinking the Distribution Gap between Clean and Augmented Data. arXiv:1909.09148 (2019)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"crossref","unstructured":"F. N. Iandola K. Ashraf M.W. Moskewicz and K. Keutzer. 2015. FireCaffe: Near- Linear Acceleration of Deep Neural Network Training on Compute Clusters. arXiv:1511.00175 (2015).","DOI":"10.1109\/CVPR.2016.284"},{"key":"e_1_3_2_2_10_1","unstructured":"F. N. Iandola S. Han M. W. Moskewicz K. Ashraf W. J. Dally and K. Keutzer. 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and 0.5 MB model size. arXiv:1602.07360 (2016)."},{"key":"e_1_3_2_2_11_1","unstructured":"X. Jia S. Song W. He Y. Wang H. Rong F. Zhou L. Xie Z. Guo Y. Yang L. Yu T. Chen G. Hu S. Shi and X. Chu. 2018. Highly Scalable Deep Learning Training System with Mixed--Precision: Training ImageNet in Four Minutes. arXiv:1807.11205 (2018)."},{"key":"e_1_3_2_2_12_1","unstructured":"H. Mikami H. Suganuma P. U-chupala Y. Tanaka and Y. Kageyama. 2019. Massively Distributed SGD: ImageNet\/ResNet-50 Training in a Flash. arXiv:1811.05233v2 (2019)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_2_14_1","unstructured":"S. L. Smith P.-J. Kindermans C. Ying and Q. V. Le. 2017. Don't Decay the Learn- ing Rate Increase the Batch Size. Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_2_15_1","volume-title":"Rethinking the Inception Architecture for Computer Vision. arXiv:1512.00567v3","author":"Szegedy C.","year":"2015","unstructured":"C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z.Wojna. 2015. Rethinking the Inception Architecture for Computer Vision. arXiv:1512.00567v3 (2015)."},{"key":"e_1_3_2_2_16_1","unstructured":"H. Touvron A. Vedaldi M. Douze and H. J\u00e9gou. 2019. Fixing the train-test resolution discrepancy. arXiv:1906.06423 (2019)."},{"key":"e_1_3_2_2_17_1","volume-title":"Deep Image: Scaling up Image Recognition. arXiv:1501.02876","author":"Wu R.","year":"2015","unstructured":"R.Wu, S. Yan, Y. Shan, Q. Dang, and G. Sun. 2015. Deep Image: Scaling up Image Recognition. arXiv:1501.02876 (2015)."},{"key":"e_1_3_2_2_18_1","unstructured":"C. Ying S. Kumar D. Chen T. Wang and Y. Cheng. 2018. Image Classification at Supercomputer Scale. arXiv:1811.06992v2 (2018)."},{"key":"e_1_3_2_2_19_1","unstructured":"Y. You I. Gitman and B. Ginsburg. 2017. Large Batch Training Of Convolutional Networks. arXiv:1708.03888 (2017)."},{"key":"e_1_3_2_2_20_1","unstructured":"Y. You J. Li S. Reddi J. Hseu S. Kumar S. Bhojanapalli X. Song J. Demmel K. Keutzer and C. Hsieh. 2019. Large Batch Optimization for Deep Learning: Training BERT in 76 minutes. arXiv:1904.00962 (2019)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"S. Yun D. Han S. J. Oh S. Chun J. Choe and Y. Yoo. 2019. CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features. arXiv:1905.04899 (2019).","DOI":"10.1109\/ICCV.2019.00612"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"H. Zhang M. Cisse Y. N. Dauphin and D. Lopez-Paz. 2017. mixup: Beyond Empirical Risk Minimization. arXiv:1710.09412 (2017).","DOI":"10.1007\/978-1-4899-7687-1_79"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"W. Zhang X. Cui A. Kayi M. Liu U. Finkler B. Kingsbury G. Saon Y. Mroueh A. Buyuktosunoglu P. Das D. Kung and M. Picheny. 2020. Improving Efficiency in Large-Scale Decentralized Distributed Training. arXiv:2002.01119 (2020).","DOI":"10.1109\/ICASSP40776.2020.9054065"}],"event":{"name":"HPDC '20: The 29th International Symposium on High-Performance Parallel and Distributed Computing","sponsor":["University of Arizona University of Arizona","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Stockholm Sweden","acronym":"HPDC '20"},"container-title":["Proceedings of the 29th International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3369583.3392687","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3369583.3392687","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:58Z","timestamp":1750203898000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3369583.3392687"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,23]]},"references-count":23,"alternative-id":["10.1145\/3369583.3392687","10.1145\/3369583"],"URL":"https:\/\/doi.org\/10.1145\/3369583.3392687","relation":{},"subject":[],"published":{"date-parts":[[2020,6,23]]},"assertion":[{"value":"2020-06-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}