{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T22:32:33Z","timestamp":1768429953761,"version":"3.49.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T00:00:00Z","timestamp":1605571200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T00:00:00Z","timestamp":1605571200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T00:00:00Z","timestamp":1605571200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,11,17]]},"DOI":"10.1109\/mascots50786.2020.9285954","type":"proceedings-article","created":{"date-parts":[[2020,12,21]],"date-time":"2020-12-21T23:22:16Z","timestamp":1608592936000},"page":"1-8","source":"Crossref","is-referenced-by-count":9,"title":["Effective Elastic Scaling of Deep Learning Workloads"],"prefix":"10.1109","author":[{"given":"Vaibhav","family":"Saxena","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"K. R.","family":"Jayaram","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Saurav","family":"Basu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yogish","family":"Sabharwal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashish","family":"Verma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"485","article-title":"Tiresias: A GPU cluster manager for distributed deep learning","author":"juncheng","year":"2019","journal-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2018.00036"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3236332"},{"key":"ref13","year":"2018","journal-title":"IBM Inc Ffdl A fabric for deep learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3164537"},{"key":"ref15","article-title":"Elastic remote methods. In David Eyers and Karsten Schwan, editors","author":"jayaram","year":"2013","journal-title":"ACM International Middleware Conference"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2016.102"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3361525.3361538"},{"key":"ref18","first-page":"947","article-title":"Analysis of large-scale multi-tenant gpu clusters for dnn training workloads","author":"jeon","year":"2019","journal-title":"USENIX Annual Technical Conference"},{"key":"ref19","volume":"abs 1609 4836","author":"keskar","year":"2016","journal-title":"On large-batch training for deep learning Generalization gap and sharp minima"},{"key":"ref28","author":"you","year":"2017","journal-title":"Large batch training of convolutional networks"},{"key":"ref4","first-page":"223","article-title":"Optimization methods for large-scale machine learning","volume":"60","author":"leon","year":"2016","journal-title":"SIAM Review"},{"key":"ref27","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","author":"xiao","year":"2018","journal-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2870389"},{"key":"ref6","year":"2018","journal-title":"SPEC Cloud IaaS 2018 Benchmark"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3127490"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2523616.2523617"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/2382553.2382556"},{"key":"ref7","author":"devarakonda","year":"2017","journal-title":"AdaBatch Adaptive batch sizes for training deep neural networks"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8486422"},{"key":"ref9","article-title":"Accurate, large minibatch SGD: training imagenet in 1 hour","volume":"abs 1706 2677","author":"goyal","year":"2017","journal-title":"CoRR"},{"key":"ref1","first-page":"410","article-title":"Coupling adaptive batch sizes with learning rates","author":"balles","year":"2017","journal-title":"Conference on Uncertainty in Artificial Intelligence"},{"key":"ref20","first-page":"69","article-title":"AGILE: Elastic distributed resource scaling for infrastructure-as-a-service","author":"nguyen","year":"2013","journal-title":"10th International Conference on Autonomic Computing (ICAC 13)"},{"key":"ref22","author":"robinson","year":"2004","journal-title":"Simulation the practice of model development and use Chichester"},{"key":"ref21","first-page":"631","article-title":"Litz: Elastic framework for highperformance distributed machine learning","author":"aurick","year":"2018","journal-title":"2018 USENIX Annual Technical Conference (USENIX ATC 18)"},{"key":"ref24","author":"saxena","year":"2020","journal-title":"Effective elastic scaling of deep learning workloads"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/2988336.2988352"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2018.00057"},{"key":"ref25","article-title":"Don't decay the learning rate, increase the batch size","author":"smith","year":"2018","journal-title":"ICLR&#x2019; 18"}],"event":{"name":"2020 28th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)","location":"Nice, France","start":{"date-parts":[[2020,11,17]]},"end":{"date-parts":[[2020,11,19]]}},"container-title":["2020 28th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9285930\/9285931\/09285954.pdf?arnumber=9285954","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T21:50:47Z","timestamp":1656453047000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9285954\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,17]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/mascots50786.2020.9285954","relation":{},"subject":[],"published":{"date-parts":[[2020,11,17]]}}}