{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T14:45:19Z","timestamp":1773153919975,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,8,5]],"date-time":"2019-08-05T00:00:00Z","timestamp":1564963200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61802449,61761136014,61722206"],"award-info":[{"award-number":["61802449,61761136014,61722206"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003453","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2018A030313032"],"award-info":[{"award-number":["2018A030313032"]}],"id":[{"id":"10.13039\/501100003453","id-type":"DOI","asserted-by":"publisher"}]},{"name":"BoRSF-RCS"},{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["17511102602,14DZ2260800"],"award-info":[{"award-number":["17511102602,14DZ2260800"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key Research & Development (R&D) Plan","award":["2017YFB1001703"],"award-info":[{"award-number":["2017YFB1001703"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,8,5]]},"DOI":"10.1145\/3337821.3337873","type":"proceedings-article","created":{"date-parts":[[2019,7,25]],"date-time":"2019-07-25T12:34:36Z","timestamp":1564058076000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":41,"title":["Cynthia"],"prefix":"10.1145","author":[{"given":"Haoyue","family":"Zheng","sequence":"first","affiliation":[{"name":"Shanghai Key Laboratory of Multidimensional Information Processing, Department of Computer Science and Technology, East China Normal University"}]},{"given":"Fei","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Key Laboratory of Multidimensional Information Processing, Department of Computer Science and Technology, East China Normal University"}]},{"given":"Li","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Louisiana at Lafayette"}]},{"given":"Zhi","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Data and Computer Science, Sun Yat-sen University"}]},{"given":"Fangming","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology"}]}],"member":"320","published-online":{"date-parts":[[2019,8,5]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proc. of OSDI. 265--283","author":"Abadi Martin","year":"2016","unstructured":"Martin Abadi , Paul Barham , Jianmin Chen , Zhifeng Chen , Andy Davis , Jeffrey Dean , Matthieu Devin , 2016 . Tensorflow: A System for Large-Scale Machine Learning . In Proc. of OSDI. 265--283 . Martin Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, et al. 2016. Tensorflow: A System for Large-Scale Machine Learning. In Proc. of OSDI. 265--283."},{"key":"e_1_3_2_1_2_1","unstructured":"Amazon. 2019. Amazon Elastic Compute Cloud (Amazon EC2). http:\/\/aws.amazon.com\/ec2\/  Amazon. 2019. Amazon Elastic Compute Cloud (Amazon EC2). http:\/\/aws.amazon.com\/ec2\/"},{"key":"e_1_3_2_1_3_1","unstructured":"Asteroids. 2018. CPU performance. https:\/\/asteroidsathome.net\/boinc\/cpu_list.  Asteroids. 2018. CPU performance. https:\/\/asteroidsathome.net\/boinc\/cpu_list."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8486422"},{"key":"e_1_3_2_1_5_1","volume-title":"Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency Analysis. arXiv preprint arXiv:1802.09941","author":"Ben-Nun Tal","year":"2018","unstructured":"Tal Ben-Nun and Torsten Hoefler . 2018. Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency Analysis. arXiv preprint arXiv:1802.09941 ( 2018 ). Tal Ben-Nun and Torsten Hoefler. 2018. Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency Analysis. arXiv preprint arXiv:1802.09941 (2018)."},{"key":"e_1_3_2_1_6_1","volume-title":"Convex Optimization","author":"Boyd Stephen","unstructured":"Stephen Boyd and Lieven Vandenberghe . 2004. Convex Optimization . Cambridge University Press . Stephen Boyd and Lieven Vandenberghe. 2004. Convex Optimization. Cambridge University Press."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2898442.2898444"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737587"},{"key":"e_1_3_2_1_9_1","volume-title":"Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274","author":"Chen Tianqi","year":"2015","unstructured":"Tianqi Chen , Mu Li , Yutian Li , Min Lin , Naiyan Wang , Minjie Wang , Tianjun Xiao , 2015 . Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274 (2015). Tianqi Chen, Mu Li, Yutian Li, Min Lin, Naiyan Wang, Minjie Wang, Tianjun Xiao, et al. 2015. Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274 (2015)."},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. of OSDI. 571--582","author":"Chilimbi Trishul","year":"2014","unstructured":"Trishul Chilimbi , Yutaka Suzue , Johnson Apacible , and Karthik Kalyanaraman . 2014 . Project adam: Building an efficient and scalable deep learning training system . In Proc. of OSDI. 571--582 . Trishul Chilimbi, Yutaka Suzue, Johnson Apacible, and Karthik Kalyanaraman. 2014. Project adam: Building an efficient and scalable deep learning training system. In Proc. of OSDI. 571--582."},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. of NIPS. 1223--1231","author":"Dean Jeffrey","year":"2012","unstructured":"Jeffrey Dean , Greg Corrado , Rajat Monga , Kai Chen , Matthieu Devin , Mark Mao , Andrew Senior , 2012 . Large Scale Distributed Deep Networks . In Proc. of NIPS. 1223--1231 . Jeffrey Dean, Greg Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Mark Mao, Andrew Senior, et al. 2012. Large Scale Distributed Deep Networks. In Proc. of NIPS. 1223--1231."},{"key":"e_1_3_2_1_12_1","volume-title":"Proc. of NSDI. 485--500","author":"Gu Juncheng","year":"2019","unstructured":"Juncheng Gu , Mosharaf Chowdhury , Kang G. Shin , Yibo Zhu , Myeongjae Jeon , Junjie Qian , Hongqiang Liu , and Chuanxiong Guo . 2019 . Tiresias: A GPU Cluster Manager for Distributed Deep Learning . In Proc. of NSDI. 485--500 . Juncheng Gu, Mosharaf Chowdhury, Kang G. Shin, Yibo Zhu, Myeongjae Jeon, Junjie Qian, Hongqiang Liu, and Chuanxiong Guo. 2019. Tiresias: A GPU Cluster Manager for Distributed Deep Learning. In Proc. of NSDI. 485--500."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3064176.3064182"},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. of NIPS. 1223--1231","author":"Ho Qirong","year":"2013","unstructured":"Qirong Ho , James Cipar , Henggang Cui , Seunghak Lee , Jin Kyu Kim , Phillip B. Gibbons , Garth A Gibson , 2013 . More effective distributed ml via a stale synchronous parallel parameter server . In Proc. of NIPS. 1223--1231 . Qirong Ho, James Cipar, Henggang Cui, Seunghak Lee, Jin Kyu Kim, Phillip B. Gibbons, Garth A Gibson, et al. 2013. More effective distributed ml via a stale synchronous parallel parameter server. In Proc. of NIPS. 1223--1231."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035933"},{"key":"e_1_3_2_1_16_1","volume-title":"Deep Learning. Nature 521, 7553","author":"LeCun Yann","year":"2015","unstructured":"Yann LeCun , Yoshua Bengio , and Geoffrey Hinton . 2015. Deep Learning. Nature 521, 7553 ( 2015 ), 436--444. Yann LeCun, Yoshua Bengio, and Geoffrey Hinton. 2015. Deep Learning. Nature 521, 7553 (2015), 436--444."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685095"},{"key":"e_1_3_2_1_18_1","volume-title":"Proc. of SysML. 32--43","author":"Lim Hyeontaek","year":"2019","unstructured":"Hyeontaek Lim , David G. Andersen , and Michael Kaminsky . 2019 . 3LC: Lightweight and Effective Traffic Compression for Distributed Machine Learning . In Proc. of SysML. 32--43 . Hyeontaek Lim, David G. Andersen, and Michael Kaminsky. 2019. 3LC: Lightweight and Effective Traffic Compression for Distributed Machine Learning. In Proc. of SysML. 32--43."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267840"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304009"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"e_1_3_2_1_22_1","unstructured":"Google Cloud Platform. 2018. Cloud Deep Learning VM Image. https:\/\/cloud.google.com\/deep-learning-vm\/  Google Cloud Platform. 2018. Cloud Deep Learning VM Image. https:\/\/cloud.google.com\/deep-learning-vm\/"},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. of ICLR. 1--10","author":"Qi Hang","year":"2017","unstructured":"Hang Qi , Evan R Sparks , and Ameet Talwalkar . 2017 . Paleo: A performance model for deep neural networks . In Proc. of ICLR. 1--10 . Hang Qi, Evan R Sparks, and Ameet Talwalkar. 2017. Paleo: A performance model for deep neural networks. In Proc. of ICLR. 1--10."},{"key":"e_1_3_2_1_24_1","volume-title":"Lee","author":"Seber George A. F.","year":"2012","unstructured":"George A. F. Seber and Alan J . Lee . 2012 . Linear regression analysis. Vol. 329 . John Wiley & Sons . George A. F. Seber and Alan J. Lee. 2012. Linear regression analysis. Vol. 329. John Wiley & Sons."},{"key":"e_1_3_2_1_25_1","unstructured":"Amazon Web Service. 2018. AWS Deep Learning AMIs. https:\/\/aws.amazon.com\/machine-learning\/amis\/  Amazon Web Service. 2018. AWS Deep Learning AMIs. https:\/\/aws.amazon.com\/machine-learning\/amis\/"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/DASC\/PiCom\/DataCom\/CyberSciTec.2018.000-4"},{"key":"e_1_3_2_1_27_1","volume-title":"FC2: cloud-based cluster provisioning for distributed machine learning. Cluster Computing","author":"Duong Ta Nguyen Binh","year":"2019","unstructured":"Nguyen Binh Duong Ta . 2019. FC2: cloud-based cluster provisioning for distributed machine learning. Cluster Computing ( 2019 ), 1--17. Nguyen Binh Duong Ta. 2019. FC2: cloud-based cluster provisioning for distributed machine learning. Cluster Computing (2019), 1--17."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3274808.3274828"},{"key":"e_1_3_2_1_29_1","volume-title":"Revisiting Resource Management for Deep Learning Framework. Electronics","author":"Xu Erci","year":"2019","unstructured":"Erci Xu and Shanshan Li. 2019. Revisiting Resource Management for Deep Learning Framework. Electronics ( 2019 ), 327--327. Erci Xu and Shanshan Li. 2019. Revisiting Resource Management for Deep Learning Framework. Electronics (2019), 327--327."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2015.2481403"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2013.2287711"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2013.185"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2873397"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783270"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2018.00020"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3127490"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2017.161"}],"event":{"name":"ICPP 2019: 48th International Conference on Parallel Processing","location":"Kyoto Japan","acronym":"ICPP 2019","sponsor":["University of Tsukuba University of Tsukuba"]},"container-title":["Proceedings of the 48th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3337821.3337873","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3337821.3337873","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:54:26Z","timestamp":1750204466000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3337821.3337873"}},"subtitle":["Cost-Efficient Cloud Resource Provisioning for Predictable Distributed Deep Neural Network Training"],"short-title":[],"issued":{"date-parts":[[2019,8,5]]},"references-count":37,"alternative-id":["10.1145\/3337821.3337873","10.1145\/3337821"],"URL":"https:\/\/doi.org\/10.1145\/3337821.3337873","relation":{},"subject":[],"published":{"date-parts":[[2019,8,5]]},"assertion":[{"value":"2019-08-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}