{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:42:18Z","timestamp":1740102138017,"version":"3.37.3"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,17]],"date-time":"2023-05-17T00:00:00Z","timestamp":1684281600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,17]],"date-time":"2023-05-17T00:00:00Z","timestamp":1684281600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,17]]},"DOI":"10.1109\/infocom53939.2023.10228920","type":"proceedings-article","created":{"date-parts":[[2023,8,29]],"date-time":"2023-08-29T17:40:43Z","timestamp":1693330843000},"page":"1-10","source":"Crossref","is-referenced-by-count":0,"title":["Dynamic Resource Allocation for Deep Learning Clusters with Separated Compute and Storage"],"prefix":"10.1109","author":[{"given":"Mingxia","family":"Li","sequence":"first","affiliation":[{"name":"University of Science and Technology of China,CAS Key Lab of Wireless-Optical Communications,China"}]},{"given":"Zhenhua","family":"Han","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia,China"}]},{"given":"Chi","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,CAS Key Lab of Wireless-Optical Communications,China"}]},{"given":"Ruiting","family":"Zhou","sequence":"additional","affiliation":[{"name":"Southeast University,Nanjing,China"}]},{"given":"Yuanchi","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,CAS Key Lab of Wireless-Optical Communications,China"}]},{"given":"Haisheng","family":"Tan","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,CAS Key Lab of Wireless-Optical Communications,China"}]}],"member":"263","reference":[{"article-title":"Azure blob storage","key":"ref1"},{"article-title":"Aws amazon storage","key":"ref2"},{"article-title":"Amazon ec2 i3en instances","key":"ref3"},{"article-title":"Azure cache for redis","key":"ref4"},{"article-title":"Amazon ec2 p3 instances","key":"ref5"},{"article-title":"Azure high-performance computing","key":"ref6"},{"article-title":"Amazon fsx","key":"ref7"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/CVPR.2009.5206848"},{"year":"2018","author":"Devlin","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","key":"ref10"},{"article-title":"Bert pre-training","key":"ref11"},{"key":"ref12","article-title":"Gandiva: introspective cluster scheduling for deep learning","author":"Xiao","year":"2018","journal-title":"Operating Systems Design and Implementation"},{"key":"ref13","first-page":"481","article-title":"{Heterogeneity-Aware} cluster scheduling policies for deep learning workloads","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Narayanan"},{"key":"ref14","first-page":"283","article-title":"Quiver: An informed storage cache for deep learning","volume-title":"18th USENIX Conference on File and Storage Technologies (FAST 20)","author":"Kumar"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.5555\/2999134.2999257"},{"year":"2016","author":"Abu-El-Haija","article-title":"Youtube-8m: A large-scale video classification benchmark","key":"ref16"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/CVPR.2016.308"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/TPAMI.2011.235"},{"key":"ref19","doi-asserted-by":"crossref","DOI":"10.1145\/3552326.3567499","article-title":"Silod: A co-design of caching and scheduling for deep learning clusters","volume-title":"EuroSys 2023","author":"Zhao","year":"2023"},{"article-title":"Analysis of large-scale multi-tenant gpu clusters for dnn training workloads","key":"ref20"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1145\/3397166.3409122"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/INFOCOM.2016.7524525"},{"article-title":"tc(8) \u2014 linux manual page","key":"ref23"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.14778\/3446095.3446100"},{"key":"ref25","article-title":"Alluxio: A virtual distributed file system","volume-title":"Ph.D. dissertation","author":"Li","year":"2018"},{"key":"ref26","first-page":"469","article-title":"{CherryPick}: Adaptively unearthing the best cloud configurations for big data analytics","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Alipourfard"},{"key":"ref27","first-page":"363","article-title":"Ernest: Efficient performance prediction for {Large-Scale} advanced analytics","volume-title":"13th USENIX Symposium on Networked Systems Design and Implementation (NSDI 16)","author":"Venkataraman"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/INFOCOM41043.2020.9155267"},{"key":"ref29","first-page":"485","article-title":"Tiresias: A {GPU} cluster manager for distributed deep learning","volume-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Gu"},{"volume-title":"15th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 21)","author":"Qiao","article-title":"Pollux: Co-adaptive cluster scheduling for goodput-optimized deep learning","key":"ref30"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1109\/INFOCOM41043.2020.9155445"}],"event":{"name":"IEEE INFOCOM 2023 - IEEE Conference on Computer Communications","start":{"date-parts":[[2023,5,17]]},"location":"New York City, NY, USA","end":{"date-parts":[[2023,5,20]]}},"container-title":["IEEE INFOCOM 2023 - IEEE Conference on Computer Communications"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10228851\/10228852\/10228920.pdf?arnumber=10228920","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T21:55:04Z","timestamp":1709330104000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10228920\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,17]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/infocom53939.2023.10228920","relation":{},"subject":[],"published":{"date-parts":[[2023,5,17]]}}}