{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:23:31Z","timestamp":1766269411226,"version":"3.37.3"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62022057","61832006","61872240"],"award-info":[{"award-number":["62022057","61832006","61872240"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput."],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tc.2023.3303988","type":"journal-article","created":{"date-parts":[[2023,8,10]],"date-time":"2023-08-10T17:36:34Z","timestamp":1691688994000},"page":"3458-3472","source":"Crossref","is-referenced-by-count":7,"title":["Improving Cluster Utilization Through Adaptive Resource Management for Deep Neural Network and CPU Jobs Colocation"],"prefix":"10.1109","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1561-5329","authenticated-orcid":false,"given":"Han","family":"Zhao","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6646-5260","authenticated-orcid":false,"given":"Weihao","family":"Cui","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5832-0347","authenticated-orcid":false,"given":"Quan","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5660-5493","authenticated-orcid":false,"given":"Jingwen","family":"Leng","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3276-1202","authenticated-orcid":false,"given":"Deze","family":"Zeng","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-2302","authenticated-orcid":false,"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Computer Science, China University of Geosciences, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2954679.2872368"},{"year":"2022","key":"ref35","article-title":"Common voice dataset"},{"key":"ref12","first-page":"24","article-title":"Dominant resource fairness: Fair allocation of multiple resource types","volume":"11","author":"ghodsi","year":"2011","journal-title":"Proc NSDI"},{"article-title":"English multi-speaker corpus for CSTR voice cloning toolkit","year":"2012","author":"yamagishi","key":"ref34"},{"key":"ref15","first-page":"1","article-title":"Mesos: Flexible resource sharing for the cloud","volume":"1","author":"hindman","year":"2011","journal-title":"USENIX Mag"},{"article-title":"Tensorflow-serving: Flexible, high-performance ML serving","year":"2017","author":"olston","key":"ref37"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2523616.2523633"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS47774.2020.00069"},{"article-title":"Bidirectional attention flow for machine comprehension","year":"2016","author":"seo","key":"ref31"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"year":"2022","key":"ref11","article-title":"First in first out algorithm"},{"year":"2022","key":"ref33","article-title":"Wmt16 dataset"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00036"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"ref2","first-page":"1","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1755913.1755940"},{"year":"2023","key":"ref39","article-title":"Lambda function scaling"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2465351.2465387"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476143"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3093315.3037700"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"ref24","first-page":"485","article-title":"Mercury: Hybrid centralized and distributed scheduling in large shared clusters","author":"karanasos","year":"0","journal-title":"Proc USENIX Annu Tech Conf (USENIX ATC)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/2741948.2741964"},{"key":"ref26","first-page":"1","article-title":"WaveNet: A generative model for raw audio","volume":"125","author":"van den oord","year":"2016","journal-title":"SSW"},{"key":"ref25","first-page":"285","article-title":"Apollo: Scalable and coordinated scheduling for $\\{${ cloud-scale$\\}$} computing","author":"boutin","year":"2014","journal-title":"Proc 11th USENIX Sym Operating Syst Des Implementation (OSDI)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387547"},{"year":"2022","key":"ref42","article-title":"Linux command tc"},{"key":"ref41","first-page":"539","article-title":"Microsecond-scale preemption for concurrent $\\{${GPU-accelerated$\\}$} $\\{${ DNN$\\}$} inferences","author":"han","year":"0","journal-title":"Proc OSDI"},{"key":"ref22","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","author":"xiao","year":"2018","journal-title":"Proc OSDI"},{"key":"ref21","first-page":"579","article-title":"Looking beyond $\\{${GPUs$\\}$} for $\\{${DNN$\\}$} scheduling on $\\{${multi-tenant$\\}$} clusters","author":"mohan","year":"2022","journal-title":"Proc OSDI"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"year":"2023","key":"ref27","article-title":"Slurm&#x00AE; commercial support and development"},{"article-title":"Very deep convolutional networks for large-scale image recognition","year":"2014","author":"simonyan","key":"ref29"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3047638"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"article-title":"Analysis of large-scale multi-tenant GPU clusters for DNN training workloads","year":"2019","author":"jeon","key":"ref9"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"article-title":"Deep speech: Scaling up end-to-end speech recognition","year":"2014","author":"hannun","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"ref5","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"Proc OSDI"},{"key":"ref40","first-page":"199","article-title":"Serving heterogeneous machine learning models on $\\{${multi-GPU$\\}$} servers with $\\{${spatio-temporal$\\}$} sharing","author":"choi","year":"2022","journal-title":"Proc USENIX ATC"}],"container-title":["IEEE Transactions on Computers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/12\/10311055\/10214260.pdf?arnumber=10214260","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,11]],"date-time":"2023-12-11T19:56:01Z","timestamp":1702324561000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10214260\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":42,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tc.2023.3303988","relation":{},"ISSN":["0018-9340","1557-9956","2326-3814"],"issn-type":[{"type":"print","value":"0018-9340"},{"type":"electronic","value":"1557-9956"},{"type":"electronic","value":"2326-3814"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}