{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T23:14:29Z","timestamp":1775603669284,"version":"3.50.1"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&#x0026;D Program","award":["2023YFB3001903"],"award-info":[{"award-number":["2023YFB3001903"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62322201"],"award-info":[{"award-number":["62322201"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072018"],"award-info":[{"award-number":["62072018"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2020"],"award-info":[{"award-number":["U23B2020"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U22A2028"],"award-info":[{"award-number":["U22A2028"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012240","name":"Academic Excellence Foundation of BUAA for PhD Students","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012240","id-type":"DOI","asserted-by":"publisher"}]},{"name":"China National Postdoctoral Program for Innovative Talents","award":["BX20240383"],"award-info":[{"award-number":["BX20240383"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1109\/tpds.2024.3431189","type":"journal-article","created":{"date-parts":[[2024,7,19]],"date-time":"2024-07-19T17:42:00Z","timestamp":1721410920000},"page":"1708-1720","source":"Crossref","is-referenced-by-count":4,"title":["ElasticBatch: A Learning-Augmented Elastic Scheduling System for Batch Inference on MIG"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9530-2858","authenticated-orcid":false,"given":"Jiaxing","family":"Qi","sequence":"first","affiliation":[{"name":"Sino-German Joint Software Institute, Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wencong","family":"Xiao","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, Zhejiang Province, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4115-9072","authenticated-orcid":false,"given":"Mingzhen","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chaojie","family":"Yang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, Chaoyang District, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Li","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, Chaoyang District, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Lin","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, Zhejiang Province, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1101-7927","authenticated-orcid":false,"given":"Hailong","family":"Yang","sequence":"additional","affiliation":[{"name":"Sino-German Joint Software Institute, Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7186-0556","authenticated-orcid":false,"given":"Zhongzhi","family":"Luan","sequence":"additional","affiliation":[{"name":"Sino-German Joint Software Institute, Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5382-1473","authenticated-orcid":false,"given":"Depei","family":"Qian","sequence":"additional","affiliation":[{"name":"Sino-German Joint Software Institute, Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2018.2842821"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRITO48877.2020.9197818"},{"key":"ref4","first-page":"443","article-title":"Serving DNNs like clockwork: Performance predictability from the bottom up","volume-title":"Proc. 14th USENIX Conf. Operating Syst. Des. Implementation","author":"Gujarati"},{"key":"ref5","article-title":"Serving DNN models with multi-instance GPUs: A case of the reconfigurable machine scheduling problem","author":"Tan","year":"2021"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530510"},{"key":"ref7","first-page":"199","article-title":"Serving heterogeneous machine learning models on multi-GPU servers with spatio-temporal sharing","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Choi"},{"key":"ref8","first-page":"183","article-title":"DVABatch: Diversity-aware multi-entry multi-exit batching for efficient processing of DNN services on GPUs","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Cui"},{"key":"ref9","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref10","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.metrad.2023.100017"},{"key":"ref12","article-title":"Instruction tuning with GPT-4","author":"Peng","year":"2023"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"issue":"8","key":"ref14","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"ref18","article-title":"NVIDIA data center GPU manager (DCGM) user guide","year":"2023"},{"key":"ref19","article-title":"NVIDIA management library (NVML) user guide","year":"2023"},{"key":"ref20","article-title":"DistilBERT, a distilled version of BERT: Smaller, faster, cheaper and lighter","author":"Sanh","year":"2019"},{"key":"ref21","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"ref22","article-title":"GLM-130B: An open bilingual pre-trained model","author":"Zeng","year":"2022"},{"key":"ref23","first-page":"521","article-title":"Orca: A distributed serving system for Transformer-Based generative models","volume-title":"Proc. 16th USENIX Symp. Operating Syst. Des. Implementation","author":"Yu"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"issue":"60","key":"ref25","first-page":"1","article-title":"Naive Bayes classifiers","volume":"18","author":"Murphy","year":"2006"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/FSKD.2007.552"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.catena.2016.06.004"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2016.01.011"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2013.00021"},{"issue":"3","key":"ref30","first-page":"61","article-title":"Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods","volume":"10","author":"Platt","year":"1999","journal-title":"Adv. Large Margin Classifiers"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.4135\/9781446251119.n67"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1139\/x98-085"},{"key":"ref33","first-page":"489","article-title":"Solar energy prediction using decision tree regressor","volume-title":"Proc. 5th Int. Conf. Intell. Comput. Control Syst.","author":"Gupta"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2004.1380102"},{"key":"ref35","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","volume-title":"Proc. 13th USENIX Symp. Operating Syst. Des. Implementation","author":"Xiao"},{"key":"ref36","first-page":"485","article-title":"Tiresias: A GPU cluster manager for distributed deep learning","volume-title":"Proc. 16th USENIX Symp. Netw. Syst. Des. Implementation","author":"Gu"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/globecom38437.2019.9014110"},{"key":"ref38","first-page":"579","article-title":"Looking beyond GPUs for DNN scheduling on Multi-Tenant clusters","volume-title":"Proc. 16th USENIX Symp. Operating Syst. Des. Implementation","author":"Mohan"},{"key":"ref39","first-page":"613","article-title":"Clipper: A Low-Latency online prediction serving system","volume-title":"Proc. 14th USENIX Symp. Netw. Syst. Des. Implementation","author":"Crankshaw"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3205325"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/SECON48991.2020.9158444"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2020.3017573"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2022.3214113"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/10631781\/10605084.pdf?arnumber=10605084","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T18:03:05Z","timestamp":1726682585000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10605084\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10]]},"references-count":42,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2024.3431189","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10]]}}}