{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T15:32:03Z","timestamp":1768404723418,"version":"3.49.0"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2023,3,1]]},"DOI":"10.1109\/tpds.2022.3232715","type":"journal-article","created":{"date-parts":[[2022,12,28]],"date-time":"2022-12-28T18:33:37Z","timestamp":1672252417000},"page":"812-827","source":"Crossref","is-referenced-by-count":42,"title":["<i>iGniter:<\/i> Interference-Aware GPU Resource Provisioning for Predictable DNN Inference in the Cloud"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1590-5323","authenticated-orcid":false,"given":"Fei","family":"Xu","sequence":"first","affiliation":[{"name":"Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"given":"Jianian","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"given":"Jiabin","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2300-6996","authenticated-orcid":false,"given":"Li","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computing and Informatics, University of Louisiana at Lafayette, Lafayette, LA, USA"}]},{"given":"Ruitao","family":"Shang","sequence":"additional","affiliation":[{"name":"Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0987-9344","authenticated-orcid":false,"given":"Zhi","family":"Zhou","sequence":"additional","affiliation":[{"name":"Guangdong Key Laboratory of Big Data Analysis and Processing, School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, Guangdong Province, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8570-1345","authenticated-orcid":false,"given":"Fangming","family":"Liu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, Guangdong Province, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2017.2761740"},{"key":"ref2","first-page":"1","article-title":"Dynamic space-time scheduling for GPU inference","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Jain"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3386126"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486993"},{"key":"ref11","first-page":"1","article-title":"Cocktail: A multidimensional optimization for model serving in cloud","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Gunasekaran"},{"key":"ref12","first-page":"443","article-title":"Serving DNNs like clockwork: Performance predictability from the bottom up","volume-title":"Proc. USENIX Symp. Operating Syst. Des. Implementation","author":"Gujarati"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3064352"},{"key":"ref16","first-page":"613","article-title":"Clipper: A Low-Latency Online Prediction Serving System","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Crankshaw"},{"key":"ref17","first-page":"397","article-title":"INFaaS: Automated model-less inference serving","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Romero"},{"key":"ref18","first-page":"199","article-title":"Serving heterogeneous machine learning models on Multi-GPU servers with spatio-temporal sharing","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Choi"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2013.2287711"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3446382.3448606"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref25","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Simonyan"},{"key":"ref26","first-page":"2","article-title":"Efficient inference with TensorRT","volume-title":"Proc. GPU Technol. Conf.","author":"Vanholder"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS.2019.00011"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.98"},{"key":"ref32","first-page":"530","article-title":"The method of least squares","volume":"1","author":"Abdi","year":"2007","journal-title":"Encyclopedia Meas. Statist."},{"key":"ref33","article-title":"Near-optimal bin packing algorithms","author":"Johnson","year":"1973"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"ref36","first-page":"1","article-title":"The pascal visual object classes challenge 2012 (VOC2012) development kit","author":"Everingham","year":"2012"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3144614"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486987"},{"key":"ref39","article-title":"Serving DNN models with multi-instance GPUs: A case of the reconfigurable machine scheduling problem","author":"Tan","year":"2021"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507709"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2013.185"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2015.2481403"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507752"},{"key":"ref45","first-page":"1","article-title":"Characterization and prediction of performance interference on mediated pass through GPUs for interference-aware scheduler","volume-title":"Proc. USENIX Workshop Hot Top. Cloud Comput.","author":"Xu"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337873"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3079202"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037700"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507721"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/2954679.2872368"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/71\/10012125\/10002315-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/10012125\/10002315.pdf?arnumber=10002315","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,12]],"date-time":"2024-06-12T17:48:57Z","timestamp":1718214537000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10002315\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,1]]},"references-count":39,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2022.3232715","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3,1]]}}}