{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,30]],"date-time":"2026-06-30T15:58:29Z","timestamp":1782835109619,"version":"3.54.5"},"reference-count":88,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research &#x0026; Development","award":["2022YFB4501703"],"award-info":[{"award-number":["2022YFB4501703"]}]},{"name":"Major Key Project of PCL","award":["PCL2022A05"],"award-info":[{"award-number":["PCL2022A05"]}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["210487104"],"award-info":[{"award-number":["210487104"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1109\/tpds.2023.3340518","type":"journal-article","created":{"date-parts":[[2023,12,7]],"date-time":"2023-12-07T19:52:26Z","timestamp":1701978746000},"page":"280-296","source":"Crossref","is-referenced-by-count":25,"title":["Graft: Efficient Inference Serving for Hybrid Deep Learning With SLO Guarantees via DNN Re-Alignment"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2555-0220","authenticated-orcid":false,"given":"Jing","family":"Wu","sequence":"first","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, the Services Computing Technology and System Lab, Cluster and Grid Computing Lab in the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7181-6128","authenticated-orcid":false,"given":"Lin","family":"Wang","sequence":"additional","affiliation":[{"name":"Paderborn University, Paderborn, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0698-0903","authenticated-orcid":false,"given":"Qirui","family":"Jin","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, the Services Computing Technology and System Lab, Cluster and Grid Computing Lab in the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8570-1345","authenticated-orcid":false,"given":"Fangming","family":"Liu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173185"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483243"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483244"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421285"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3320060"},{"key":"ref6","first-page":"397","article-title":"INFaaS: Automated model-less inference serving","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Romero"},{"key":"ref7","first-page":"613","article-title":"Clipper: A low-latency online prediction serving system","volume-title":"Proc. 14th USENIX Symp. Netw. Syst. Des. Implementation","author":"Crankshaw"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419194"},{"key":"ref10","article-title":"Serving DNNs like clockwork: Performance predictability from the bottom up","volume-title":"Proc. 14th USENIX Symp. Operating Syst. Des. Implementation","author":"Gujarati"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3510831"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3487552.3487863"},{"key":"ref13","first-page":"352","article-title":"MLPerf mobile inference benchmark: An industry-standard open-source machine learning benchmark for on-device AI","volume-title":"Proc. Mach. Learn. Syst.","author":"Reddi"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00447"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3486618"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412700"},{"key":"ref17","article-title":"MC-SF: Slow-fast learning for mobile-cloud collaborative recommendation","author":"Chen","year":"2021"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467078"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037698"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419215"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450051"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2947893"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3195664"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3397315"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378522"},{"key":"ref30","article-title":"Split learning for collaborative deep learning in healthcare","author":"Poirot","year":"2019"},{"key":"ref31","article-title":"Rethinking atrous convolution for semantic image segmentation","author":"Chen","year":"2017"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3458336.3465289"},{"key":"ref33","article-title":"GFLOPs of image classification targeted models on ImageNet","year":"2022"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538919"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483274"},{"key":"ref36","first-page":"1","article-title":"Once-for-all: Train one network and specialize it for efficient deployment","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Cai"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_12"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/EMC2-NIPS53020.2019.00020"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63703-7_10"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538932"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538940"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3448625"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483249"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313591"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3491046"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00048"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/mc.2017.9"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303958"},{"key":"ref49","first-page":"723","article-title":"SOTER: Guarding black-box inference for general neural networks at the edge","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Shen"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3318216.3363301"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737614"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3349614.3356023"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796763"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3339825.3394938"},{"key":"ref57","first-page":"353","article-title":"ALERT: Accurate learning for energy and timeliness","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Wan"},{"key":"ref58","article-title":"Tetris: Memory-efficient serverless inference through tensor sharing","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Li"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/s00224-006-1350-7"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/2556195.2556213"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/3567955.3567964"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2946140"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475399"},{"key":"ref68","article-title":"Salus: Fine-grained GPU sharing primitives for deep learning applications","author":"Yu","year":"2019"},{"key":"ref69","first-page":"161","article-title":"Zico: Efficient GPU memory sharing for concurrent DNN training","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Lim"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583540"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483553"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563470"},{"key":"ref73","first-page":"133","article-title":"Peeking behind the curtains of serverless platforms","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Wang"},{"key":"ref74","first-page":"1106","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. 26th Annu. Conf. Neural Inf. Process. Syst.","author":"Krizhevsky"},{"key":"ref75","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. 3rd Int. Conf. Learn. Representations","author":"Simonyan"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.4324\/9781410605337-29"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/3318216.3363309"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267828"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2962435"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1145\/3620678.3624664"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3232715"},{"key":"ref83","article-title":"Opara: Exploring operator parallelism for expediting DNN inference on GPUs","author":"Chen","year":"2023"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486972"},{"key":"ref85","first-page":"133","article-title":"PowerChief: Intelligent power allocation for multi-stage applications to improve responsiveness on power constrained CMP","volume-title":"Proc. 44th Annu. Int. Symp. Comput. Archit.","author":"Chen"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037700"},{"key":"ref87","article-title":"Towards GPU utilization prediction for cloud deep learning","volume-title":"Proc. 12th USENIX Workshop Hot Topics Cloud Comput.","author":"Yeung"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/rtas.2019.00011"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ipdps.2019.00035"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037707"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/3274808.3274813"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018748"},{"key":"ref93","first-page":"199","article-title":"Serving heterogeneous machine learning models on multi-GPU servers with spatio-temporal sharing","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Choi"},{"key":"ref95","first-page":"929","article-title":"EdgeWise: A better stream processing engine for the edge","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Fu"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00090"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1145\/3583120.3589819"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2021.3098816"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00074"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/10365707\/10347464.pdf?arnumber=10347464","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T01:47:54Z","timestamp":1705024074000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10347464\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2]]},"references-count":88,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2023.3340518","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2]]}}}