{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:20:27Z","timestamp":1778257227923,"version":"3.51.4"},"reference-count":65,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61761136014"],"award-info":[{"award-number":["61761136014"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61520106005"],"award-info":[{"award-number":["61520106005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key Research &amp; Development (R&amp;D) Plan","award":["2017YFB1001703"],"award-info":[{"award-number":["2017YFB1001703"]}]},{"name":"DFG"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2022,12,1]]},"DOI":"10.1109\/tpds.2022.3195664","type":"journal-article","created":{"date-parts":[[2022,8,5]],"date-time":"2022-08-05T00:20:24Z","timestamp":1659658824000},"page":"4499-4514","source":"Crossref","is-referenced-by-count":55,"title":["HiTDL: High-Throughput Deep Learning Inference at the Hybrid Mobile Edge"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2555-0220","authenticated-orcid":false,"given":"Jing","family":"Wu","sequence":"first","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7181-6128","authenticated-orcid":false,"given":"Lin","family":"Wang","sequence":"additional","affiliation":[{"name":"VU Amsterdam, Amsterdam, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8870-4309","authenticated-orcid":false,"given":"Qiangyu","family":"Pei","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xingqi","family":"Cui","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8570-1345","authenticated-orcid":false,"given":"Fangming","family":"Liu","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tingting","family":"Yang","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3300061.3300116"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3054656"},{"key":"ref32","year":"2020"},{"key":"ref31","year":"2020"},{"key":"ref30","year":"2020"},{"key":"ref37","first-page":"103","article-title":"GPipe: Efficient training of giant neural networks using pipeline parallelism","author":"huang","year":"2019","journal-title":"Proc 33rd Int Conf Neural Inf Process Syst"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2962435"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337873"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00107"},{"key":"ref60","first-page":"29","article-title":"Mainstream: Dynamic stem-sharing for multi-tenant video processing","author":"jiang","year":"2018","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303957"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/3318216.3363312"},{"key":"ref63","first-page":"1","article-title":"Improving device-edge cooperative inference of deep learning via 2-step pruning","author":"shi","year":"2019","journal-title":"Proc IEEE Conf Comput Commun Workshops"},{"key":"ref28","year":"2020"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"ref27","year":"2020"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2017.226"},{"key":"ref29","year":"2020"},{"key":"ref2","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053993"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3376897.3379166"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303958"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3318216.3363308"},{"key":"ref24","first-page":"613","article-title":"Clipper: A low-latency online prediction serving system","author":"crankshaw","year":"2017","journal-title":"Proc USENIX Symp Netw Syst Des Implementation"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421285"},{"key":"ref26","year":"2020"},{"key":"ref25","first-page":"353","article-title":"ALERT: Accurate learning for energy and timeliness","author":"wan","year":"2020","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"ref51","year":"2020"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173185"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3241539.3241557"},{"key":"ref57","first-page":"17:1","article-title":"Precog: Prefetching for image recognition applications at the edge","author":"drolia","year":"2017","journal-title":"Proc 2nd ACM\/IEEE Symp Edge Comput"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2017.94"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3339825.3394938"},{"key":"ref54","year":"2020"},{"key":"ref53","article-title":"TasNet: Surpassing ideal time-frequency masking for speech separation","author":"luo","year":"2018"},{"key":"ref52","article-title":"EfficientNet: Rethinking model scaling for convolutional neural networks","author":"tan","year":"2019"},{"key":"ref10","article-title":"$\\mu$?Layer: Low latency on-device inference using cooperative single-layer acceleration and processor-friendly quantization","author":"kim","year":"2019","journal-title":"Proc 14th EuroSys Conf"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/778"},{"key":"ref40","article-title":"Rethinking the inception architecture for computer vision","author":"szegedy","year":"2015"},{"key":"ref12","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293915"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3318216.3363309"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230554"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037698"},{"key":"ref17","article-title":"JointDNN: An efficient training and inference engine for intelligent mobile cloud computing services","author":"eshratifar","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303953"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737614"},{"key":"ref4","first-page":"1106","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref3","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"radford","year":"2019","journal-title":"OpenAIRE blog"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419194"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SEC50012.2020.00012"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240556"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467078"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/0166-218X(81)90005-6"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1023\/A:1008258310679"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3081333.3081347"},{"key":"ref47","first-page":"98","article-title":"Fine-grained GPU sharing primitives for deep learning applications","author":"yu","year":"2020","journal-title":"Proc Mach Learn Syst"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"ref41","first-page":"1049","article-title":"MArk: Exploiting cloud services for cost-effective, SLO-aware machine learning inference serving","author":"zhang","year":"2019","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7091-6492-1_55"},{"key":"ref43","article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","author":"howard","year":"2017"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/9790018\/09847073.pdf?arnumber=9847073","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,19]],"date-time":"2022-09-19T19:57:12Z","timestamp":1663617432000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9847073\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,1]]},"references-count":65,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2022.3195664","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,1]]}}}