{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T23:15:00Z","timestamp":1776122100662,"version":"3.50.1"},"reference-count":45,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National R&D Program of China","award":["2018YFB1004800"],"award-info":[{"award-number":["2018YFB1004800"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62022057"],"award-info":[{"award-number":["62022057"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61632017"],"award-info":[{"award-number":["61632017"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61832006"],"award-info":[{"award-number":["61832006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. 
Syst."],"published-print":{"date-parts":[[2021,6,1]]},"DOI":"10.1109\/tpds.2020.3047638","type":"journal-article","created":{"date-parts":[[2020,12,28]],"date-time":"2020-12-28T20:44:32Z","timestamp":1609188272000},"page":"1307-1321","source":"Crossref","is-referenced-by-count":23,"title":["E<sup>2<\/sup>bird: <u>E<\/u>nhanced <u>E<\/u>lastic <u>B<\/u>atch for <u>I<\/u>mproving <u>R<\/u>esponsiveness and Throughput of <u>D<\/u>eep Learning Services"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6646-5260","authenticated-orcid":false,"given":"Weihao","family":"Cui","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5832-0347","authenticated-orcid":false,"given":"Quan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Han","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Mengze","family":"Wei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7404-2073","authenticated-orcid":false,"given":"Xiaoxin","family":"Tang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-2302","authenticated-orcid":false,"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Profiler users guide","author":"corporation","year":"0"},{"key":"ref38","first-page":"1","article-title":"Improving the speed of neural networks on CPUs","author":"vanhoucke","year":"2011","journal-title":"Proc NIPS Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303949"},{"key":"ref32","first-page":"951","article-title":"DeepCPU: Serving RNN-based deep learning models 10x faster","author":"zhang","year":"2018","journal-title":"Proc USENIX Annu Techn Conf"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190541"},{"key":"ref30","article-title":"Tensorflow serving 
batching guide","year":"2019"},{"key":"ref37","first-page":"2849","article-title":"Fixed point quantization of deep convolutional networks","author":"lin","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00363"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080215"},{"key":"ref34","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding","author":"han","year":"2015"},{"key":"ref10","article-title":"CUDA C\/C++ streams and concurrency","author":"corporation","year":"0"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178491"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD46524.2019.00075"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2954679.2872368"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3093315.3037700"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3093315.3037742"},{"key":"ref15","year":"0"},{"key":"ref16","first-page":"58","article-title":"Laius: Towards latency awareness and improved utilization of spatial multitasking accelerators in datacenters","author":"wei","year":"2019","journal-title":"Proc 33rd ACM Int Conf Supercomput"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080203"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"ref19","article-title":"Horovod: fast and easy distributed deep learning in tensorflow","author":"sergeev","year":"2018"},{"key":"ref28","article-title":"Inferline: Ml inference pipeline composition 
framework","author":"crankshaw","year":"2018"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"ref3","year":"0"},{"key":"ref6","year":"0"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/2934664"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"ref8","article-title":"Tensorflow-serving: Flexible, high-performance ml serving","author":"olston","year":"2017"},{"key":"ref7","article-title":"Multi-tenant GPU clusters for deep learning workloads: Analysis and implications","author":"jeon","year":"2018"},{"key":"ref2","year":"0"},{"key":"ref9","first-page":"613","article-title":"Clipper: A low-latency online prediction serving system","author":"crankshaw","year":"2017","journal-title":"Proc 11th USENIX Symp Networked Syst Des Implement"},{"key":"ref1","year":"0"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3155284.3018769"},{"key":"ref45","article-title":"cuDNN: Efficient primitives for deep learning","author":"chetlur","year":"2014"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133045"},{"key":"ref21","article-title":"Poseidon: A system architecture for efficient GPU-based deep learning on multiple machines","author":"zhang","year":"2015"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.435"},{"key":"ref24","first-page":"265","article-title":"TensorFlow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"Proc 12th USENIX Symp Operating Syst Des Implementation"},{"key":"ref41","article-title":"Training deep nets with sublinear memory cost","author":"chen","year":"2016"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00036"},{"key":"ref44","article-title":"Nvidia turing architecture whitepaper","year":"2019"},{"key":"ref26","first-page":"1","article-title":"Automatic 
differentiation in PyTorch","author":"paszke","year":"2017","journal-title":"Proc 31st Conf Neural Inf Process Syst"},{"key":"ref43","article-title":"High performance convolutional neural networks for document processing","author":"chellapilla","year":"2006"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/9321244\/09309326.pdf?arnumber=9309326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:50:29Z","timestamp":1652194229000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9309326\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,1]]},"references-count":45,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2020.3047638","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,1]]}}}