{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:37:54Z","timestamp":1759333074705,"version":"3.38.0"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62341108"],"award-info":[{"award-number":["62341108"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2242022k60006"],"award-info":[{"award-number":["2242022k60006"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1109\/tpds.2025.3526283","type":"journal-article","created":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T19:49:04Z","timestamp":1736192944000},"page":"659-674","source":"Crossref","is-referenced-by-count":1,"title":["SMDP-Based Dynamic Batching for Improving Responsiveness and Energy Efficiency of Batch Services"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4372-3404","authenticated-orcid":false,"given":"Yaodan","family":"Xu","sequence":"first","affiliation":[{"name":"Department of Electronic Engineering, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0651-0071","authenticated-orcid":false,"given":"Sheng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0420-2024","authenticated-orcid":false,"given":"Zhisheng","family":"Niu","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICC45041.2023.10278962"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2004.01.013"},{"key":"ref3","first-page":"1049","article-title":"MArk: Exploiting cloud services for cost-effective, SLO-Aware machine learning inference serving","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Zhang"},{"key":"ref4","first-page":"613","article-title":"Clipper: A low-latency online prediction serving system","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Crankshaw"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2857044"},{"article-title":"Benchmarking TPU, GPU, and CPU platforms for deep learning","year":"2019","author":"Wang","key":"ref6"},{"year":"2018","key":"ref7","article-title":"NVIDIA AI inference platform technical overview"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2022.01.004"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TGCN.2022.3171680"},{"article-title":"Packrat: Automatic reconfiguration for latency minimization in CPU-based DNN serving","year":"2023","author":"Bhardwaj","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3144614"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-023-05183-6"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00049"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3047638"},{"year":"2017","key":"ref16","article-title":"Google cloud prediction API documentation"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.4324\/9781410605337-29"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2012.237"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2846234"},{"article-title":"DBS: Dynamic batch size for distributed deep neural network training","year":"2020","author":"Ye","key":"ref20"},{"key":"ref21","first-page":"119","article-title":"Zeus: Understanding and optimizing GPU energy consumption of DNN training","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"You"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.3040596"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2024.3364669"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2021.3096245"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.25"},{"key":"ref26","first-page":"663","article-title":"AlpaServe: Statistical multiplexing with model parallelism for deep learning serving","volume-title":"Proc. USENIX Symp. Oper. Syst. Des. Implementation","author":"Li"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356164"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/0166-5316(93)90035-S"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2020.102183"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2006.252831"},{"issue":"6","key":"ref31","first-page":"43","article-title":"Bulk service queueing models\u2014A survey","volume":"106","author":"Sasikala","year":"2016","journal-title":"Int. J. Pure Appl. Math"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.06.012"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.2307\/1426040"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1017\/jpr.2017.8"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2022.3192613"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3204949.3204975"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177698869"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s12597-015-0197-6"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1504\/IJMOR.2020.106908"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1504\/IJOR.2021.113500"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-2217(01)00297-1"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s11134-022-09794-3"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1002\/SERIES1345"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/BF01719758"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/0022-247X(82)90271-2"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1287\/opre.9.3.383"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/s001860050088"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.25.4.320"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1287\/opre.22.5.1008"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1017\/S0269964800001121"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/10908515\/10829781.pdf?arnumber=10829781","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,4]],"date-time":"2025-03-04T06:15:40Z","timestamp":1741068940000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10829781\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":50,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2025.3526283","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"type":"print","value":"1045-9219"},{"type":"electronic","value":"1558-2183"},{"type":"electronic","value":"2161-9883"}],"subject":[],"published":{"date-parts":[[2025,4]]}}}