{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T15:17:17Z","timestamp":1768922237694,"version":"3.49.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["62441225"],"award-info":[{"award-number":["62441225"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,2]]},"DOI":"10.1109\/iwqos65803.2025.11143526","type":"proceedings-article","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T17:29:52Z","timestamp":1757438992000},"page":"1-10","source":"Crossref","is-referenced-by-count":1,"title":["Bridging the Prediction-Decision Gap: Enhancing Model Deployment and Online Service Request Forecasting in Edge Inference Systems"],"prefix":"10.1109","author":[{"given":"Hesheng","family":"Sun","sequence":"first","affiliation":[{"name":"School of Computer Science, Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China"}]},{"given":"Zhuzhong","family":"Qian","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China"}]},{"given":"Andong","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University,State Key Laboratory for Novel Software Technology,Nanjing,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2894306"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.05095"},{"key":"ref3","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2021.108704"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421285"},{"key":"ref6","first-page":"613","article-title":"Clipper: A Low-Latency online prediction serving system","volume-title":"In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Crankshaw"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3112604"},{"key":"ref8","first-page":"443","article-title":"Serving DNNs like clockwork: Performance predictability from the bottom up","volume-title":"In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Gujarati"},{"key":"ref9","first-page":"1041","article-title":"Cocktail: A multidimensional optimization for model serving in cloud","volume-title":"In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Raj Gunasekaran"},{"key":"ref10","year":"2023","journal-title":"Gurobi Optimizer Reference Manual"},{"key":"ref11","first-page":"539","article-title":"Microsecond-scale preemption for concurrent GPU-accelerated DNN inferences","volume-title":"In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Han"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.79.8.2554.PMC346238.PMID6953413"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SECON48991.2020.9158425"},{"key":"ref16","first-page":"663","article-title":"AlpaServe: Statistical multiplexing with model parallelism for deep learning serving","volume-title":"In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Li"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629565"},{"key":"ref18","first-page":"579","article-title":"Looking beyond GPUs for DNN scheduling on MultiTenant clusters","volume-title":"In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Mohan"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS55097.2022.00032"},{"key":"ref20","year":"2019","journal-title":"Jetson nano developer kit"},{"key":"ref21","author":"Radford","year":"2018","journal-title":"Improving language understanding by generative pre-training"},{"key":"ref22","first-page":"397","article-title":"INFaaS: Automated model-less inference serving","volume-title":"In 2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Romero"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"ref24","year":"2023","journal-title":"SQLite Documentation"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3605573.3605615"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2019.2937342"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref28","first-page":"8795","article-title":"Learning mdps from features: Predict-thenoptimize for sequential decision making by reinforcement learning","volume":"34","author":"Wang","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref29","article-title":"Timemixer: Decomposable multiscale mixing for time series forecasting","volume-title":"In International Conference on Learning Representations (ICLR)","author":"Wang"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796961"},{"key":"ref31","first-page":"945","article-title":"Mlaas in the wild: Workload analysis and scheduling in large-scale heterogeneous gpu clusters","volume-title":"In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3467882"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2022.3189186"}],"event":{"name":"2025 IEEE\/ACM International Symposium on Quality of Service (IWQoS)","location":"Gold Coast, Australia","start":{"date-parts":[[2025,7,2]]},"end":{"date-parts":[[2025,7,4]]}},"container-title":["2025 IEEE\/ACM 33rd International Symposium on Quality of Service (IWQoS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11143240\/11143247\/11143526.pdf?arnumber=11143526","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T05:04:52Z","timestamp":1757480692000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11143526\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,2]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/iwqos65803.2025.11143526","relation":{},"subject":[],"published":{"date-parts":[[2025,7,2]]}}}