{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T11:03:23Z","timestamp":1777460603850,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3805621.3807643","type":"proceedings-article","created":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:08:45Z","timestamp":1777381725000},"page":"163-170","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Bridging CPU and GPU Autoscaling for Cost-Efficient Inference Serving"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6362-0967","authenticated-orcid":false,"given":"Mehran","family":"Salmani","sequence":"first","affiliation":[{"name":"Computer Science and Automation, TU Ilmenau, Ilmenau, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3232-5657","authenticated-orcid":false,"given":"Kamran","family":"Razavi","sequence":"additional","affiliation":[{"name":"turbalance, Heidelberg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7711-4450","authenticated-orcid":false,"given":"Peter","family":"Amthor","sequence":"additional","affiliation":[{"name":"Computer Science and Automation, TU Ilmenau, Ilmenau, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1588-2056","authenticated-orcid":false,"given":"Boris","family":"Koldehofe","sequence":"additional","affiliation":[{"name":"Mathematics and Computer Science, Marburg University, Marburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Arnaud Van Looveren, and Clive Cox","author":"Akoush Sherif","year":"2022","unstructured":"Sherif Akoush, Andrei Paleyes, Arnaud Van Looveren, and Clive Cox. 2022. Desiderata for next generation of ML model serving. arXiv preprint arXiv:2210.14665 (2022)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1465482.1465560"},{"key":"e_1_3_2_1_3_1","unstructured":"Dario Amodei and Danny Hernandez. 2019. AI and compute. https:\/\/openai.com\/blog\/ai-and-compute\/."},{"key":"e_1_3_2_1_4_1","unstructured":"archiveteam. 2021. Archiveteam-Twitter-stream-2021-08. https:\/\/archive.org\/details\/archiveteam-twitter-stream-2021-08."},{"key":"e_1_3_2_1_5_1","volume-title":"In-place resource resize for Kubernetes Pods. https:\/\/kubernetes.io\/blog\/2023\/05\/12\/in-place-pod-resize-alpha\/. Accessed","author":"Authors The Kubernetes","year":"2024","unstructured":"The Kubernetes Authors. 2023. In-place resource resize for Kubernetes Pods. https:\/\/kubernetes.io\/blog\/2023\/05\/12\/in-place-pod-resize-alpha\/. Accessed 24 June 2024."},{"key":"e_1_3_2_1_6_1","unstructured":"Canonical Ltd. 2025. MicorK8s. https:\/\/microk8s.io\/. Accessed 07.10.2025."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673091"},{"key":"e_1_3_2_1_8_1","unstructured":"John C.B. Cooper. 2005. The Poisson and Exponential distributions. https:\/\/neurophysics.ucsd.edu\/courses\/physics_171\/exponential.pdf. Accessed on 02.10.2025."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421285"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Gunasekaran Jashwant Raj","unstructured":"Jashwant Raj Gunasekaran, Cyan Subhra Mishra, Prashanth Thinakaran, Bikash Sharma, Mahmut Taylan Kandemir, and Chita R. Das. 2022. Cocktail: A multidimensional optimization for model serving in cloud. In Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). USENIX Association, Renton, WA, 1041\u20131057. https:\/\/www.usenix.org\/conference\/nsdi22\/presentation\/gunasekaran"},{"key":"e_1_3_2_1_12_1","unstructured":"Jiacheng Guo and Santosh Bhavani. 2024. Reduce ML inference costs on Amazon SageMaker with hardware and software acceleration. https:\/\/tinyurl.com\/bdd4nkme\/. Accessed: 2024-06-21."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486993"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3696073"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3717459"},{"key":"e_1_3_2_1_16_1","volume-title":"Yoav Shoham, Russell Wald, and Jack Clark.","author":"Maslej Nestor","year":"2024","unstructured":"Nestor Maslej, Loredana Fattorini, Raymond Perrault, Vanessa Parli, Anka Reuel, Erik Brynjolfsson, John Etchemendy, Katrina Ligett, Terah Lyons, James Manyika, Juan Carlos Niebles, Yoav Shoham, Russell Wald, and Jack Clark. 2024. The AI index 2024 annual report. (2024)."},{"key":"e_1_3_2_1_17_1","unstructured":"NVIDIA Corporation. 2025. Multi-Process Service Release r575. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html. Accessed on 26.09.2025."},{"key":"e_1_3_2_1_18_1","first-page":"211","article-title":"Comparison of confidence intervals for the Poisson mean: some new aspects","volume":"10","author":"Patil VV","year":"2012","unstructured":"VV Patil and HV Kulkarni. 2012. Comparison of confidence intervals for the Poisson mean: some new aspects. REVSTAT-Statistical Journal 10, 2 (2012), 211\u201322.","journal-title":"REVSTAT-Statistical Journal"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3701717.3730549"},{"key":"e_1_3_2_1_20_1","unstructured":"Python Software Foundation. 2025. Asynchronous I\/O. https:\/\/docs.python.org\/3\/library\/asyncio.html. Accessed 07.10.2025."},{"key":"e_1_3_2_1_21_1","unstructured":"PyTorch. 2020. CPU threading and TorchScript inference. https:\/\/docs.pytorch.org\/docs\/stable\/notes\/cpu_threading_torchscript_inference.html. Accesses 07.10.2025."},{"key":"e_1_3_2_1_22_1","unstructured":"Pytorch Team. 2025. PyTorch. https:\/\/pytorch.org\/. Accessed 07.10.2025."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3642970.3655833"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS54340.2022.00020"},{"key":"e_1_3_2_1_25_1","volume-title":"A tale of two scales: Reconciling horizontal and vertical scaling for inference serving systems. arXiv preprint arXiv:2407.14843","author":"Razavi Kamran","year":"2024","unstructured":"Kamran Razavi, Mehran Salmani, Max M\u00fchlh\u00e4user, Boris Koldehofe, and Lin Wang. 2024. A tale of two scales: Reconciling horizontal and vertical scaling for inference serving systems. arXiv preprint arXiv:2407.14843 (2024)."},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J. Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated model-less inference serving. In Proceedings of the 2021 USENIX Annual Technical Conference (USENIX ATC 21). USENIX Association, 397\u2013411. https:\/\/www.usenix.org\/conference\/atc21\/presentation\/romero"},{"key":"e_1_3_2_1_27_1","unstructured":"Salad. 2025. Affordable secure community cloud for AI\/ML inference. https:\/\/salad.com\/pricing. Accesses 17.02.2026."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578356.3592578"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 2020 USENIX annual technical conference (USENIX ATC 20)","author":"Shahrad Mohammad","year":"2020","unstructured":"Mohammad Shahrad, Rodrigo Fonseca, Inigo Goiri, Gohar Chaudhry, Paul Batum, Jason Cooke, Eduardo Laureano, Colby Tresness, Mark Russinovich, and Ricardo Bianchini. 2020. Serverless in the wild: Characterizing and optimizing the serverless workload at a large cloud provider. In Proceedings of the 2020 USENIX annual technical conference (USENIX ATC 20). USENIX Association, 205\u2013218. https:\/\/www.usenix.org\/conference\/atc20\/presentation\/shahrad"},{"key":"e_1_3_2_1_30_1","unstructured":"Scott Sheffield. 2019. Sums of independent random variables. https:\/\/math.mit.edu\/~sheffield\/2019600\/Lecture22.pdf. Accessed on 02.10.2025."},{"key":"e_1_3_2_1_31_1","unstructured":"The Prometheus Authors. 2025. Open source metrics and monitoring for your systems and services. https:\/\/prometheus.io\/. Accessed on 07.10.2025."},{"key":"e_1_3_2_1_32_1","unstructured":"Torch Contributors. 2025. resnet50. https:\/\/docs.pytorch.org\/vision\/main\/models\/generated\/torchvision.models.resnet50.html. Accesses 17.02.2026."},{"key":"e_1_3_2_1_33_1","unstructured":"Yuxing Xiang Xue Li Kun Qian Wenyuan Yu Ennan Zhai and Xin Jin. 2025. ServeGen: Workload characterization and generation of large language model serving in production. arXiv:2505.09999 [cs.DC] https:\/\/arxiv.org\/abs\/2505.09999"}],"event":{"name":"EuroSys '26: 21st European Conference on Computer Systems","location":"Edinburgh Scotland Uk","acronym":"EuroMLSys '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Sixth European Workshop on Machine Learning and Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3805621.3807643","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:12:45Z","timestamp":1777381965000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805621.3807643"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,27]]},"references-count":33,"alternative-id":["10.1145\/3805621.3807643","10.1145\/3805621"],"URL":"https:\/\/doi.org\/10.1145\/3805621.3807643","relation":{},"subject":[],"published":{"date-parts":[[2026,4,27]]},"assertion":[{"value":"2026-04-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}