{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T23:39:22Z","timestamp":1768347562869,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","funder":[{"name":"Korea AeroSpace Administration","award":["RS-2022-00155668"],"award-info":[{"award-number":["RS-2022-00155668"]}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2403089, CNS-1822737"],"award-info":[{"award-number":["2403089, CNS-1822737"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,19]]},"DOI":"10.1145\/3772052.3772271","type":"proceedings-article","created":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:19:00Z","timestamp":1768321140000},"page":"695-707","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Spatio-Temporal Resource Control for Cloud-Native GPU Provisioning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4569-2595","authenticated-orcid":false,"given":"Hyeon-Jun","family":"Jang","sequence":"first","affiliation":[{"name":"Konkuk University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4419-0979","authenticated-orcid":false,"given":"Sang-Jae","family":"Kim","sequence":"additional","affiliation":[{"name":"Konkuk University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8754-0311","authenticated-orcid":false,"given":"Weikuan","family":"Yu","sequence":"additional","affiliation":[{"name":"Florida State University, Tallahassee, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9496-3486","authenticated-orcid":false,"given":"Hyun-Wook","family":"Jin","sequence":"additional","affiliation":[{"name":"Konkuk University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2025. Configure Quality of Service for Pods. https:\/\/kubernetes.io\/docs\/tasks\/configure-pod-container\/quality-service-pod\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2025. ImageNet Classification with Deep Convolutional Neural Networks | Communications of the ACM. https:\/\/dl.acm.org\/doi\/10.1145\/3065386."},{"key":"e_1_3_2_1_3_1","unstructured":"2025. Meta-Llama\/Llama-3.1-8B-Instruct \u2022 Hugging Face. https:\/\/huggingface.co\/meta-llama\/Llama-3.1-8B-Instruct."},{"key":"e_1_3_2_1_4_1","unstructured":"2025. NVIDIA Multi-Instance GPU. https:\/\/www.nvidia.com\/en-us\/technologies\/multi-instance-gpu\/."},{"key":"e_1_3_2_1_5_1","unstructured":"2025. Nvidia Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html."},{"key":"e_1_3_2_1_6_1","unstructured":"2025. Triton-Inference-Server\/Tensorrtllm_backend. Triton Inference Server."},{"key":"e_1_3_2_1_7_1","volume-title":"PipeSwitch: Fast Pipelined Context Switching for Deep Learning Applications. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Bai Zhihao","year":"2020","unstructured":"Zhihao Bai, Zhen Zhang, Yibo Zhu, and Xin Jin. 2020. PipeSwitch: Fast Pipelined Context Switching for Deep Learning Applications. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 499\u2013514."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/rtas58335.2023.00012"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.4230\/LIPICS.ECRTS.2025.21"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCC.2021.3119205"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673091"},{"key":"e_1_3_2_1_12_1","volume-title":"Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Choi Seungbeom","year":"2022","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2022. Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). USENIX Association, Carlsbad, CA, 199\u2013216."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3731569.3764818"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_1_15_1","volume-title":"Clipper: A Low-Latency Online Prediction Serving System. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). USENIX Association, Boston, MA, 613\u2013627."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur and Alan at el. Schelten. 2024. The Llama 3 Herd of Models. https:\/\/doi.org\/10.48550\/arXiv.2407.21783 arXiv:2407.21783 [cs]","DOI":"10.48550\/arXiv.2407.21783"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605573.3605638"},{"key":"e_1_3_2_1_18_1","volume-title":"Microsecond-Scale Preemption for Concurrent GPU-accelerated DNN Inferences. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Han Mingcong","year":"2022","unstructured":"Mingcong Han, Hanze Zhang, Rong Chen, and Haibo Chen. 2022. Microsecond-Scale Preemption for Concurrent GPU-accelerated DNN Inferences. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 539\u2013558."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2024.3430063"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. https:\/\/doi.org\/10.48550\/arXiv.1512.03385 arXiv:1512.03385 [cs]","DOI":"10.48550\/arXiv.1512.03385"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3721427"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00048"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. https:\/\/doi.org\/10.48550\/arXiv.1906.00091 arXiv:1906.00091 [cs]","DOI":"10.48550\/arXiv.1906.00091"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613163"},{"key":"e_1_3_2_1_25_1","unstructured":"NVIDIA Corporation. 2025. Triton Inference Server: An Optimized Cloud and Edge Inferencing Solution."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629578"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_28_1","volume-title":"Transparent GPU Sharing in Container Clouds for Deep Learning Workloads. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Wu Bingyang","year":"2023","unstructured":"Bingyang Wu, Zili Zhang, Zhihao Bai, Xuanzhe Liu, and Xin Jin. 2023. Transparent GPU Sharing in Container Clouds for Deep Learning Workloads. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 69\u201385."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3631311.3632401"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.6422"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673089"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3696070"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","unstructured":"Yihao Zhao Xin Liu Shufan Liu Xiang Li Yibo Zhu Gang Huang Xuanzhe Liu and Xin Jin. 2023. MuxFlow: Efficient and Safe GPU Sharing in Large-Scale Production Deep Learning Clusters. In Eurosys23. arXiv Eurosys23. https:\/\/doi.org\/10.48550\/arXiv.2303.13803","DOI":"10.48550\/arXiv.2303.13803"}],"event":{"name":"SoCC '25: ACM Symposium on Cloud Computing","location":"Online USA","acronym":"SoCC '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2025 ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772052.3772271","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:19:05Z","timestamp":1768321145000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772052.3772271"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":33,"alternative-id":["10.1145\/3772052.3772271","10.1145\/3772052"],"URL":"https:\/\/doi.org\/10.1145\/3772052.3772271","relation":{},"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2026-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}