{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T00:18:45Z","timestamp":1777421925256,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3721146.3721937","type":"proceedings-article","created":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T17:42:05Z","timestamp":1743529325000},"page":"246-253","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Manage the Workloads not the Cluster: Designing a Control Plane for Large-Scale AI Clusters"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6067-0976","authenticated-orcid":false,"given":"Ruiqi","family":"Lai","sequence":"first","affiliation":[{"name":"NTU Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9593-2761","authenticated-orcid":false,"given":"Siyu","family":"Cao","sequence":"additional","affiliation":[{"name":"NTU Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0317-1027","authenticated-orcid":false,"given":"Leqi","family":"Li","sequence":"additional","affiliation":[{"name":"NTU Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3594-1092","authenticated-orcid":false,"given":"Luo","family":"Mai","sequence":"additional","affiliation":[{"name":"University of Edinburgh, Edinburgh, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3156-010X","authenticated-orcid":false,"given":"Dmitrii","family":"Ustiugov","sequence":"additional","affiliation":[{"name":"NTU Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. Kubernetes. Available at https:\/\/kubernetes.io."},{"key":"e_1_3_2_1_2_1","unstructured":"NVIDIA [n. d.]. NVIDIA DGX SuperPOD: Data Center Design Featuring NVIDIA DGX H100 Systems. NVIDIA. https:\/\/docs.nvidia.com\/dgx-superpod\/design-guides\/dgx-superpod-data-center-design-h100\/latest\/electrical.html Accessed: February 11 2025."},{"key":"e_1_3_2_1_3_1","unstructured":"Data Canopy 2024. 4 Tips to Avoid Oversubscribing to Power in Your Data Center. Data Canopy. https:\/\/datacanopy.com\/4-tips-avoid-oversubscribing-power-data-center\/"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the 18th Symposium on Operating System Design and Implementation (OSDI). 117--134","author":"Agrawal Amey","year":"2024","unstructured":"Amey Agrawal, Nitin Kedia, Ashish Panwar, Jayashree Mohan, Nipun Kwatra, Bhargav S. Gulavani, Alexey Tumanov, and Ramachandran Ramjee. 2024. Taming Throughput-Latency Tradeoff in LLM Inference with Sarathi-Serve. In Proceedings of the 18th Symposium on Operating System Design and Implementation (OSDI). 117--134."},{"key":"e_1_3_2_1_5_1","unstructured":"DeepSeek AI. 2025. DeepSeek-R1. https:\/\/huggingface.co\/deepseek-ai\/DeepSeek-R1 Introduces DeepSeek-R1 a reasoning model comparable to OpenAI-o1 in math code and reasoning tasks. It highlights the open-sourcing of DeepSeek-R1-Zero DeepSeek-R1."},{"key":"e_1_3_2_1_6_1","volume-title":"Prediction-Based Power Oversubscription in Cloud Platforms. In 2021 USENIX Annual Technical Conference. https:\/\/www.microsoft.com\/en-us\/research\/uploads\/prod\/2020\/10\/Per-VM-Capping-ATC21","author":"Akram Shoaib","year":"2021","unstructured":"Shoaib Akram, Joseph Izraelevitz, Christos Kozyrakis, Radhika Mittal, Jennifer Switzer, Rachee Singh, and Rebecca Isaacs. 2021. Prediction-Based Power Oversubscription in Cloud Platforms. In 2021 USENIX Annual Technical Conference. https:\/\/www.microsoft.com\/en-us\/research\/uploads\/prod\/2020\/10\/Per-VM-Capping-ATC21.pdf Accessed: February 11, 2025."},{"key":"e_1_3_2_1_7_1","unstructured":"CoreSite. 2025. Facing the Data Center Power Density Challenge. https:\/\/www.coresite.com\/blog\/facing-the-data-center-power-density-challenge Accessed 2025-02-10."},{"key":"e_1_3_2_1_8_1","unstructured":"Fierce Network. 2024. Cloud providers want to crank up rack power for AI. https:\/\/www.fierce-network.com\/cloud\/cloud-providers-want-crank-rack-power-10x-ai Accessed 2025-02-10."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 18th Symposium on Operating System Design and Implementation (OSDI). 135--153","author":"Fu Yao","year":"2024","unstructured":"Yao Fu, Leyang Xue, Yeqi Huang, Andrei-Octavian Brabete, Dmitrii Ustiugov, Yuvraj Patel, and Luo Mai. 2024. ServerlessLLM: Low-Latency Serverless Inference for Large Language Models. In Proceedings of the 18th Symposium on Operating System Design and Implementation (OSDI). 135--153."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_12_1","volume-title":"d.]. Azure Public Dataset: Azure LLM Inference Trace","author":"Azure Microsoft","year":"2023","unstructured":"Microsoft Azure. [n. d.]. Azure Public Dataset: Azure LLM Inference Trace 2023. Available at https:\/\/github.com\/Azure\/AzurePublicDataset\/blob\/master\/AzureLLMInferenceDataset2023.md."},{"key":"e_1_3_2_1_13_1","volume-title":"US data center power consumption to double by","year":"2030","unstructured":"Newmark. 2025. US data center power consumption to double by 2030. https:\/\/www.datacenterdynamics.com\/en\/news\/us-data-center-power-consumption\/ Accessed 2025-02-10."},{"key":"e_1_3_2_1_14_1","volume-title":"NVIDIA Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute Accessed","author":"NVIDIA.","year":"2025","unstructured":"NVIDIA. 2025. NVIDIA Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute Accessed: 2025."},{"key":"e_1_3_2_1_15_1","volume-title":"NVIDIA Nsight Systems. https:\/\/developer.nvidia.com\/nsight-systems Accessed","author":"NVIDIA.","year":"2025","unstructured":"NVIDIA. 2025. NVIDIA Nsight Systems. https:\/\/developer.nvidia.com\/nsight-systems Accessed: 2025."},{"key":"e_1_3_2_1_16_1","unstructured":"OpenAI. 2024. ChatGPT. https:\/\/chatgpt.com ChatGPT helps you get answers find inspiration and be more productive. It is free to use and easy to try. Just ask and ChatGPT can help with writing [4].."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 51st International Symposium on Computer Architecture (ISCA). 118--132","author":"Patel Pratyush","year":"2024","unstructured":"Pratyush Patel, Esha Choukse, Chaojie Zhang, Aashaka Shah, \u00cd\u00f1igo Goiri, Saeed Maleki, and Ricardo Bianchini. 2024. Splitwise: Efficient Generative LLM Inference Using Phase Splitting. In Proceedings of the 51st International Symposium on Computer Architecture (ISCA). 118--132."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Pratyush Patel Esha Choukse Chaojie Zhang \u00cd\u00f1igo Goiri Brijesh Warrier Nithish Mahalingam and Ricardo Bianchini. 2024. Characterizing Power Management Opportunities for LLMs in the Cloud. In ASPLOS (3). 207--222.","DOI":"10.1145\/3620666.3651329"},{"key":"e_1_3_2_1_19_1","volume-title":"Towards Safe Power Oversubscription and Energy Efficiency of Data Centers. Ph. D. Dissertation","author":"Patki T.","year":"2025","unstructured":"T. Patki. 2023. Towards Safe Power Oversubscription and Energy Efficiency of Data Centers. Ph. D. Dissertation. University of South Florida. https:\/\/digitalcommons.usf.edu\/cgi\/viewcontent.cgi?article=10164&context=etd Accessed: February 11, 2025."},{"key":"e_1_3_2_1_20_1","unstructured":"Precedence Research. 2025. In-Memory Computing Market. https:\/\/www.precedenceresearch.com\/in-memory-computing-market Accessed 2025-02-10."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593856.3595893"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.34133\/icomputing.0006"},{"key":"e_1_3_2_1_23_1","volume-title":"Singularity: Planet-Scale, Preemptive and Elastic Scheduling of AI Workloads. CoRR abs\/2202.07848","author":"Shukla Dharma","year":"2022","unstructured":"Dharma Shukla, Muthian Sivathanu, Srinidhi Viswanatha, Bhargav S. Gulavani, Rimma Nehme, Amey Agrawal, Chen Chen, Nipun Kwatra, Ramachandran Ramjee, Pankaj Sharma, Atul Katiyar, Vipul Modi, Vaibhav Sharma, Abhishek Singh, Shreshth Singhal, Kaustubh Welankar, Lu Xun, Ravi Anupindi, Karthik Elangovan, Hasibur Rahman, Zhou Lin, Rahul Seetharaman, Cheng Xu, Eddie Ailijiang, Suresh Krishnappa, and Mark Russinovich. 2022. Singularity: Planet-Scale, Preemptive and Elastic Scheduling of AI Workloads. CoRR abs\/2202.07848 (2022)."},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the 31st IEEE Symposium on High-Performance Computer Architecture (HPCA).","author":"Stojkovic Jovan","year":"2024","unstructured":"Jovan Stojkovic, Chaojie Zhang, \u00cd\u00f1igo Goiri, Josep Torrellas, and Esha Choukse. 2024. DynamoLLM: Designing LLM Inference Clusters for Performance and Energy Efficiency. In Proceedings of the 31st IEEE Symposium on High-Performance Computer Architecture (HPCA)."},{"key":"e_1_3_2_1_25_1","unstructured":"TechInsights. 2025. Memory Market Outlook: AI Demand and Tight Supply Drive Resurgence. https:\/\/www.techinsights.com\/blog\/memory-market-outlook-ai-demand-and-tight-supply-drive-resurgence Accessed 2025-02-10."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2741948.2741964"}],"event":{"name":"EuroMLSys '25: 5th Workshop on Machine Learning and Systems","location":"World Trade Center Rotterdam Netherlands","acronym":"EuroMLSys '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 5th Workshop on Machine Learning and Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721146.3721937","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721146.3721937","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:39Z","timestamp":1750298259000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721146.3721937"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":25,"alternative-id":["10.1145\/3721146.3721937","10.1145\/3721146"],"URL":"https:\/\/doi.org\/10.1145\/3721146.3721937","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-04-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}