{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T08:44:48Z","timestamp":1767084288181,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2336886"],"award-info":[{"award-number":["CNS-2336886"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2344505"],"award-info":[{"award-number":["CNS-2344505"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754670","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"449-459","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Power Capping of GPU Servers for Machine Learning Inference Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8028-7113","authenticated-orcid":false,"given":"Yuan","family":"Ma","sequence":"first","affiliation":[{"name":"The Ohio state university, Columbus, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5848-5667","authenticated-orcid":false,"given":"Srinivasan","family":"Subramaniyan","sequence":"additional","affiliation":[{"name":"The Ohio state university, Columbus, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9633-1418","authenticated-orcid":false,"given":"Xiaorui","family":"Wang","sequence":"additional","affiliation":[{"name":"The Ohio state university, Columbus, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"AMD. 2025. AMD ROCm SMI Power Control Documentation. https:\/\/rocm.docs.amd.com\/projects\/amdsmi\/en\/docs-5.6.1\/doxygen\/docBin\/html\/group__PowerCont.html. Accessed: 2025-04-04."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/IGCC.2018.8752132"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS60910.2024.00051"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS54860.2022.00039"},{"key":"e_1_3_3_1_6_2","volume-title":"USENIX ATC","author":"Choi Seungbeom","year":"2022","unstructured":"Seungbeom Choi et\u00a0al. 2022. Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In USENIX ATC."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1201\/9781003162810-13"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643832.3661878"},{"key":"e_1_3_3_1_9_2","volume-title":"The elements of statistical learning data mining, Inference, and prediction","author":"Hastie Trevor","year":"2001","unstructured":"Trevor Hastie et\u00a0al. 2001. The elements of statistical learning data mining, Inference, and prediction. Springer."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.5555\/975344"},{"key":"e_1_3_3_1_11_2","unstructured":"Bob Keaveney. 2024. A Data Center Building Boom Is About to Begin. https:\/\/biztechmagazine.com\/article\/2024\/04\/data-center-building-boom-about-begin."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICOIN59985.2024.10572152"},{"key":"e_1_3_3_1_13_2","unstructured":"Bill Kleyman. 2024. AFCOM 2024 State of the Data Center Report. https:\/\/itchronicles.com\/wp-content\/uploads\/2024\/02\/Data-Ctr-Report.pdf."},{"key":"e_1_3_3_1_14_2","volume-title":"USENIX ATC","author":"Kumbhare Alok\u00a0Gautam","year":"2021","unstructured":"Alok\u00a0Gautam Kumbhare, Reza Azimi, Ioannis Manousakis, et\u00a0al. 2021. Prediction-Based Power Oversubscription in Cloud Platforms. In USENIX ATC."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICAC.2007.35"},{"key":"e_1_3_3_1_16_2","volume-title":"OSDI","author":"Li Shaohong","year":"2020","unstructured":"Shaohong Li et\u00a0al. 2020. Thunderbolt:Throughput-Optimized Quality-of-Service-Aware power capping at scale. In OSDI."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00067"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2012.31"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370821"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Seyed\u00a0Morteza Nabavinejad Sherief Reda et\u00a0al. 2022. Coordinated batching and DVFS for DNN inference on GPU accelerators. TPDS (2022).","DOI":"10.1109\/TPDS.2022.3144614"},{"key":"e_1_3_3_1_22_2","unstructured":"NVIDIA. 2025. NVIDIA System Management Interface (NVIDIA-SMI) Documentation. https:\/\/docs.nvidia.com\/deploy\/nvidia-smi\/. Accessed: 2025-04-04."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651329"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624853"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378533"},{"key":"e_1_3_3_1_26_2","unstructured":"Rachel Shin. 2025. Sam Altman says GPTs are \u2018melting GPUs\u2019 as users flood ChatGPT with Studio Ghibli-style AI images. https:\/\/fortune.com\/2025\/03\/28\/sam-altman-chatgpt-gpus-melting-ai-images\/ Accessed: 2025-04-16."},{"key":"e_1_3_3_1_27_2","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. https:\/\/arxiv.org\/abs\/1409.1556"},{"key":"e_1_3_3_1_28_2","unstructured":"UEFI Forum. 2021. Advanced Configuration and Power Interface (ACPI) Specification Version 6.4. https:\/\/uefi.org\/specs\/acpi\/6.4\/. Accessed: 2025-04-06."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2008.4658631"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Xiaorui Wang Ming Chen Charles Lefurgy and Tom\u00a0W. Keller. 2011. Ship: A scalable hierarchical power control architecture for large-scale data centers. IEEE Transactions on Parallel and Distributed Systems 23 1 (2011) 168\u2013176.","DOI":"10.1109\/TPDS.2011.93"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Xiaorui Wang Yingming Chen Chenyang Lu and Xenofon Koutsoukos. 2007. FC-ORB: A robust distributed real-time embedded middleware with end-to-end utilization control. Journal of Systems and Software 80 7 (2007) 938\u2013950.","DOI":"10.1016\/j.jss.2006.09.031"},{"key":"e_1_3_3_1_32_2","unstructured":"Yue Wang et\u00a0al. 2018. Energynet: Energy-efficient dynamic inference. (2018)."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555794"},{"key":"e_1_3_3_1_34_2","volume-title":"USENIX ATC","author":"Weng Qizhen","year":"2023","unstructured":"Qizhen Weng, Lingyun Yang, et\u00a0al. 2023. Beware of Fragmentation: Scheduling GPU-Sharing Workloads with Fragmentation Gradient Descent. In USENIX ATC."},{"key":"e_1_3_3_1_35_2","volume-title":"ISCA","author":"Wu Qiang","year":"2016","unstructured":"Qiang Wu, Qingyuan Deng, Lakshmi Ganesh, et\u00a0al. 2016. Dynamo: Facebook\u2019s Data Center-Wide Power Management System. In ISCA."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Jiaying Zhang et\u00a0al. 2025. Power Control for Edge ML Inference with Hypernetwork Meta-Parameters. IEEE Communications Letters (2025).","DOI":"10.1109\/LCOMM.2025.3534320"}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754670","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:37:38Z","timestamp":1766219858000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754670"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":35,"alternative-id":["10.1145\/3754598.3754670","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754670","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}