{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T10:07:57Z","timestamp":1764842877195,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,28]],"date-time":"2023-10-28T00:00:00Z","timestamp":1698451200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,28]]},"DOI":"10.1145\/3634769.3634808","type":"proceedings-article","created":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T16:13:19Z","timestamp":1716999199000},"page":"45-51","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["CoFRIS: Coordinated Frequency and Resource Scaling for GPU Inference Servers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2577-8914","authenticated-orcid":false,"given":"Marcus","family":"Chow","sequence":"first","affiliation":[{"name":"University of California, Riverside, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5376-7868","authenticated-orcid":false,"given":"Daniel","family":"Wong","sequence":"additional","affiliation":[{"name":"University of California, Riverside, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,5,29]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD. 2017. Changing number of Compute Units issue #5 RadeonOpenCompute\/ROC-SMI. https:\/\/github.com\/RadeonOpenCompute\/ROC-smi\/issues\/5"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD. 2022. rocm_smi_lib. https:\/\/github.com\/RadeonOpenCompute\/rocm_smi_lib"},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2023. Stream Management HIP API. https:\/\/docs.amd.com\/bundle\/HIP-API-Guide-v5.4.1\/page\/a00183.htmlf"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3223381"},{"key":"e_1_3_2_1_5_1","volume-title":"Multi-model machine learning inference serving with gpu spatial partitioning. arXiv preprint arXiv:2109.01611","author":"Choi Seungbeom","year":"2021","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2021. Multi-model machine learning inference serving with gpu spatial partitioning. arXiv preprint arXiv:2109.01611 (2021)."},{"key":"e_1_3_2_1_6_1","unstructured":"Chih-Hsun Chou Laxmi\u00a0N. Bhuyan and Daniel Wong. 2019. \u03bc DPM: Dynamic Power Management for the Microsecond Era. In HPCA."},{"key":"e_1_3_2_1_7_1","unstructured":"Chih-Hsun Chou Daniel Wong and Laxmi\u00a0N Bhuyan. 2016. DynSleep: Fine-grained Power Management for a Latency-Critical Data Center Application. In ISLPED."},{"key":"e_1_3_2_1_8_1","volume-title":"KRISP: Enabling Kernel-wise Right-sizing for Spatial Partitioned GPU Inference Servers","author":"Chow Marcus","year":"2023","unstructured":"Marcus Chow, Ali Jahanshahi, and Daniel Wong. 2023. KRISP: Enabling Kernel-wise Right-sizing for Spatial Partitioned GPU Inference Servers. In HPCA. IEEE."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421285"},{"volume-title":"Workload-aware power gating design and run-time management for massively parallel gpgpus","author":"Dev Kapil","key":"e_1_3_2_1_10_1","unstructured":"Kapil Dev, Sherief Reda, Indrani Paul, Wei Huang, and Wayne Burleson. 2016. Workload-aware power gating design and run-time management for massively parallel gpgpus. In ISVLSI. IEEE."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000108"},{"key":"e_1_3_2_1_12_1","unstructured":"Arpan Gujarati Reza Karimi Safya Alzayat Wei Hao Antoine Kaufmann Ymir Vigfusson and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance Predictability from the Bottom Up. In OSDI."},{"key":"e_1_3_2_1_13_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486993"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_16_1","volume-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size. arXiv preprint arXiv:1602.07360","author":"Iandola N","year":"2016","unstructured":"Forrest\u00a0N Iandola, Song Han, Matthew\u00a0W Moskewicz, Khalid Ashraf, William\u00a0J Dally, and Kurt Keutzer. 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size. arXiv preprint arXiv:1602.07360 (2016)."},{"volume-title":"d.]. GPU-NEST: Characterizing energy efficiency of multi-gpu inference servers","author":"Jahanshahi Ali","key":"e_1_3_2_1_17_1","unstructured":"Ali Jahanshahi, Hadi\u00a0Zamani Sabzi, Chester Lau, and Daniel Wong. [n. d.]. GPU-NEST: Characterizing energy efficiency of multi-gpu inference servers. IEEE Computer Architecture Letters ([n. d.])."},{"key":"e_1_3_2_1_18_1","volume-title":"Dynamic space-time scheduling for gpu inference. arXiv preprint arXiv","author":"Jain Paras","year":"2018","unstructured":"Paras Jain, Xiangxi Mo, Ajay Jain, Harikaran Subbaraj, Rehan\u00a0Sohail Durrani, Alexey Tumanov, Joseph Gonzalez, and Ion Stoica. 2018. Dynamic space-time scheduling for gpu inference. arXiv preprint arXiv (2018)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830797"},{"key":"e_1_3_2_1_20_1","unstructured":"Yunseong Kim Yujeong Choi and Minsoo Rhu. 2022. PARIS and ELSA: an elastic scheduling algorithm for reconfigurable multi-GPU inference servers. In DAC."},{"key":"e_1_3_2_1_21_1","unstructured":"Jack Kosaian Amar Phanishayee Matthai Philipose Debadeepta Dey and Rashmi Vinayak. 2021. Boosting the Throughput and Accelerator Utilization of Specialized CNN Inference Beyond Increasing Batch Size. In ICML."},{"key":"e_1_3_2_1_22_1","volume-title":"One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997","author":"Krizhevsky Alex","year":"2014","unstructured":"Alex Krizhevsky. 2014. One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997 (2014)."},{"key":"e_1_3_2_1_23_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942 (2019)."},{"key":"e_1_3_2_1_24_1","unstructured":"Ningning Ma Xiangyu Zhang Hai-Tao Zheng and Jian Sun. 2018. Shufflenet v2: Practical guidelines for efficient cnn architecture design. In ECCV."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Abhinandan Majumdar Leonardo Piga Indrani Paul Joseph\u00a0L Greathouse Wei Huang and David\u00a0H Albonesi. 2017. Dynamic gpgpu power management using adaptive model predictive control. In HPCA.","DOI":"10.1109\/HPCA.2017.34"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Abhinandan Majumdar Gene Wu Kapil Dev Joseph\u00a0L Greathouse Indrani Paul Wei Huang Arjun-Karthik Venugopal Leonardo Piga Chip Freitag and Sooraj Puthoor. 2015. A taxonomy of gpgpu performance scaling. In IISWC.","DOI":"10.1109\/IISWC.2015.22"},{"key":"e_1_3_2_1_27_1","unstructured":"Adam McLaughlin Indrani Paul Joseph\u00a0L Greathouse Srilatha Manne and Sudhakar Yalamanchili. 2014. A power characterization and management of gpu graph traversal. In ASBD."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3144614"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750404"},{"key":"e_1_3_2_1_30_1","unstructured":"Francisco Romero Qian Li Neeraja\u00a0J Yadwadkar and Christos Kozyrakis. 2021. INFaaS: Automated Model-less Inference Serving. In USENIX ATC 21."},{"key":"e_1_3_2_1_31_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_32_1","unstructured":"SWIMProjectUCB. 2016. Statistical Workload Injector for MapReduce (SWIM). https:\/\/github.com\/SWIMProjectUCB\/SWIM\/wiki"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Qizhen Weng. 2022. MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In NSDI.","DOI":"10.21203\/rs.3.rs-2266264\/v1"},{"key":"e_1_3_2_1_34_1","unstructured":"Wencong Xiao Shiru Ren Yong Li Yang Zhang Pengyang Hou Zhi Li Yihui Feng Wei Lin and Yangqing Jia. 2020. AntMan: Dynamic Scaling on GPU Clusters for Deep Learning.. In OSDI."},{"key":"e_1_3_2_1_35_1","unstructured":"Saining Xie Ross Girshick Piotr Doll\u00e1r Zhuowen Tu and Kaiming He. 2017. Aggregated residual transformations for deep neural networks. In CVPR."}],"event":{"name":"IGSC '23: THE 14th international Green and Sustainable Computing Conference","acronym":"IGSC '23","location":"Toronto ON Canada"},"container-title":["Proceedings of the 14th International Green and Sustainable Computing Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3634769.3634808","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3634769.3634808","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:19:50Z","timestamp":1755973190000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3634769.3634808"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,28]]},"references-count":35,"alternative-id":["10.1145\/3634769.3634808","10.1145\/3634769"],"URL":"https:\/\/doi.org\/10.1145\/3634769.3634808","relation":{},"subject":[],"published":{"date-parts":[[2023,10,28]]},"assertion":[{"value":"2024-05-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}