{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:19:01Z","timestamp":1780355941397,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Peng Cheng Laboratory","doi-asserted-by":"publisher","award":["PCL2021A13"],"award-info":[{"award-number":["PCL2021A13"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,22]]},"DOI":"10.1145\/3627703.3629584","type":"proceedings-article","created":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T06:28:28Z","timestamp":1713421708000},"page":"769-785","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["Improving GPU Energy Efficiency through an Application-transparent Frequency Scaling Policy with Performance Assurance"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6925-7777","authenticated-orcid":false,"given":"Yijia","family":"Zhang","sequence":"first","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2986-967X","authenticated-orcid":false,"given":"Qiang","family":"Wang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1594-2335","authenticated-orcid":false,"given":"Zhe","family":"Lin","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2273-1504","authenticated-orcid":false,"given":"Pengxiang","family":"Xu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4964-3258","authenticated-orcid":false,"given":"Bingqiang","family":"Wang","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Power and Performance Characterization and Modeling of GPU-Accelerated Systems. In 2014 IEEE 28th International Parallel and Distributed Processing Symposium. 113--122","author":"Abe Yuki","year":"2014","unstructured":"Yuki Abe, Hiroshi Sasaki, Shinpei Kato, Koji Inoue, Masato Edahiro, and Martin Peres. 2014. Power and Performance Characterization and Modeling of GPU-Accelerated Systems. In 2014 IEEE 28th International Parallel and Distributed Processing Symposium. 113--122."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC55821.2022.9926317"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4143"},{"key":"e_1_3_2_1_4_1","unstructured":"Shuai Che Michael Boyer Jiayuan Meng David Tarjan Jeremy W. Sheaffer Sang-Ha Lee and Kevin Skadron. Accessed: 2023-01. Rodinia Benchmark Suite version 3.1. https:\/\/rodinia.cs.virginia.edu\/doku.php."},{"key":"e_1_3_2_1_5_1","volume-title":"Amit Kumar Singh, and Klaus McDonald-Maier","author":"Dey Somdip","year":"2022","unstructured":"Somdip Dey, Samuel Isuwa, Suman Saha, Amit Kumar Singh, and Klaus McDonald-Maier. 2022. CPU-GPU-Memory DVFS for Power-Efficient MPSoC in Mobile Cyber Physical Systems. Future Internet 14, 3 (2022)."},{"key":"e_1_3_2_1_6_1","volume-title":"Automation and Test in Europe Conference and Exhibition (DATE). 1728--1733","author":"Dey Somdip","year":"2020","unstructured":"Somdip Dey, Amit Kumar Singh, Xiaohang Wang, and Klaus McDonald-Maier. 2020. User Interaction Aware Reinforcement Learning for Power and Thermal Efficiency of CPU-GPU Mobile MPSoCs. In 2020 Design, Automation and Test in Europe Conference and Exhibition (DATE). 1728--1733."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3203217.3203273"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337833"},{"key":"e_1_3_2_1_9_1","unstructured":"Scott Grauer-Gray Lifan Xu Robert Searles Sudhee Ayalasomayajula and John Cavazos. Accessed: 2023-01. PolyBench Benchmarks on GPU. https:\/\/github.com\/socal-ucr\/polybench-gpu."},{"key":"e_1_3_2_1_10_1","volume-title":"GPGPU Power Modeling for Multi-domain Voltage-Frequency Scaling. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). 789--800","author":"Guerreiro Joao","year":"2018","unstructured":"Joao Guerreiro, Aleksandar Ilic, Nuno Roma, and Pedro Tomas. 2018. GPGPU Power Modeling for Multi-domain Voltage-Frequency Scaling. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). 789--800."},{"key":"e_1_3_2_1_11_1","volume-title":"Multi-kernel Auto-Tuning on GPUs: Performance and Energy-Aware Optimization. In 2015 23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing. 438--445","author":"Guerreiro Jo\u00e3o","year":"2015","unstructured":"Jo\u00e3o Guerreiro, Aleksandar Ilic, Nuno Roma, and Pedro Tom\u00e1s. 2015. Multi-kernel Auto-Tuning on GPUs: Performance and Energy-Aware Optimization. In 2015 23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing. 438--445."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2018.02.001"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2917181"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid49817.2020.00-35"},{"key":"e_1_3_2_1_15_1","volume-title":"AccelWattch: A Power Modeling Framework for Modern GPUs. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Kandiah Vijay","year":"2021","unstructured":"Vijay Kandiah, Scott Peverelle, Mahmoud Khairy, Junrui Pan, Amogh Manjunath, Timothy G. Rogers, Tor M. Aamodt, and Nikos Hardavellas. 2021. AccelWattch: A Power Modeling Framework for Modern GPUs. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture (Virtual Event, Greece) (MICRO '21). Association for Computing Machinery, New York, NY, USA, 738--753."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3468161"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2013.6657064"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2012.31"},{"key":"e_1_3_2_1_19_1","volume-title":"Dynamic GPGPU Power Management Using Adaptive Model Predictive Control. In 2017 IEEE International Symposium on High Performance Computer Architecture (HPCA). 613--624","author":"Majumdar Abhinandan","unstructured":"Abhinandan Majumdar, Leonardo Piga, Indrani Paul, Joseph L. Greathouse, Wei Huang, and David H. Albonesi. 2017. Dynamic GPGPU Power Management Using Adaptive Model Predictive Control. In 2017 IEEE International Symposium on High Performance Computer Architecture (HPCA). 613--624."},{"key":"e_1_3_2_1_20_1","volume-title":"Recalibrating global data center energy-use estimates. Science 367, 6481","author":"Masanet Eric","year":"2020","unstructured":"Eric Masanet, Arman Shehabi, Nuoa Lei, Sarah Smith, and Jonathan Koomey. 2020. Recalibrating global data center energy-use estimates. Science 367, 6481 (2020), 984--986."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dcan.2016.10.001"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2525526.2525852"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2022.03.004"},{"key":"e_1_3_2_1_24_1","unstructured":"Meta Research. Accessed: 2023-01. Self-Supervised Vision Transformers with DINO. https:\/\/github.com\/facebookresearch\/dino."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2019.2942020"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3144614"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830826"},{"key":"e_1_3_2_1_28_1","unstructured":"NVIDIA. Accessed: 2023-01. NVIDIA H100 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/h100\/."},{"key":"e_1_3_2_1_29_1","unstructured":"NVIDIA Corporation. Accessed: 2023-01. NVIDIA CUDA Code Samples. https:\/\/github.com\/nvidia\/cuda-samples."},{"key":"e_1_3_2_1_30_1","unstructured":"NVIDIA Corporation. Accessed: 2023-01. NVIDIA CUDA Profiling Tools Interface (CUPTI). https:\/\/developer.nvidia.com\/cupti."},{"key":"e_1_3_2_1_31_1","volume-title":"2023-01","author":"NVIDIA Corporation","unstructured":"NVIDIA Corporation. Accessed: 2023-01. NVIDIA Data Center GPU Manager (DCGM). https:\/\/developer.nvidia.com\/dcgm."},{"key":"e_1_3_2_1_32_1","volume-title":"2023-01","author":"NVIDIA Corporation","unstructured":"NVIDIA Corporation. Accessed: 2023-01. NVIDIA Management Library (NVML). https:\/\/developer.nvidia.com\/nvidia-management-library-nvml."},{"key":"e_1_3_2_1_33_1","unstructured":"NVIDIA Corporation. Accessed: 2023-01. NVIDIA Nsight Systems. https:\/\/developer.nvidia.com\/nsight-systems."},{"key":"e_1_3_2_1_34_1","unstructured":"NVIDIA Corporation. Accessed: 2023-01. NVIDIA profiling tools (nvprof). https:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index.html."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2012.2235126"},{"key":"e_1_3_2_1_36_1","volume-title":"James Glosli, Helgi Ingolfsson, and Barry Rountree.","author":"Patki Tapasya","year":"2019","unstructured":"Tapasya Patki, Zachary Frye, Harsh Bhatia, Francesco Di Natale, James Glosli, Helgi Ingolfsson, and Barry Rountree. 2019. Comparing GPU Power and Frequency Capping: A Case Study with the MuMMI Workflow. In 2019 IEEE\/ACM Workflows in Support of Large-Scale Science (WORKS). 31--39."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750404"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503227"},{"key":"e_1_3_2_1_39_1","unstructured":"PyTorch Developers. Accessed: 2023-01. PyTorch Examples. https:\/\/github.com\/pytorch\/examples."},{"key":"e_1_3_2_1_40_1","unstructured":"Antonin Raffin. Accessed: 2023-01. RL Baselines3 Zoo: A Training Framework for Stable Baselines3 Reinforcement Learning Agents. https:\/\/github.com\/DLR-RM\/rl-baselines3-zoo."},{"key":"e_1_3_2_1_41_1","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. Accessed: 2023-01. Latent Diffusion Models. https:\/\/github.com\/CompVis\/latent-diffusion."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783719"},{"key":"e_1_3_2_1_43_1","volume-title":"Equalizer: Dynamic Tuning of GPU Resources for Efficient Execution. In 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture. 647--658","author":"Sethia Ankit","year":"2014","unstructured":"Ankit Sethia and Scott Mahlke. 2014. Equalizer: Dynamic Tuning of GPU Resources for Efficient Execution. In 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture. 647--658."},{"key":"e_1_3_2_1_44_1","volume-title":"2013 IEEE 27th International Symposium on Parallel and Distributed Processing. 673--686","author":"Song Shuaiwen","unstructured":"Shuaiwen Song, Chunyi Su, Barry Rountree, and Kirk W. Cameron. 2013. A Simplified and Accurate Model of Power-Performance Efficiency on Emergent GPU Architectures. In 2013 IEEE 27th International Symposium on Parallel and Distributed Processing. 673--686."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307772.3328315"},{"key":"e_1_3_2_1_46_1","volume-title":"2023-05. TOP500 List (June","year":"2023","unstructured":"TOP500. Accessed: 2023-05. TOP500 List (June 2023). https:\/\/www.top500.org\/lists\/top500\/2023\/06\/."},{"key":"e_1_3_2_1_47_1","first-page":"2943","article-title":"Dynamic GPU Energy Optimization for Machine Learning Training Workloads","volume":"33","author":"Wang Farui","year":"2022","unstructured":"Farui Wang, Weizhe Zhang, Shichao Lai, Meng Hao, and Zheng Wang. 2022. Dynamic GPU Energy Optimization for Machine Learning Training Workloads. IEEE Transactions on Parallel and Distributed Systems 33, 11 (2022), 2943--2954.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3004623"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366428.3380767"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056063"},{"key":"e_1_3_2_1_51_1","volume-title":"Know Your Enemy To Save Cloud Energy: Energy-Performance Characterization of Machine Learning Serving. In 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA). 842--854","author":"Yu Junyeol","year":"2023","unstructured":"Junyeol Yu, Jongseok Kim, and Euiseong Seo. 2023. Know Your Enemy To Save Cloud Energy: Energy-Performance Characterization of Machine Learning Serving. In 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA). 842--854."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Pengfei Zou Ang Li Kevin Barker and Rong Ge. 2020. Indicator-Directed Dynamic Power Management for Iterative Workloads on GPU-Accelerated Systems. In 2020 20th IEEE\/ACM International Symposium on Cluster Cloud and Internet Computing (CCGRID). 559--568.","DOI":"10.1109\/CCGrid49817.2020.00-37"}],"event":{"name":"EuroSys '24: Nineteenth European Conference on Computer Systems","location":"Athens Greece","acronym":"EuroSys '24","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Nineteenth European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629584","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627703.3629584","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:09:53Z","timestamp":1755824993000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629584"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":52,"alternative-id":["10.1145\/3627703.3629584","10.1145\/3627703"],"URL":"https:\/\/doi.org\/10.1145\/3627703.3629584","relation":{},"subject":[],"published":{"date-parts":[[2024,4,22]]},"assertion":[{"value":"2024-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}