{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,16]],"date-time":"2026-07-16T22:38:16Z","timestamp":1784241496120,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,29]],"date-time":"2022-08-29T00:00:00Z","timestamp":1661731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"European Commission's EuroHPC and H2020 programmes","award":["956560"],"award-info":[{"award-number":["956560"]}]},{"name":"NVIDIA Academic Hardware Grant Program"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,29]]},"DOI":"10.1145\/3547276.3548630","type":"proceedings-article","created":{"date-parts":[[2023,1,15]],"date-time":"2023-01-15T00:56:17Z","timestamp":1673744177000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Optimizing Hardware Resource Partitioning and Job Allocations on Modern GPUs under Power Caps"],"prefix":"10.1145","author":[{"given":"Eishi","family":"Arima","sequence":"first","affiliation":[{"name":"Technical University of Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Minjoon","family":"Kang","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Issa","family":"Saba","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Josef","family":"Weidendorfer","sequence":"additional","affiliation":[{"name":"Leibniz Supercomputing Centre, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Carsten","family":"Trinitis","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Martin","family":"Schulz","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Germany and Leibniz Supercomputing Centre, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"d.]. random-access-bench. https:\/\/github.com\/cowsintuxedos\/random-access-bench. Online","year":"2022","unstructured":"[n. d.]. random-access-bench. https:\/\/github.com\/cowsintuxedos\/random-access-bench. Online; accessed 20 January 2022."},{"key":"e_1_3_2_1_2_1","volume-title":"d.]. STREAM Benchmark in CUDA C++. https:\/\/github.com\/bcumming\/cuda-stream. Online","year":"2022","unstructured":"[n. d.]. STREAM Benchmark in CUDA C++. https:\/\/github.com\/bcumming\/cuda-stream. Online; accessed 20 January 2022."},{"key":"e_1_3_2_1_3_1","volume-title":"d.]. TOP 500. https:\/\/www.top500.org\/statistics\/list\/. Online","year":"2022","unstructured":"[n. d.]. TOP 500. https:\/\/www.top500.org\/statistics\/list\/. Online; accessed 20 January 2022."},{"key":"e_1_3_2_1_4_1","volume-title":"Slate: Enabling Workload-Aware Efficient Multiprocessing for Modern GPGPUs. In 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 252\u2013261","author":"Allen Tyler","year":"2019","unstructured":"Tyler Allen, Xizhou Feng, and Rong Ge. 2019. Slate: Enabling Workload-Aware Efficient Multiprocessing for Modern GPGPUs. In 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 252\u2013261."},{"key":"e_1_3_2_1_5_1","volume-title":"Footprint-Aware Power Capping for Hybrid Memory Based Systems. In International Conference on High Performance Computing. 347\u2013369","author":"Arima Eishi","year":"2020","unstructured":"Eishi Arima, Toshihiro Hanawa, Carsten Trinitis, and Martin Schulz. 2020. Footprint-Aware Power Capping for Hybrid Memory Based Systems. In International Conference on High Performance Computing. 347\u2013369."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173169"},{"key":"e_1_3_2_1_7_1","volume-title":"Adaptive Configuration Selection for Power-Constrained Heterogeneous Systems. In 2014 43rd International Conference on Parallel Processing (ICPP). 371\u2013380","author":"Bailey E.","year":"2014","unstructured":"Peter\u00a0E. Bailey, David\u00a0K. Lowenthal, Vignesh Ravi, Barry Rountree, Martin Schulz, and Bronis\u00a0R. De\u00a0Supinski. 2014. Adaptive Configuration Selection for Power-Constrained Heterogeneous Systems. In 2014 43rd International Conference on Parallel Processing (ICPP). 371\u2013380."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1375527.1375580"},{"key":"e_1_3_2_1_9_1","volume-title":"Rodinia: A Benchmark Suite for Heterogeneous Computing. In 2009 IEEE International Symposium on Workload Characterization (IISWC). 44\u201354","author":"Che Shuai","year":"2009","unstructured":"Shuai Che, Michael Boyer, Jiayuan Meng, David Tarjan, Jeremy\u00a0W. Sheaffer, Sang-Ha Lee, and Kevin Skadron. 2009. Rodinia: A Benchmark Suite for Heterogeneous Computing. In 2009 IEEE International Symposium on Workload Characterization (IISWC). 44\u201354."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3412841.3441989"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS49909.2020.9220622"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155641"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00027"},{"key":"e_1_3_2_1_14_1","volume-title":"Application-Aware Prioritization Mechanisms for On-Chip Networks. In 2009 42nd Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 280\u2013291","author":"Das Reetuparna","year":"2009","unstructured":"Reetuparna Das, Onur Mutlu, Thomas Moscibroda, and Chita\u00a0R. Das. 2009. Application-Aware Prioritization Mechanisms for On-Chip Networks. In 2009 42nd Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 280\u2013291."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1840845.1840883"},{"key":"e_1_3_2_1_16_1","volume-title":"12th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 185\u2013194.","author":"Lee C.","unstructured":"Benjamin\u00a0C. Lee and David\u00a0M. Brooks. 2006. Accurate and Efficient Regression Modeling for Microarchitectural Performance and Power Prediction. In 12th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 185\u2013194."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2012.25"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/GREENCOMP.2010.5598315"},{"key":"e_1_3_2_1_19_1","volume-title":"d.]. CUTLASS 2.8. https:\/\/github.com\/NVIDIA\/cutlass. Online","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. [n. d.]. CUTLASS 2.8. https:\/\/github.com\/NVIDIA\/cutlass. Online; accessed 20 January 2022."},{"key":"e_1_3_2_1_20_1","volume-title":"d.]. Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html. Online","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. [n. d.]. Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html. Online; accessed 20 January 2022."},{"key":"e_1_3_2_1_21_1","volume-title":"d.]. Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute. Online","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. [n. d.]. Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute. Online; accessed 6 February 2022."},{"key":"e_1_3_2_1_22_1","volume-title":"d.]. NVIDIA Multi-Instance GPU User Guide. https:\/\/docs.nvidia.com\/datacenter\/tesla\/mig-user-guide\/. Online","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. [n. d.]. NVIDIA Multi-Instance GPU User Guide. https:\/\/docs.nvidia.com\/datacenter\/tesla\/mig-user-guide\/. Online; accessed 20 January 2022."},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the Eighteenth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 407\u2013418","author":"Pai Sreepathi","unstructured":"Sreepathi Pai, Matthew\u00a0J. Thazhuthaveetil, and R. Govindarajan. 2013. Improving GPGPU Concurrency with Elastic Kernels. In Proceedings of the Eighteenth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 407\u2013418."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749246.2749262"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2784428"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.71"},{"key":"e_1_3_2_1_27_1","volume-title":"An Intra-Task Dvfs Technique Based on Statistical Analysis of Hardware Events. In 4th International Conference on Computing Frontiers (CF). 123\u2013130","author":"Sasaki Hiroshi","year":"2007","unstructured":"Hiroshi Sasaki, Yoshimichi Ikeda, Masaaki Kondo, and Hiroshi Nakamura. 2007. An Intra-Task Dvfs Technique Based on Statistical Analysis of Hardware Events. In 4th International Conference on Computing Frontiers (CF). 123\u2013130."},{"key":"e_1_3_2_1_28_1","volume-title":"Coordinated Power-Performance Optimization in Manycores. In 22nd International Conference on Parallel Architectures and Compilation Techniques (PACT). 51\u201361","author":"Sasaki Hiroshi","year":"2013","unstructured":"Hiroshi Sasaki, Satoshi Imamura, and Koji Inoue. 2013. Coordinated Power-Performance Optimization in Manycores. In 22nd International Conference on Parallel Architectures and Compilation Techniques (PACT). 51\u201361."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370833"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2668930.2688052"},{"key":"e_1_3_2_1_31_1","volume-title":"Improving Provisioned Power Efficiency in HPC Systems with GPU-CAPP. In 2018 IEEE 25th International Conference on High Performance Computing (HiPC). 112\u2013122","author":"Straube Kramer","year":"2018","unstructured":"Kramer Straube, Jason Lowe-Power, Christopher Nitta, Matthew Farrens, and Venkatesh Akella. 2018. Improving Provisioned Power Efficiency in HPC Systems with GPU-CAPP. In 2018 IEEE 25th International Conference on High Performance Computing (HiPC). 112\u2013122."},{"key":"e_1_3_2_1_32_1","volume-title":"Enabling Preemptive Multiprogramming on GPUs. In 41st Annual International Symposium on Computer Architecuture (ISCA). 193\u2013204","author":"Tanasic Ivan","year":"2014","unstructured":"Ivan Tanasic, Isaac Gelado, Javier Cabezas, Alex Ramirez, Nacho Navarro, and Mateo Valero. 2014. Enabling Preemptive Multiprogramming on GPUs. In 41st Annual International Symposium on Computer Architecuture (ISCA). 193\u2013204."},{"key":"e_1_3_2_1_33_1","volume-title":"Slurm: Simple Linux Utility for Resource Management. In Workshop on job scheduling strategies for parallel processing. 44\u201360","author":"Yoo B","year":"2003","unstructured":"Andy\u00a0B Yoo, Morris\u00a0A Jette, and Mark Grondona. 2003. Slurm: Simple Linux Utility for Resource Management. In Workshop on job scheduling strategies for parallel processing. 44\u201360."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/IGCC.2012.6322289"},{"key":"e_1_3_2_1_35_1","volume-title":"Co-Run Scheduling with Power Cap on Integrated CPU-GPU Systems. In 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 967\u2013977","author":"Zhu Qi","year":"2017","unstructured":"Qi Zhu, Bo Wu, Xipeng Shen, Li Shen, and Zhiying Wang. 2017. Co-Run Scheduling with Power Cap on Integrated CPU-GPU Systems. In 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 967\u2013977."}],"event":{"name":"ICPP '22: 51st International Conference on Parallel Processing","location":"Bordeaux France","acronym":"ICPP '22"},"container-title":["Workshop Proceedings of the 51st International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3547276.3548630","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3547276.3548630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:56Z","timestamp":1750186976000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3547276.3548630"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,29]]},"references-count":35,"alternative-id":["10.1145\/3547276.3548630","10.1145\/3547276"],"URL":"https:\/\/doi.org\/10.1145\/3547276.3548630","relation":{},"subject":[],"published":{"date-parts":[[2022,8,29]]},"assertion":[{"value":"2023-01-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}