{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T00:12:31Z","timestamp":1777421551170,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62325205, 62072228, 62172204"],"award-info":[{"award-number":["62325205, 62072228, 62172204"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3669940.3707231","type":"proceedings-article","created":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T12:28:01Z","timestamp":1738844881000},"page":"1118-1132","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Using Analytical Performance\/Power Model and Fine-Grained DVFS to Enhance AI Accelerator Energy Efficiency"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0499-5328","authenticated-orcid":false,"given":"Zibo","family":"Wang","sequence":"first","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6925-7777","authenticated-orcid":false,"given":"Yijia","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8244-7241","authenticated-orcid":false,"given":"Fuchun","family":"Wei","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4964-3258","authenticated-orcid":false,"given":"Bingqiang","family":"Wang","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7030-1307","authenticated-orcid":false,"given":"Yanlin","family":"Liu","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7821-070X","authenticated-orcid":false,"given":"Zhiheng","family":"Hu","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1909-6332","authenticated-orcid":false,"given":"Jingyi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7940-6031","authenticated-orcid":false,"given":"Xiaoxin","family":"Xu","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1484-5802","authenticated-orcid":false,"given":"Jian","family":"He","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3410-8621","authenticated-orcid":false,"given":"Xiaoliang","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4833-2023","authenticated-orcid":false,"given":"Wanchun","family":"Dou","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6934-1685","authenticated-orcid":false,"given":"Guihai","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2710-7628","authenticated-orcid":false,"given":"Chen","family":"Tian","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC55821.2022.9926317"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2023.07.011"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605573.3605600"},{"key":"e_1_3_2_1_4_1","first-page":"253","volume-title":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"4","author":"Bharadwaj Srikant","year":"2024","unstructured":"Srikant Bharadwaj, Shomit Das, Kaushik Mazumdar, Bradford M. Beckmann, and Stephen Kosonocky. Predict; don't react for enabling efficient fine-grain dvfs in gpus. In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 4, ASPLOS '23, page 253--267, New York, NY, USA, 2024. Association for Computing Machinery."},{"key":"e_1_3_2_1_5_1","first-page":"191","volume-title":"Proceedings of the 33rd Annual ACM\/IEEE International Symposium on Microarchitecture, MICRO 33","author":"Adam Butts J.","year":"2000","unstructured":"J. Adam Butts and Gurindar S. Sohi. A static power model for architects. In Proceedings of the 33rd Annual ACM\/IEEE International Symposium on Microarchitecture, MICRO 33, page 191--201, New York, NY, USA, 2000. Association for Computing Machinery."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4143"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3203217.3203273"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337833"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022602019183"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/4.604077"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00072"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2018.02.001"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2917181"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/1090.001.0001"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid49817.2020.00-35"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICTEA.2012.6462883"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2024.3406038"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480063"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2021.3095790"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2003.1250885"},{"key":"e_1_3_2_1_22_1","volume-title":"Handbook of genetic algorithms","author":"Lawrence Davis","year":"1991","unstructured":"Davis Lawrence. Handbook of genetic algorithms. Van Nostrand Reinhold, 1991."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00071"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875654"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aba3758"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dcan.2016.10.001"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2022.03.004"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830826"},{"key":"e_1_3_2_1_29_1","unstructured":"NVIDIA. Accessed: 2024-05. cuda c programming guide. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/."},{"key":"e_1_3_2_1_30_1","unstructured":"NVIDIA. Accessed: 2024-05. nvidia h100 tensor core gpu. https:\/\/www.nvidia.com\/ en-us\/ data-center\/h100\/."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640383"},{"key":"e_1_3_2_1_32_1","first-page":"54","volume-title":"Proceedings of the 42nd Annual International Symposium on Computer Architecture, ISCA '15","author":"Paul Indrani","year":"2015","unstructured":"Indrani Paul, Wei Huang, Manish Arora, and Sudhakar Yalamanchili. Harmonia: Balancing compute and memory power in highperformance gpus. In Proceedings of the 42nd Annual International Symposium on Computer Architecture, ISCA '15, page 54--65, New York, NY, USA, 2015. Association for Computing Machinery."},{"key":"e_1_3_2_1_33_1","first-page":"97","volume-title":"Genetic Algorithms","author":"Sastry Kumara","year":"2005","unstructured":"Kumara Sastry, David Goldberg, and Graham Kendall. Genetic Algorithms, pages 97--125. Springer US, Boston, MA, 2005."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS56514.2022.00010"},{"key":"e_1_3_2_1_35_1","volume-title":"Dynamollm: Designing llm inference clusters for performance and energy efficiency. arXiv:2408.00741","author":"Stojkovic Jovan","year":"2024","unstructured":"Jovan Stojkovic, Chaojie Zhang, \u00cd\u00f1igo Goiri, Josep Torrellas, and Esha Choukse. Dynamollm: Designing llm inference clusters for performance and energy efficiency. arXiv:2408.00741, 2024."},{"key":"e_1_3_2_1_36_1","volume-title":"Is leakage power a linear function of temperature? arXiv:1809.03147","author":"Sultan Hameedah","year":"2018","unstructured":"Hameedah Sultan, Shashank Varshney, and Smruti R Sarangi. Is leakage power a linear function of temperature? arXiv:1809.03147, 2018."},{"key":"e_1_3_2_1_37_1","volume-title":"2023--11. top500 list (november","year":"2023","unstructured":"TOP500. Accessed: 2023--11. top500 list (november 2023). https:\/\/www.top500.org\/ lists\/ top500\/ 2023\/ 11\/ ."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2023.3314916"},{"issue":"11","key":"e_1_3_2_1_39_1","first-page":"2943","article-title":"Dynamic gpu energy optimization for machine learning training workloads","volume":"33","author":"Wang Farui","year":"2022","unstructured":"Farui Wang, Weizhe Zhang, Shichao Lai, Meng Hao, and Zheng Wang. Dynamic gpu energy optimization for machine learning training workloads. IEEE Transactions on Parallel and Distributed Systems, 33(11):2943--2954, 2022.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3004623"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366428.3380767"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3200528"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056063"},{"key":"e_1_3_2_1_44_1","first-page":"521","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo Seong Jeong, Geon-Woo Kim, Soojeong Kim, and Byung-Gon Chun. Orca: A distributed serving system for Transformer- Based generative models. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pages 521--538, Carlsbad, CA, July 2022. USENIX Association."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10070943"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629584"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid49817.2020.00-37"}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707231","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3669940.3707231","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T14:48:19Z","timestamp":1755787699000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707231"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":47,"alternative-id":["10.1145\/3669940.3707231","10.1145\/3669940"],"URL":"https:\/\/doi.org\/10.1145\/3669940.3707231","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}