{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T08:00:26Z","timestamp":1776931226838,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3731599.3767568","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T16:13:44Z","timestamp":1762532024000},"page":"2041-2050","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EAS-Sim: A Framework and its Methodology for the Co-Design of Multi-Objective, Energy-Aware Schedulers for AI Clusters"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-8636-8466","authenticated-orcid":false,"given":"Roblex","family":"Nana Tchakoute","sequence":"first","affiliation":[{"name":"Centre de recherche en informatique (CRI), Mines Paris - PSL University, Fontainebleau, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1194-6400","authenticated-orcid":false,"given":"Claude","family":"Tadonki","sequence":"additional","affiliation":[{"name":"Centre de recherche en informatique (CRI), Mines Paris - PSL University, Fontainebleau, France"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/IGSC48788.2019.8957174"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00036"},{"key":"e_1_3_3_2_4_2","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arxiv:https:\/\/arXiv.org\/abs\/2010.11929\u00a0[cs.CV]"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486978"},{"key":"e_1_3_3_2_6_2","first-page":"485","volume-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Gu Juncheng","year":"2019","unstructured":"Juncheng Gu, Mosharaf Chowdhury, Kang\u00a0G. Shin, Yibo Zhu, Myeongjae Jeon, Junjie Qian, Hongqiang Liu, and Chuanxiong Guo. 2019. Tiresias: A GPU Cluster Manager for Distributed Deep Learning. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19). Boston, MA, 485\u2013500."},{"key":"e_1_3_3_2_7_2","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arxiv:https:\/\/arXiv.org\/abs\/1512.03385\u00a0[cs.CV]"},{"key":"e_1_3_3_2_8_2","unstructured":"Anne Helmenstine. 2016. Sources of Error in Science Experiments. https:\/\/sciencenotes.org\/error-in-science\/ Accessed: 2025-08-13."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/LPE.1994.573184"},{"key":"e_1_3_3_2_10_2","unstructured":"HPC-CRI. 2025. ADEPT (Analytical Deep-learning Energy Performance and Time-estimator). https:\/\/github.com\/HPC-CRI\/ADEPT Accessed: 2025-08-14."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","unstructured":"Amin Isazadeh Davide Ziviani and David\u00a0E. Claridge. 2023. Global trends performance metrics and energy reduction measures in datacom facilities. Renewable and Sustainable Energy Reviews 174 (2023) 113149. 10.1016\/j.rser.2023.113149","DOI":"10.1016\/j.rser.2023.113149"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613175"},{"key":"e_1_3_3_2_13_2","first-page":"947","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Jeon Myeongjae","year":"2019","unstructured":"Myeongjae Jeon, Shivaram Venkataraman, Amar Phanishayee, Junjie Qian, Wencong Xiao, and Fan Yang. 2019. Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). USENIX Association, Renton, WA, 947\u2013960."},{"key":"e_1_3_3_2_14_2","unstructured":"Jared Kaplan Sam McCandlish Tom Henighan Tom\u00a0B. Brown Benjamin Chess Rewon Child Scott Gray Alec Radford Jeffrey Wu and Dario Amodei. 2020. Scaling Laws for Neural Language Models. arxiv:https:\/\/arXiv.org\/abs\/2001.08361\u00a0[cs.LG]"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.5555\/324493.325074"},{"key":"e_1_3_3_2_16_2","unstructured":"A. Krizhevsky and G. Hinton. 2009. Learning multiple layers of features from tiny images. Master\u2019s thesis Dept. of Computer Science University of Toronto (2009)."},{"key":"e_1_3_3_2_17_2","unstructured":"Zhenzhong Lan Mingda Chen Sebastian Goodman Kevin Gimpel Piyush Sharma and Radu Soricut. 2019. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. CoRR abs\/1909.11942 (2019). arxiv:https:\/\/arXiv.org\/abs\/1909.11942"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/2592784.2592786"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","unstructured":"Haoyang Li Fangcheng Fu Hao Ge Sheng Lin Xuanyu Wang Jiawen Niu Yujie Wang Hailin Zhang Xiaonan Nie and Bin Cui. 2025. Malleus: Straggler-Resilient Hybrid Parallel Training of Large-scale Models via Malleable Data and Model Parallelization. Proc. ACM Manag. Data 3 3 Article 185 (June 2025) 28\u00a0pages. 10.1145\/3725322","DOI":"10.1145\/3725322"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731082"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3698038.3698532"},{"key":"e_1_3_3_2_22_2","first-page":"289","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20)","author":"Mahajan Kshiteej","year":"2020","unstructured":"Kshiteej Mahajan, Arjun Balasubramanian, Arjun Singhvi, Shivaram Venkataraman, Aditya Akella, Amar Phanishayee, and Shuchi Chawla. 2020. Themis: Fair and Efficient GPU Cluster Scheduling. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20). Santa Clara, CA, 289\u2013304."},{"key":"e_1_3_3_2_23_2","first-page":"481","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Narayanan Deepak","year":"2020","unstructured":"Deepak Narayanan, Keshav Santhanam, Fiodar Kazhamiaka, Amar Phanishayee, and Matei Zaharia. 2020. Heterogeneity-Aware Cluster Scheduling Policies for Deep Learning Workloads. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 481\u2013498."},{"key":"e_1_3_3_2_24_2","first-page":"1","volume-title":"15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)","author":"Qiao Aurick","year":"2021","unstructured":"Aurick Qiao, Sang\u00a0Keun Choe, Suhas\u00a0Jayaram Subramanya, Willie Neiswanger, Qirong Ho, Hao Zhang, Gregory\u00a0R. Ganger, and Eric\u00a0P. Xing. 2021. Pollux: Co-adaptive Cluster Scheduling for Goodput-Optimized Deep Learning. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21). USENIX Association, 1\u201318."},{"key":"e_1_3_3_2_25_2","unstructured":"SimPy. 2002. SimPy: Discrete event simulation for Python. https:\/\/simpy.readthedocs.io\/en\/latest\/ Accessed: 2025-08-13."},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","unstructured":"Sean\u00a0J. Taylor and Benjamin Letham. 2018. Forecasting at Scale. The American Statistician 72 1 (2018) 37\u201345. 10.1080\/00031305.2017.1380080","DOI":"10.1080\/00031305.2017.1380080"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"J.\u00a0D. Ullman. 1975. NP-complete scheduling problems. J. Comput. Syst. Sci. 10 3 (June 1975) 384\u2013393. 10.1016\/S0022-0000(75)80008-0","DOI":"10.1016\/S0022-0000(75)80008-0"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/2741948.2741964"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3698038.3698515"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"Ronald\u00a0W. Wolff. 1982. Poisson Arrivals See Time Averages. Operations Research 30 2 (April 1982) 223\u2013231. 10.1287\/opre.30.2.223","DOI":"10.1287\/opre.30.2.223"},{"key":"e_1_3_3_2_31_2","first-page":"595","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Xiao Wencong","year":"2018","unstructured":"Wencong Xiao, Romil Bhardwaj, Ramachandran Ramjee, Muthian Sivathanu, Nipun Kwatra, Zhenhua Han, Pratyush Patel, Xuan Peng, Hanyu Zhao, Quanlu Zhang, Fan Yang, and Lidong Zhou. 2018. Gandiva: Introspective Cluster Scheduling for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 595\u2013610."},{"key":"e_1_3_3_2_32_2","first-page":"119","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"You Jie","year":"2023","unstructured":"Jie You, Jae-Won Chung, and Mosharaf Chowdhury. 2023. Zeus: Understanding and Optimizing GPU Energy Consumption of DNN Training. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 119\u2013139."},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/E2SC.2014.10"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620678.3624793"},{"key":"e_1_3_3_2_35_2","first-page":"703","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Zheng Pengfei","year":"2023","unstructured":"Pengfei Zheng, Rui Pan, Tarannum Khan, Shivaram Venkataraman, and Aditya Akella. 2023. Shockwave: Fair and Efficient Cluster Scheduling for Dynamic Adaptation in Machine Learning. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). Boston, MA, 703\u2013723."}],"event":{"name":"SC Workshops '25: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St Louis MO USA","acronym":"SC Workshops '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731599.3767568","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T19:30:37Z","timestamp":1767987037000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731599.3767568"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":34,"alternative-id":["10.1145\/3731599.3767568","10.1145\/3731599"],"URL":"https:\/\/doi.org\/10.1145\/3731599.3767568","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}