{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T11:39:22Z","timestamp":1777462762041,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3805621.3807621","type":"proceedings-article","created":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:08:45Z","timestamp":1777381725000},"page":"127-138","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["GPU Memory and Utilization Estimation for Training-Aware Resource Management: Opportunities and Limitations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0156-1435","authenticated-orcid":false,"given":"Ehsan","family":"Yousefzadeh-Asl-Miandoab","sequence":"first","affiliation":[{"name":"IT University of Copenhagen, Copenhagen, Denmark"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0911-367X","authenticated-orcid":false,"given":"Reza","family":"Karimzadeh","sequence":"additional","affiliation":[{"name":"University of Copenhagen, Copenhagen, Denmark"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2867-6780","authenticated-orcid":false,"given":"Danyal","family":"Yorulmaz","sequence":"additional","affiliation":[{"name":"IT University of Copenhagen, Copenhagen, Denmark"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7739-7788","authenticated-orcid":false,"given":"Bulat","family":"Ibragimov","sequence":"additional","affiliation":[{"name":"University of Copenhagen, Copenhagen, Denmark"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6838-4854","authenticated-orcid":false,"given":"P\u0131nar","family":"T\u00f6z\u00fcn","sequence":"additional","affiliation":[{"name":"IT University of Copenhagen, Copenhagen, 
Denmark"}]}],"member":"320","published-online":{"date-parts":[[2026,4,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Martin Abadi Paul Barham Jianmin Chen Zhifeng Chen Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Geoffrey Irving Michael Isard Manjunath Kudlur Josh Levenberg Rajat Monga Sherry Moore Derek G. Murray Benoit Steiner Paul Tucker Vijay Vasudevan Pete Warden Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2016. TensorFlow: A system for large-scale machine learning. In OSDI 16. 265\u2013283."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid54584.2022.00079"},{"key":"e_1_3_2_1_3_1","unstructured":"PyTorch Contributors. 2025. Fake Tensor Mode in PyTorch. https:\/\/pytorch.org\/docs\/stable\/torch.compiler_fake_tensor.html Accessed: 2025-06-01."},{"key":"e_1_3_2_1_4_1","unstructured":"NVIDIA Corporation. [n.d.]. NVIDIA System Management User Guide. https:\/\/docs.nvidia.com\/datacenter\/nvsm\/nvsm-user-guide\/latest\/nvsm-user-guide.pdf. Accessed: 2025-06-01."},{"key":"e_1_3_2_1_5_1","unstructured":"NVIDIA Corporation. 2024. NVIDIA DCGM. https:\/\/developer.nvidia.com\/dcgm. Accessed: 2026-02-09."},{"key":"e_1_3_2_1_6_1","unstructured":"Ubuntu Documentation. [n.d.]. Ubuntu Manuals top command. https:\/\/manpages.ubuntu.com\/manpages\/focal\/man1\/top.1.html. Accessed: 2025-06-01."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP58684.2023.00039"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639232"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417050"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the 2019 USENIX Conference on Usenix Annual Technical Conference (Renton, WA, USA) (USENIX ATC '19). USENIX Association, USA, 947\u2013960","author":"Jeon Myeongjae","year":"2019","unstructured":"Myeongjae Jeon, Shivaram Venkataraman, Amar Phanishayee, Junjie Qian, Wencong Xiao, and Fan Yang. 2019. 
Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads. In Proceedings of the 2019 USENIX Conference on Usenix Annual Technical Conference (Renton, WA, USA) (USENIX ATC '19). USENIX Association, USA, 947\u2013960."},{"key":"e_1_3_2_1_11_1","volume-title":"LLMem: Estimating GPU Memory Usage for Fine-Tuning Pre-Trained LLMs. arXiv preprint arXiv:2404.10933","author":"Kim Taeho","year":"2024","unstructured":"Taeho Kim, Yanming Wang, Vatshank Chaturvedi, Lokesh Gupta, Seyeon Kim, Yongin Kwon, and Sangtae Ha. 2024. LLMem: Estimating GPU Memory Usage for Fine-Tuning Pre-Trained LLMs. arXiv preprint arXiv:2404.10933 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"DeepSpeed Memory Requirements. https:\/\/deepspeed.readthedocs.io\/en\/latest\/memory.html","author":"DeepSpeed Team Microsoft","year":"2026","unstructured":"Microsoft DeepSpeed Team. 2023. DeepSpeed Memory Requirements. https:\/\/deepspeed.readthedocs.io\/en\/latest\/memory.html. Accessed: February 2026."},{"key":"e_1_3_2_1_13_1","volume-title":"Heterogeneity-Aware Cluster Scheduling Policies for Deep Learning Workloads. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Narayanan Deepak","year":"2020","unstructured":"Deepak Narayanan, Keshav Santhanam, Fiodar Kazhamiaka, Amar Phanishayee, and Matei Zaharia. 2020. Heterogeneity-Aware Cluster Scheduling Policies for Deep Learning Workloads. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 481\u2013498. 
https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/narayanan-deepak"},{"key":"e_1_3_2_1_14_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In NIPS. 8026\u20138037."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3642970.3655827"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3721462.3770773"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629578"},{"key":"e_1_3_2_1_19_1","volume-title":"MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). 945\u2013960."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Ross Wightman. 2019. PyTorch Image Models. https:\/\/github.com\/rwightman\/pytorch-image-models. doi:10.5281\/zenodo.4414861","DOI":"10.5281\/zenodo.4414861"},{"key":"e_1_3_2_1_21_1","volume-title":"Gandiva: Introspective Cluster Scheduling for Deep Learning. 
In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Xiao Wencong","year":"2018","unstructured":"Wencong Xiao, Romil Bhardwaj, Ramachandran Ramjee, Muthian Sivathanu, Nipun Kwatra, Zhenhua Han, Pratyush Patel, Xuan Peng, Hanyu Zhao, Quanlu Zhang, Fan Yang, and Lidong Zhou. 2018. Gandiva: Introspective Cluster Scheduling for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 595\u2013610. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/xiao"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3079202"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578356.3592589"}],"event":{"name":"EuroSys '26: 21st European Conference on Computer Systems","location":"Edinburgh Scotland Uk","acronym":"EuroMLSys '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Sixth European Workshop on Machine Learning and Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3805621.3807621","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:18:21Z","timestamp":1777382301000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805621.3807621"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,27]]},"references-count":23,"alternative-id":["10.1145\/3805621.3807621","10.1145\/3805621"],"URL":"https:\/\/doi.org\/10.1145\/3805621.3807621","relation":{},"subject":[],"published":{"date-parts":[[2026,4,27]]},"assertion":[{"value":"2026-04-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}