{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T19:37:21Z","timestamp":1772998641222,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001858","name":"Vinnova","doi-asserted-by":"publisher","award":["2016?05193"],"award-info":[{"award-number":["2016?05193"]}],"id":[{"id":"10.13039\/501100001858","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3721146.3721957","type":"proceedings-article","created":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T17:42:05Z","timestamp":1743529325000},"page":"230-237","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Utilizing Large Language Models for Ablation Studies in Machine Learning and Deep Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7236-4637","authenticated-orcid":false,"given":"Sina","family":"Sheikholeslami","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0034-5098","authenticated-orcid":false,"given":"Hamid","family":"Ghasemirahni","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2748-8929","authenticated-orcid":false,"given":"Amir H.","family":"Payberah","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0422-6560","authenticated-orcid":false,"given":"Tianze","family":"Wang","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9484-6714","authenticated-orcid":false,"given":"Jim","family":"Dowling","sequence":"additional","affiliation":[{"name":"Hopsworks AB, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6779-7435","authenticated-orcid":false,"given":"Vladimir","family":"Vlassov","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]}],"member":"320","published-online":{"date-parts":[[2025,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"NIPS 2014 workshop on high-energy physics and machine learning. PMLR, 19--55","author":"Adam-Bourdarios Claire","year":"2015","unstructured":"Claire Adam-Bourdarios, Glen Cowan, C\u00e9cile Germain, Isabelle Guyon, Bal\u00e1zs K\u00e9gl, and David Rousseau. 2015. The Higgs boson machine learning challenge. In NIPS 2014 workshop on high-energy physics and machine learning. PMLR, 19--55."},{"key":"e_1_3_2_1_2_1","volume-title":"EvoPrompting: language models for code-level neural architecture search. Advances in Neural Information Processing Systems","author":"Chen Angelica","year":"2023","unstructured":"Angelica Chen, David Dohan, and David So. 2023. EvoPrompting: language models for code-level neural architecture search. Advances in Neural Information Processing Systems (2023)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"e_1_3_2_1_4_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24, 240 (2023), 1--113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","first-page":"12548","DOI":"10.1038\/s41598-024-63376-2","article-title":"Leveraging graph neural networks for supporting automatic triage of patients","volume":"14","author":"Defilippo Annamaria","year":"2024","unstructured":"Annamaria Defilippo, Pierangelo Veltri, Pietro Li\u00f3, and Pietro Hiram Guzzi. 2024. Leveraging graph neural networks for supporting automatic triage of patients. Scientific Reports 14, 1 (2024), 12548.","journal-title":"Scientific Reports"},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Machine Learning. PMLR, 7480--7512","author":"Dehghani Mostafa","year":"2023","unstructured":"Mostafa Dehghani, Josip Djolonga, Basil Mustafa, Piotr Padlewski, Jonathan Heek, Justin Gilmer, Andreas Peter Steiner, Mathilde Caron, Robert Geirhos, Ibrahim Alabdulmohsin, et al. 2023. Scaling vision transformers to 22 billion parameters. In International Conference on Machine Learning. PMLR, 7480--7512."},{"key":"e_1_3_2_1_7_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_8_1","volume-title":"ABLATOR: Robust Horizontal-Scaling of Machine Learning Ablation Experiments. In International Conference on Automated Machine Learning. PMLR, 19--1.","author":"Fostiropoulos Iordanis","year":"2023","unstructured":"Iordanis Fostiropoulos and Laurent Itti. 2023. ABLATOR: Robust Horizontal-Scaling of Machine Learning Ablation Experiments. In International Conference on Automated Machine Learning. PMLR, 19--1."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3642970.3655836"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3643916.3644396"},{"key":"e_1_3_2_1_11_1","volume-title":"Inductive representation learning on large graphs. Advances in neural information processing systems 30","author":"Hamilton Will","year":"2017","unstructured":"Will Hamilton, Zhitao Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_13_1","volume-title":"Laks VS Lakshmanan, and Dujian Ding","author":"Jawahar Ganesh","year":"2023","unstructured":"Ganesh Jawahar, Muhammad Abdul-Mageed, Laks VS Lakshmanan, and Dujian Ding. 2023. LLM Performance Predictors are good initializers for Architecture Search. arXiv preprint arXiv:2310.16712 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"LLM Maybe LongLM: Self-extend LLM context window without tuning. arXiv preprint arXiv:2401.01325","author":"Jin Hongye","year":"2024","unstructured":"Hongye Jin, Xiaotian Han, Jingfeng Yang, Zhimeng Jiang, Zirui Liu, Chia-Yuan Chang, Huiyuan Chen, and Xia Hu. 2024. LLM Maybe LongLM: Self-extend LLM context window without tuning. arXiv preprint arXiv:2401.01325 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"Adam: A Method for Stochastic Optimization. arXiv e-prints","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. arXiv e-prints (2014), arXiv-1412."},{"key":"e_1_3_2_1_16_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012)."},{"key":"e_1_3_2_1_17_1","unstructured":"LangChain-AI. 2025. LangGraph. https:\/\/github.com\/langchain-ai\/langgraph. Accessed: 2025-02-09."},{"key":"e_1_3_2_1_18_1","volume-title":"Large Language Model Agent for Hyper-Parameter Optimization. arXiv preprint arXiv:2402.01881","author":"Liu Siyi","year":"2024","unstructured":"Siyi Liu, Chen Gao, and Yong Li. 2024. Large Language Model Agent for Hyper-Parameter Optimization. arXiv preprint arXiv:2402.01881 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2006.114"},{"key":"e_1_3_2_1_20_1","volume-title":"Constantin Waubert de Puiseau, and Tobias Meisen","author":"Meyes Richard","year":"2019","unstructured":"Richard Meyes, Melanie Lu, Constantin Waubert de Puiseau, and Tobias Meisen. 2019. Ablation studies in artificial neural networks. arXiv preprint arXiv:1901.08644 (2019)."},{"key":"e_1_3_2_1_21_1","volume-title":"The RefinedWeb dataset for Falcon LLM: outperforming curated corpora with web data, and web data only. arXiv preprint arXiv:2306.01116","author":"Penedo Guilherme","year":"2023","unstructured":"Guilherme Penedo, Quentin Malartic, Daniel Hesslow, Ruxandra Cojocaru, Alessandro Cappelli, Hamza Alobeidli, Baptiste Pannier, Ebtesam Almazrouei, and Julien Launay. 2023. The RefinedWeb dataset for Falcon LLM: outperforming curated corpora with web data, and web data only. arXiv preprint arXiv:2306.01116 (2023)."},{"key":"e_1_3_2_1_22_1","unstructured":"Sina Sheikholeslami. 2019. Ablation Programming for Machine Learning. Master's thesis. KTH School of Electrical Engineering and Computer Science (EECS)."},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 1st Workshop on Machine Learning and Systems. 55--61","author":"Sheikholeslami Sina","year":"2021","unstructured":"Sina Sheikholeslami, Moritz Meister, Tianze Wang, Amir H Payberah, Vladimir Vlassov, and Jim Dowling. 2021. Autoablation: Automated parallel ablation studies for deep learning. In Proceedings of the 1st Workshop on Machine Learning and Systems. 55--61."},{"key":"e_1_3_2_1_24_1","unstructured":"Shaden Smith Mostofa Patwary Brandon Norick Patrick LeGresley Samyam Rajbhandari Jared Casper Zhun Liu Shrimai Prabhumoye George Zerveas Vijay Korthikanti et al. 2022. Using deepspeed and megatron to train megatron-turing nlg 530b a large-scale generative language model. arXiv preprint arXiv:2201.11990 (2022)."},{"key":"e_1_3_2_1_25_1","volume-title":"Findings of the Association for Computational Linguistics: ACL","author":"Song Demin","year":"2024","unstructured":"Demin Song, Honglin Guo, Yunhua Zhou, Shuhao Xing, Yudong Wang, Zifan Song, Wenwei Zhang, Qipeng Guo, Hang Yan, Xipeng Qiu, and Dahua Lin. 2024. Code Needs Comments: Enhancing Code LLMs with Comment Augmentation. In Findings of the Association for Computational Linguistics: ACL 2024, Lun-Wei Ku, Andre Martins, and Vivek Srikumar (Eds.). Association for Computational Linguistics, Bangkok, Thailand, 13640--13656."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_27_1","unstructured":"Kotaro Tanahashi Yuichi Inoue Yu Yamaguchi Hidetatsu Yaginuma Daiki Shiotsuka Hiroyuki Shimatani Kohei Iwamasa Yoshiaki Inoue Takafumi Yamaguchi Koki Igari et al. 2023. Evaluation of Large Language Models for Decision Making in Autonomous Driving. arXiv preprint arXiv:2312.06351 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"\u0141 ukasz Kaiser, and Illia Polosukhin","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Jiaqi Wang Zhengliang Liu Lin Zhao Zihao Wu Chong Ma Sigang Yu Haixing Dai Qiushi Yang Yiheng Liu Songyao Zhang et al. 2023. Review of large vision models and visual prompt engineering. Meta-Radiology (2023) 100047.","DOI":"10.1016\/j.metrad.2023.100047"},{"key":"e_1_3_2_1_30_1","volume-title":"Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155","author":"Wu Qingyun","year":"2023","unstructured":"Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Shaokun Zhang, Erkang Zhu, Beibin Li, Li Jiang, Xiaoyun Zhang, and Chi Wang. 2023. Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155 (2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"Large Language Models Synergize with Automated Machine Learning. Transactions on Machine Learning Research","author":"Xu Jinglue","year":"2024","unstructured":"Jinglue Xu, Jialong Li, Zhen Liu, NAV Suryanarayanan, Guoyuan Zhou, JIA GUO, Hitoshi Iba, and Kenji Tei. 2024. Large Language Models Synergize with Automated Machine Learning. Transactions on Machine Learning Research (2024)."},{"key":"e_1_3_2_1_32_1","volume-title":"Foundation models for decision making: Problems, methods, and opportunities. arXiv preprint arXiv:2303.04129","author":"Yang Sherry","year":"2023","unstructured":"Sherry Yang, Ofir Nachum, Yilun Du, Jason Wei, Pieter Abbeel, and Dale Schuurmans. 2023. Foundation models for decision making: Problems, methods, and opportunities. arXiv preprint arXiv:2303.04129 (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"Using Large Language Models for Hyperparameter Optimization. arXiv e-prints","author":"Zhang Michael R","year":"2023","unstructured":"Michael R Zhang, Nishkrit Desai, Juhan Bae, Jonathan Lorraine, and Jimmy Ba. 2023. Using Large Language Models for Hyperparameter Optimization. arXiv e-prints (2023), arXiv-2312."},{"key":"e_1_3_2_1_34_1","volume-title":"How Well Do LLMs Generate Code for Different Application Domains? Benchmark and Evaluation. arXiv preprint arXiv:2412.18573","author":"Zheng Dewu","year":"2024","unstructured":"Dewu Zheng, Yanlin Wang, Ensheng Shi, Hongyu Zhang, and Zibin Zheng. 2024. How Well Do LLMs Generate Code for Different Application Domains? Benchmark and Evaluation. arXiv preprint arXiv:2412.18573 (2024)."}],"event":{"name":"EuroMLSys '25: 5th Workshop on Machine Learning and Systems","location":"World Trade Center Rotterdam Netherlands","acronym":"EuroMLSys '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 5th Workshop on Machine Learning and Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721146.3721957","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721146.3721957","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:39Z","timestamp":1750298259000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721146.3721957"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":34,"alternative-id":["10.1145\/3721146.3721957","10.1145\/3721146"],"URL":"https:\/\/doi.org\/10.1145\/3721146.3721957","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-04-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}