{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T13:45:26Z","timestamp":1776951926253,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,4]]},"DOI":"10.1145\/3777911.3800631","type":"proceedings-article","created":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T13:00:11Z","timestamp":1776949211000},"page":"255-263","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Agentic AI Applications Under Budget Constraints"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6495-5717","authenticated-orcid":false,"given":"Hamta","family":"Sedghani","sequence":"first","affiliation":[{"name":"Politecnico di Milano, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6549-924X","authenticated-orcid":false,"given":"Federica","family":"Filippini","sequence":"additional","affiliation":[{"name":"University of Milano-Bicocca, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8492-4640","authenticated-orcid":false,"given":"Zahra","family":"Seyedi","sequence":"additional","affiliation":[{"name":"Politecnico di Milano, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6958-8215","authenticated-orcid":false,"given":"Blerina","family":"Spahiu","sequence":"additional","affiliation":[{"name":"University of Milano-Bicocca, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2480-966X","authenticated-orcid":false,"given":"Michele","family":"Ciavotta","sequence":"additional","affiliation":[{"name":"University of Milano-Bicocca, Milan, 
Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4224-927X","authenticated-orcid":false,"given":"Danilo","family":"Ardagna","sequence":"additional","affiliation":[{"name":"Politecnico di Milano, Milan, Italy"}]}],"member":"320","published-online":{"date-parts":[[2026,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Xin Eric Wang, and William Wang","author":"Amayuelas Alfonso","year":"2025","unstructured":"Alfonso Amayuelas, Jingbo Yang, Saaket Agashe, Ashwin Nagarajan, Antonis Antoniades, Xin Eric Wang, and William Wang. 2025. Self-Resource Allocation in Multi-Agent LLM Systems. arXiv:2504.02051 [cs.MA] https:\/\/arxiv.org\/abs\/2504.02051"},{"key":"e_1_3_2_1_2_1","volume-title":"Stephen McAleer, Albert Q Jiang, Jia Deng, Stella Biderman, and Sean Welleck.","author":"Azerbayev Zhangir","year":"2023","unstructured":"Zhangir Azerbayev, Hailey Schoelkopf, Keiran Paster, Marco Dos Santos, Stephen McAleer, Albert Q Jiang, Jia Deng, Stella Biderman, and Sean Welleck. 2023. Llemma: An open language model for mathematics. arXiv preprint arXiv:2310.10631 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Yingyan Celine Lin, and Pavlo Molchanov","author":"Belcak Peter","year":"2025","unstructured":"Peter Belcak, Greg Heinrich, Shizhe Diao, Yonggan Fu, Xin Dong, Saurav Muralidharan, Yingyan Celine Lin, and Pavlo Molchanov. 2025. Small Language Models are the Future of Agentic AI. arXiv:2506.02153 [cs.AI] https:\/\/arxiv.org\/abs\/2506.02153"},{"key":"e_1_3_2_1_4_1","unstructured":"Lingjiao Chen Matei Zaharia and James Zou. 2023. FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance. arXiv:2305.05176 [cs.LG] https:\/\/arxiv.org\/abs\/2305.05176"},{"key":"e_1_3_2_1_5_1","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano et al. 2021. Training verifiers to solve math word problems. 
arXiv preprint arXiv:2110.14168 (2021)."},{"key":"e_1_3_2_1_6_1","unstructured":"Jasper Dekoninck Maximilian Baader and Martin Vechev. 2025. A Unified Approach to Routing and Cascading for LLMs. In PMLR. https:\/\/arxiv.org\/abs\/2410.10347"},{"key":"e_1_3_2_1_7_1","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171-4186."},{"key":"e_1_3_2_1_8_1","unstructured":"Cooper Elsworth Keguo Huang David Patterson Ian Schneider Robert Sedivy Savannah Goodman Ben Townsend Parthasarathy Ranganathan Jeff Dean Amin Vahdat Ben Gomes and James Manyika. 2025. Measuring the environmental impact of delivering AI at Google Scale. arXiv:2508.15734 [cs.AI] https:\/\/arxiv.org\/abs\/2508.15734"},{"key":"e_1_3_2_1_9_1","unstructured":"Zhouhong Gu Xiaoxuan Zhu Yin Cai Hao Shen Xingzhou Chen Qingyi Wang Jialin Li Xiaoran Shi Haoran Guo Wenxuan Huang et al. 2025. AgentGroupChat-V2: Divide-and-Conquer Is What LLM-Based Multi-Agent System Need. arXiv preprint arXiv:2506.15451 (2025)."},{"key":"e_1_3_2_1_10_1","volume-title":"Deberta: Decoding-enhanced bert with disentangled attention. arXiv preprint arXiv:2006.03654","author":"He Pengcheng","year":"2020","unstructured":"Pengcheng He, Xiaodong Liu, Jianfeng Gao, and Weizhu Chen. 2020. Deberta: Decoding-enhanced bert with disentangled attention. 
arXiv preprint arXiv:2006.03654 (2020)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.14778\/3749646.3749702"},{"key":"e_1_3_2_1_12_1","unstructured":"Bowen Jin TJ Collins Donghan Yu Mert Cemri Shenao Zhang Mengyu Li Jay Tang Tian Qin Zhiyang Xu Jiarui Lu et al. 2025. Controlling Performance and Budget of a Centralized Multi-agent LLM System with Reinforcement Learning. arXiv preprint arXiv:2511.02755 (2025)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.472"},{"key":"e_1_3_2_1_14_1","unstructured":"Aitor Lewkowycz Anders Andreassen David Dohan Ethan Dyer Henryk Michalewski Vinay Ramasesh Ambrose Slone Cem Anil Imanol Schlag Theo Gutman-Solo et al. 2022. Solving quantitative reasoning problems with language models. arXiv preprint arXiv:2206.14858 (2022). https:\/\/arxiv.org\/abs\/2206.14858"},{"key":"e_1_3_2_1_15_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Lightman Hunter","year":"2023","unstructured":"Hunter Lightman, Vineet Kosaraju, Yuri Burda, Harrison Edwards, Bowen Baker, Teddy Lee, Jan Leike, John Schulman, Ilya Sutskever, and Karl Cobbe. 2023. Let's verify step by step. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_16_1","volume-title":"G-eval: NLG evaluation using gpt-4 with better human alignment. arXiv preprint arXiv:2303.16634","author":"Liu Yang","year":"2023","unstructured":"Yang Liu, Dan Iter, Yichong Xu, Shuohang Wang, Ruochen Xu, and Chenguang Zhu. 2023. G-eval: NLG evaluation using gpt-4 with better human alignment. arXiv preprint arXiv:2303.16634 (2023)."},{"key":"e_1_3_2_1_17_1","volume-title":"CPLS: Optimizing the Assignment of LLM Queries. In 2024 IEEE International Conference on Software Maintenance and Evolution (ICSME). 151-162","author":"Liu Yueyue","year":"2024","unstructured":"Yueyue Liu, Hongyu Zhang, Zhiqiang Li, and Yuantian Miao. 2024a. CPLS: Optimizing the Assignment of LLM Queries. 
In 2024 IEEE International Conference on Software Maintenance and Evolution (ICSME). 151-162."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICWS62655.2024.00098"},{"key":"e_1_3_2_1_19_1","volume-title":"Knapsack problems: algorithms and computer implementations","author":"Martello Silvano","unstructured":"Silvano Martello and Paolo Toth. 1990. Knapsack problems: algorithms and computer implementations. John Wiley & Sons, Inc., USA."},{"key":"e_1_3_2_1_20_1","volume-title":"The Thirteenth International Conference on Learning Representations.","author":"Ong Isaac","unstructured":"Isaac Ong, Amjad Almahairi, Vincent Wu, Wei-Lin Chiang, Tianhao Wu, Joseph E Gonzalez, M Waleed Kadous, and Ion Stoica. [n.d.]. RouteLLM: Learning to Route LLMs from Preference Data. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_21_1","volume-title":"Emmanuele Lacavalla, Alessandro Basile, Shuyi Yang, Paul Castro, Daniel Kang, Joseph E. Gonzalez, Koushik Sen, Dawn Song, Ion Stoica, Matei Zaharia, and Marquita Ellis.","author":"Pan Melissa Z.","year":"2025","unstructured":"Melissa Z. Pan, Negar Arabzadeh, Riccardo Cogo, Yuxuan Zhu, Alexander Xiong, Lakshya A Agrawal, Huanzhi Mao, Emma Shen, Sid Pallerla, Liana Patel, Shu Liu, Tianneng Shi, Xiaoyuan Liu, Jared Quincy Davis, Emmanuele Lacavalla, Alessandro Basile, Shuyi Yang, Paul Castro, Daniel Kang, Joseph E. Gonzalez, Koushik Sen, Dawn Song, Ion Stoica, Matei Zaharia, and Marquita Ellis. 2025. Measuring Agents in Production. arXiv:2512.04123 [cs.CY] https:\/\/arxiv.org\/abs\/2512.04123"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_23_1","volume-title":"Beyond Pipelines: A Survey of the Paradigm Shift toward Model-Native Agentic AI. 
arXiv:2510.16720 [cs.AI] https:\/\/arxiv.org\/abs\/2510.16720","author":"Sang Jitao","year":"2025","unstructured":"Jitao Sang, Jinlin Xiao, Jiarun Han, Jilin Chen, Xiaoyi Chen, Shuyu Wei, Yongjie Sun, and Yuhang Wang. 2025. Beyond Pipelines: A Survey of the Paradigm Shift toward Model-Native Agentic AI. arXiv:2510.16720 [cs.AI] https:\/\/arxiv.org\/abs\/2510.16720"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"H. Sedghani D. Ardagna M. Matteucci G.A. Fontana G. Verticale F. Amarilli R. Badia D. Lezzi I. Blanquer A. Martin and K. Wawruch. 2021. Advancing Design and Runtime Management of AI Applications with AI-SPRINT (Position Paper). In 2021 IEEE 45th COMPSAC. 1455-1462.","DOI":"10.1109\/COMPSAC51774.2021.00216"},{"key":"e_1_3_2_1_25_1","unstructured":"Charlie Snell Jaehoon Lee Kelvin Xu and Aviral Kumar. 2024. Scaling LLM Test-Time Compute Optimally can be More Effective than Scaling Model Parameters. arXiv:2408.03314 [cs.LG] https:\/\/arxiv.org\/abs\/2408.03314"},{"key":"e_1_3_2_1_26_1","unstructured":"David Alejandro Torrado Guzman. 2025. Development and Evaluation of AI Agentic Workflows in LangGraph: An Energy and Computational Performance Analysis. Master's thesis. M.Sc. Computer Science Engineering - Politecnico di Milano. https:\/\/www.politesi.polimi.it\/retrieve\/71d42dc3-2735-4b07-851f-a24cda63d9d8\/Torrado_7_2025.pdf"},{"key":"e_1_3_2_1_27_1","volume-title":"Multi-agent collaboration mechanisms: A survey of llms. arXiv preprint arXiv:2501.06322","author":"Tran Khanh-Tung","year":"2025","unstructured":"Khanh-Tung Tran, Dung Dao, Minh-Duong Nguyen, Quoc-Viet Pham, Barry O'Sullivan, and Hoang D Nguyen. 2025. Multi-agent collaboration mechanisms: A survey of llms. arXiv preprint arXiv:2501.06322 (2025)."},{"key":"e_1_3_2_1_28_1","unstructured":"Yonghui Wu Mike Schuster Zhifeng Chen Quoc V Le Mohammad Norouzi Wolfgang Macherey Maxim Krikun Yuan Cao Qin Gao Klaus Macherey et al. 2016. 
Google's neural machine translation system: Bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016)."},{"key":"e_1_3_2_1_29_1","volume-title":"BAMAS: Structuring Budget-Aware Multi-Agent Systems. arXiv preprint arXiv:2511.21572","author":"Yang Liming","year":"2025","unstructured":"Liming Yang, Junyu Luo, Xuanzhe Liu, Yiling Lou, and Zhenpeng Chen. 2025. BAMAS: Structuring Budget-Aware Multi-Agent Systems. arXiv preprint arXiv:2511.21572 (2025)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671614"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng Siyuan Zhuang Zhanghao Wu Yonghao Zhuang Zi Lin Zhuohan Li Dacheng Li Eric Xing et al. 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in neural information processing systems Vol. 36 (2023) 46595-46623.","DOI":"10.52202\/075280-2020"}],"event":{"name":"ICPE '26: 17th ACM\/SPEC International Conference on Performance Engineering","location":"Florence Italy","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","SIGMETRICS ACM Special Interest Group on Measurement and Evaluation","SPEC"]},"container-title":["Companion of the 17th ACM\/SPEC International Conference on Performance Engineering"],"original-title":[],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T13:01:11Z","timestamp":1776949271000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3777911.3800631"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,3]]},"references-count":31,"alternative-id":["10.1145\/3777911.3800631","10.1145\/3777911"],"URL":"https:\/\/doi.org\/10.1145\/3777911.3800631","relation":{},"subject":[],"published":{"date-parts":[[2026,5,3]]},"assertion":[{"value":"2026-05-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}