{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T20:47:35Z","timestamp":1776199655070,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","funder":[{"name":"EPSRC","award":["EP\/X040518\/1"],"award-info":[{"award-number":["EP\/X040518\/1"]}]},{"name":"EPSRC","award":["EP\/Y037421\/1"],"award-info":[{"award-number":["EP\/Y037421\/1"]}]},{"name":"EPSRC","award":["EP\/Y019229\/1"],"award-info":[{"award-number":["EP\/Y019229\/1"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3802973.3804457","type":"proceedings-article","created":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T19:41:22Z","timestamp":1776195682000},"page":"25-30","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CEDAR: Carbon Efficient Dynamic Allocation and Routing for Agentic LLM Inference"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-8303-4828","authenticated-orcid":false,"given":"Amit","family":"More","sequence":"first","affiliation":[{"name":"University of York, York, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7157-0236","authenticated-orcid":false,"given":"Tarique","family":"Anwar","sequence":"additional","affiliation":[{"name":"RMIT University, Melbourne, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0169-0704","authenticated-orcid":false,"given":"Poonam","family":"Yadav","sequence":"additional","affiliation":[{"name":"University of York, York, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2026,4,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Joshua Achiam. [n. d.]. Spinning Up in Deep Reinforcement Learning. 
https:\/\/spinningup.openai.com."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629569"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3698038.3698517"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3716368.3735301"},{"key":"e_1_3_2_1_5_1","unstructured":"Electricity Maps. 2025. Real-Time Carbon Intensity of Electricity Worldwide. https:\/\/www.electricitymaps.com."},{"key":"e_1_3_2_1_6_1","volume-title":"Advances in Neural Information Processing Systems 37 (NeurIPS","author":"Fu Yichao","year":"2024","unstructured":"Yichao Fu, Siqi Zhu, Runlong Su, Aurick Qiao, Ion Stoica, and Hao Zhang. 2024. Efficient LLM Scheduling by Learning to Rank. In Advances in Neural Information Processing Systems 37 (NeurIPS 2024), Lun-Wei Ku, Andr\u00e9 Martins, and Vivek Srikumar (Eds.). Curran Associates, Inc., 52301\u201352322. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/file\/6c8985579293e0209bdaa4f21bb1d237-Paper-Conference.pdf"},{"key":"e_1_3_2_1_7_1","volume-title":"Multi-Agent System Inquiries Surge 1,445% from Q1 2024 to Q2","year":"2025","unstructured":"Gartner. 2025. Multi-Agent System Inquiries Surge 1,445% from Q1 2024 to Q2 2025. Industry Report."},{"key":"e_1_3_2_1_8_1","volume-title":"Ravi Shreyas Anupindi, and Ramachandran Ramjee","author":"Goel Kanishk","year":"2025","unstructured":"Kanishk Goel, Jayashree Mohan, Nipun Kwatra, Ravi Shreyas Anupindi, and Ramachandran Ramjee. 2025. Niyama: Breaking the Silos of LLM Inference Serving. arXiv preprint arXiv:2503.22562 (2025). https:\/\/arxiv.org\/abs\/2503.22562"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning (ICML '18)","volume":"80","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor. 
In Proceedings of the 35th International Conference on Machine Learning (ICML '18), Vol. 80. PMLR, 1861\u20131870. https:\/\/proceedings.mlr.press\/v80\/haarnoja18b.html"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2026.3655019"},{"key":"e_1_3_2_1_11_1","unstructured":"International Energy Agency. 2025. Energy Demand from AI. Technical Report. IEA Paris France. https:\/\/www.iea.org\/reports\/energy-and-ai\/energy-demand-from-ai"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_13_1","volume-title":"Share & Growth Analysis","year":"2025","unstructured":"MarketsandMarkets. 2025. AI Inference Market Size, Share & Growth Analysis, 2025\u20132030. Market Research Report. https:\/\/www.marketsandmarkets.com\/Market-Reports\/ai-inference-market-189921964.html"},{"key":"e_1_3_2_1_14_1","unstructured":"MIT Technology Review. 2025. We Did the Math on AI's Energy Footprint. Here's the Story You Haven't Heard. https:\/\/www.technologyreview.com\/2025\/05\/20\/1116327\/ai-energy-usage-climate-footprint-big-tech\/."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 2025 European Conference on Machine Learning Systems (EuroMLSys '25)","author":"Umamaheswari Devi Felix George Preetam Patil","year":"2025","unstructured":"Preetam Patil Umamaheswari Devi Felix George Pratibha Moogi Moonmoon Mohanty, Gautham Bolar and Parimal Parag. 2025. Deferred Prefill for Throughput Maximization in LLM Inference. In Proceedings of the 2025 European Conference on Machine Learning Systems (EuroMLSys '25). Amsterdam, Netherlands. https:\/\/euromlsys.eu\/pdf\/euromlsys25-39.pdf"},{"key":"e_1_3_2_1_16_1","volume-title":"Astraea: A State-Aware Scheduling Engine for LLM-Powered Agents. arXiv preprint arXiv:2512.14142","author":"Ni Hongqiu","year":"2024","unstructured":"Hongqiu Ni, Yizhou Zhou, Zhuohan Li, Zi Ye, Ion Stoica, and Lianmin Zheng. 2024. 
Astraea: A State-Aware Scheduling Engine for LLM-Powered Agents. arXiv preprint arXiv:2512.14142 (2024). https:\/\/arxiv.org\/abs\/2512.14142"},{"key":"e_1_3_2_1_17_1","unstructured":"NVIDIA Corporation. 2024. TensorRT-LLM: High-Performance Inference Library for Large Language Models. https:\/\/github.com\/NVIDIA\/TensorRT-LLM"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3698038.3698523"},{"key":"e_1_3_2_1_19_1","volume-title":"2024 United States Data Center Energy Usage Report","author":"Shehabi Arman","unstructured":"Arman Shehabi, Sarah J. Smith, Eric Masanet, and Jonathan Koomey. 2024. 2024 United States Data Center Energy Usage Report. Technical Report LBNL-2001552. Lawrence Berkeley National Laboratory, Berkeley, CA, USA. https:\/\/escholarship.org\/uc\/item\/32d6m0d1"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2025.00068"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00102"},{"key":"e_1_3_2_1_22_1","volume-title":"Anas Abubakar Bisu, Hongjian Sun, and Poonam Yadav.","author":"Tota Khel Ahmad Massud","year":"2025","unstructured":"Ahmad Massud Tota Khel, Olufemi Isaac Olayiwola, Anas Abubakar Bisu, Hongjian Sun, and Poonam Yadav. 2025. Towards Carbon-Neutrality for 6G Networks. White Paper. Communications Hub for Empowering Distributed Cloud Computing Applications and Research (CHEDDAR). https:\/\/cheddarhub.org\/wp-content\/uploads\/sites\/168\/White_Paper__Final_.pdf Version 1.0."},{"key":"e_1_3_2_1_23_1","volume-title":"Artificial Intelligence: How Much Energy Does AI Use? https:\/\/unric.org\/en\/artificial-intelligence-how-much-energy-does-ai-use\/.","author":"United Nations Regional Information Centre for Western Europe.","year":"2025","unstructured":"United Nations Regional Information Centre for Western Europe. 2025. Artificial Intelligence: How Much Energy Does AI Use? 
https:\/\/unric.org\/en\/artificial-intelligence-how-much-energy-does-ai-use\/."},{"key":"e_1_3_2_1_24_1","unstructured":"WattTime. 2025. WattTime API: Real-Time Grid Carbon Intensity Data. https:\/\/www.watttime.org\/api-documentation."},{"key":"e_1_3_2_1_25_1","volume-title":"GreenScale: CO2e-Aware Autoscaling for Carbon-Efficient Cloud Computing. arXiv preprint arXiv:2304.00404","author":"Wong David","year":"2023","unstructured":"David Wong, Bodin Uddamvathanak, and Linh Thi Xuan Phan. 2023. GreenScale: CO2e-Aware Autoscaling for Carbon-Efficient Cloud Computing. arXiv preprint arXiv:2304.00404 (2023). https:\/\/arxiv.org\/abs\/2304.00404"},{"key":"e_1_3_2_1_26_1","volume-title":"Shiyi Cao, Christos Kozyrakis, Ion Stoica, Joseph E. Gonzalez, Clark Barrett, and Ying Sheng.","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Liangsheng Yin, Zhiqiang Xie, Jeff Huang, Chuyue Sun, Cody Hao Yu, Shiyi Cao, Christos Kozyrakis, Ion Stoica, Joseph E. Gonzalez, Clark Barrett, and Ying Sheng. 2024. SGLang: Efficient Execution of Structured Language Model Programs. In Advances in Neural Information Processing Systems 37 (NeurIPS 2024), Lun-Wei Ku, Andr\u00e9 Martins, and Vivek Srikumar (Eds.). Curran Associates, Inc., 41203\u201341228. 
https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/file\/724be4472168f31ba1c9ac630f15dec8-Paper-Conference.pdf"}],"event":{"name":"EuroSys '26: 21st European Conference on Computer Systems","location":"Edinburgh Scotland Uk","acronym":"GreenSys '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 2nd International Workshop on Systems and Methods for Sustainable Large-Scale AI (GreenSys)"],"original-title":[],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T19:41:32Z","timestamp":1776195692000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3802973.3804457"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,26]]},"references-count":26,"alternative-id":["10.1145\/3802973.3804457","10.1145\/3802973"],"URL":"https:\/\/doi.org\/10.1145\/3802973.3804457","relation":{},"subject":[],"published":{"date-parts":[[2026,4,26]]},"assertion":[{"value":"2026-04-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}