{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T15:49:22Z","timestamp":1781279362410,"version":"3.54.1"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-23-1-2505"],"award-info":[{"award-number":["N00014-23-1-2505"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Science Foundation","award":["CCF-2046991"],"award-info":[{"award-number":["CCF-2046991"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1109\/lra.2025.3617726","type":"journal-article","created":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T17:26:42Z","timestamp":1759512402000},"page":"11944-11951","source":"Crossref","is-referenced-by-count":1,"title":["Capacity-Aware Planning and Scheduling in Budget-Constrained Multi-Agent MDPs: A Meta-RL Approach"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9517-7358","authenticated-orcid":false,"given":"Manav","family":"Vora","sequence":"first","affiliation":[{"name":"University of Illinois Urbana-Champaign, Champaign, IL, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5077-2269","authenticated-orcid":false,"given":"Ilan","family":"Shomorony","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign, Champaign, IL, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8510-8787","authenticated-orcid":false,"given":"Melkior","family":"Ornik","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign, Champaign, IL, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ASET.2017.7983729"},{"key":"ref2","article-title":"Planning under uncertainty for aggregated electric vehicle charging using Markov decision processes","volume-title":"Proc. AAAI Workshop Artif. Intell. Smart Grids Smart Buildings","author":"Walraven","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1115\/MSEC2023-105230"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1057\/palgrave.jors.2601079"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12233"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-2217(99)00453-1"},{"key":"ref7","article-title":"Solving truly massive budgeted monotonic POMDPs with oracle-guided meta-reinforcement learning","author":"Vora","year":"2024"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16979"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223"},{"key":"ref10","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Achiam","year":"2017"},{"key":"ref11","first-page":"1326","article-title":"Stationary deterministic policies for constrained MDPs with multiple long-run objectives","volume-title":"Proc. 21st Conf. Uncertainty Artif. Intell.","volume":"19","author":"Dolgov","year":"2005"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-53291-8_22"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8618745"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i10.26366"},{"key":"ref15","article-title":"Budgeted reinforcement learning in continuous state space","volume-title":"Proc. 32nd Adv. Neural Inf. Process. Syst.","author":"Carrara","year":"2019"},{"key":"ref16","article-title":"Benchmarking safe exploration in deep reinforcement learning","author":"Ray","year":"2019"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3005745.3005750"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1287\/moor.12.3.441"},{"key":"ref19","volume-title":"Theory of Linear and Integer Programming","author":"Schrijver","year":"1998"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-3446-x"},{"key":"ref21","article-title":"Constrained combinatorial optimization with reinforcement learning","author":"Solozabal","year":"2020"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3023-4_2"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TAES.2016.140952"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/331499.331504"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2023.3280080"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.07.006"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"key":"ref29","article-title":"Rl$^{2}$: Fast reinforcement learning via slow reinforcement learning","author":"Duan","year":"2016"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1061\/40799(213)4"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1111\/j.1740-9713.2018.01123.x"},{"key":"ref32","volume-title":"The New Weibull Handbook","author":"Abernethy","year":"2006"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1002\/0471725234"},{"key":"ref34","volume-title":"Genetic Algorithms in Search, Optimization, and Machine Learning","author":"Goldberg","year":"1989"},{"key":"ref35","article-title":"Traderbots: A market-based approach for resource, role, and task allocation in multirobot coordination","author":"Dias","year":"2003"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7083369\/11169302\/11192684-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11169302\/11192684.pdf?arnumber=11192684","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T19:02:03Z","timestamp":1764010923000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11192684\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":35,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3617726","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11]]}}}