{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T00:01:08Z","timestamp":1755993668499,"version":"3.44.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,8]]},"DOI":"10.23919\/acc63710.2025.11107697","type":"proceedings-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:17:51Z","timestamp":1755800271000},"page":"2458-2464","source":"Crossref","is-referenced-by-count":0,"title":["Fast Bandit-based Policy Adaptation in Diverse Environments"],"prefix":"10.23919","author":[{"given":"Ziyi","family":"Zhang","sequence":"first","affiliation":[{"name":"Carnegie Mellon University,Electrical and Computer Engineering,Pittsburgh,PA,USA,15213"}]},{"given":"Guannan","family":"Qu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University,Electrical and Computer Engineering,Pittsburgh,PA,USA,15213"}]},{"given":"Yorie","family":"Nakahira","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University,Electrical and Computer Engineering,Pittsburgh,PA,USA,15213"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.23919\/ACC.2004.1384369"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1016\/j.rser.2016.03.003"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1515\/9781400874668"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.3166\/ejc.11.335-352"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1016\/S0005-1098(99)00205-8"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1016\/S0005-1098(96)80003-3"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1016\/S0167-6911(02)00342-0"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/ACC.2009.5159946"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/TIE.2017.2701774"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1016\/j.ijhydene.2013.08.106"},{"year":"2016","author":"Duan","article-title":"Rl2: Fast reinforcement learning via slow reinforcement learning","key":"ref11"},{"key":"ref12","article-title":"Improved context-based offline meta-rl with attention and contrastive learning","volume":"abs\/2102.10774","author":"Li","year":"2021","journal-title":"CoRR"},{"key":"ref13","article-title":"Bootstrapped meta-learning","volume":"abs\/2109.04504","author":"Flennerhag","year":"2021","journal-title":"CoRR"},{"key":"ref14","first-page":"1082","article-title":"On the convergence theory of gradient-based model-agnostic meta-learning algorithms","volume-title":"AISTATS","volume":"108","author":"Fallah"},{"key":"ref15","article-title":"Provably efficient model-based policy adaptation","volume":"abs\/2006.08051","author":"Song","year":"2020","journal-title":"CoRR"},{"key":"ref16","article-title":"Adaptive gradient-based meta-learning methods","volume":"abs\/1906.02717","author":"Khodak","year":"2019","journal-title":"CoRR"},{"volume-title":"AISTATS","author":"Chua","article-title":"Provable hierarchy-based meta-reinforcement learning","key":"ref17"},{"key":"ref18","first-page":"9088","article-title":"Provably efficient model-based policy adaptation","volume-title":"ICML","volume":"119","author":"Song"},{"key":"ref19","first-page":"1138","article-title":"Online switching control with stability and regret guarantees","volume-title":"L4DC","volume":"211","author":"Li"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/TAC.2009.2014923"},{"key":"ref21","article-title":"Learning to navigate in complex environments","volume":"abs\/1611.03673","author":"Mirowski","year":"2016","journal-title":"CoRR"},{"key":"ref22","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"ICML","volume":"70","author":"Finn"},{"volume-title":"NeurIPS","author":"Wu","article-title":"A finite-time analysis of two time-scale actor-critic methods","key":"ref23"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.2139\/ssrn.4773505","article-title":"Fast bandit-based policy adaptation in diverse environments","author":"Zhang","year":"2024"},{"year":"2023","author":"John","article-title":"Quadcopter simcon","key":"ref25"}],"event":{"name":"2025 American Control Conference (ACC)","start":{"date-parts":[[2025,7,8]]},"location":"Denver, CO, USA","end":{"date-parts":[[2025,7,10]]}},"container-title":["2025 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11107441\/11107442\/11107697.pdf?arnumber=11107697","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T23:57:27Z","timestamp":1755907047000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11107697\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,8]]},"references-count":25,"URL":"https:\/\/doi.org\/10.23919\/acc63710.2025.11107697","relation":{},"subject":[],"published":{"date-parts":[[2025,7,8]]}}}