{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T07:56:54Z","timestamp":1775203014316,"version":"3.50.1"},"reference-count":87,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T00:00:00Z","timestamp":1688169600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T00:00:00Z","timestamp":1688169600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T00:00:00Z","timestamp":1688169600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,7,1]],"date-time":"2023-07-01T00:00:00Z","timestamp":1688169600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1837021"],"award-info":[{"award-number":["1837021"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Sustain. Comput."],"published-print":{"date-parts":[[2023,7,1]]},"DOI":"10.1109\/tsusc.2023.3251302","type":"journal-article","created":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T18:27:21Z","timestamp":1677695241000},"page":"504-521","source":"Crossref","is-referenced-by-count":23,"title":["Fast Human-in-the-Loop Control for HVAC Systems via Meta-Learning and Model-Based Offline Reinforcement Learning"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9594-640X","authenticated-orcid":false,"given":"Liangliang","family":"Chen","sequence":"first","affiliation":[{"name":"School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9225-040X","authenticated-orcid":false,"given":"Fei","family":"Meng","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Chinese University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5246-2141","authenticated-orcid":false,"given":"Ying","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"263","reference":[{"key":"ref1","article-title":"2018 renewable energy data book","author":"Koebrich","year":"2020"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2021.104782"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2019.01.187"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2021.107952"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1080\/19401493.2014.891656"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2014.11.058"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2015.11.033"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2019.07.029"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2019.2946545"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3105176"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref15","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref16","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Representations","author":"Lillicrap"},{"key":"ref17","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062224"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2021.108680"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2021.117164"},{"key":"ref21","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref22","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref23","first-page":"1","article-title":"Model-based reinforcement learning for Atari","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Kaiser"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1561\/2200000086"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3360322.3360861"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3164023"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2022.3164084"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPS54341.2022.00023"},{"key":"ref29","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref30","first-page":"7436","article-title":"Uncertainty-based offline reinforcement learning with diversified Q-ensemble","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"An"},{"key":"ref31","first-page":"21810","article-title":"MOReL: Model-based offline reinforcement learning","volume-title":"Proc. 34th Conf. Adv. Neural Inf. Process. Syst.","author":"Kidambi"},{"key":"ref32","first-page":"14129","article-title":"MOPO: Model-based offline policy optimization","volume-title":"Proc. 34th Conf. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2021.110833"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.energy.2022.125290"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2019.106535"},{"key":"ref36","first-page":"11761","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. 33rd Conf. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref37","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. 34th Conf. Adv. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref38","first-page":"1","article-title":"Offline reinforcement learning with munchausen regularization","volume-title":"Proc. Offline Reinforcement Learn. Workshop Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref39","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref40","first-page":"28954","article-title":"COMBO: Conservative offline model-based policy optimization","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref41","first-page":"1","article-title":"RAMBO-RL: Robust adversarial model-based offline reinforcement learning","volume-title":"Proc. 36th Conf. Neural Inf. Process. Syst.","author":"Rigter"},{"key":"ref42","first-page":"1","article-title":"Model-based offline planning","volume-title":"Proc. 9th Int. Conf. Learn. Representations","author":"Argenson"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref44","first-page":"12498","article-title":"When to trust your model: Model-based policy optimization","volume-title":"Proc. 33rd Conf. Neural Inf. Process. Syst.","author":"Janner"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2017.03.009"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2018.10.027"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2019.04.016"},{"key":"ref48","volume-title":"Thermal Comfort: Analysis and Applications in Environmental Engineering","author":"Fanger","year":"1970"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/j.jobe.2019.100846"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-020-05935-y"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3094479"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.jmsy.2023.01.003"},{"key":"ref53","article-title":"A global database of thermal comfort field experiments","volume":"104","author":"De Dear","year":"1998","journal-title":"ASHRAE Trans."},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2018.06.022"},{"key":"ref55","volume-title":"ASHRAE Glob. Database Thermal Comfort Field Meas.","author":"Parkinson","year":"2022"},{"key":"ref56","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Finn"},{"key":"ref57","volume-title":"Gaussian Processes for Machine Learning","volume":"2","author":"Williams","year":"2006"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-7566-5"},{"key":"ref59","first-page":"2402","article-title":"Meta-gradient reinforcement learning","volume-title":"Proc. 32nd Conf. Adv. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref60","first-page":"4759","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume-title":"Proc. 32nd Conf. Adv. Neural Inf. Process. Syst.","author":"Chua"},{"key":"ref61","first-page":"7953","article-title":"A game theoretic framework for model based reinforcement learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Rajeswaran"},{"key":"ref62","first-page":"6405","article-title":"Simple and scalable predictive uncertainty estimation using deep ensembles","volume-title":"Proc. 31st Conf. Neural Inf. Process. Syst.","author":"Lakshminarayanan"},{"key":"ref63","first-page":"4033","article-title":"Deep exploration via bootstrapped DQN","volume-title":"Proc. 30th Conf. Neural Inf. Process. Syst.","author":"Osband"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.5555\/3045390.3045502"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2022.3155327"},{"key":"ref66","article-title":"Intro to commercial building HVAC systems and energy code requirements","author":"Cole","year":"2019"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1115\/DSCC2011-6078"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1201\/9781315137667"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2017.2725899"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636140"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1016\/S0378-7788(02)00018-X"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2012.08.024"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2020.109776"},{"key":"ref74","first-page":"1","article-title":"Learning multimodal transition dynamics for model-based reinforcement learning","volume-title":"Proc. 1st Scaling-Up Reinforcement Learn. Workshop","author":"Moerland"},{"key":"ref75","first-page":"6414","article-title":"Single-model uncertainties for deep learning","volume-title":"Proc. 33rd Conf. Neural Inf. Process. Syst.","author":"Tagasovska"},{"key":"ref76","article-title":"Uncertainty in deep learning","author":"Gal","year":"2016"},{"key":"ref77","first-page":"4314","article-title":"Calibrated model-based deep reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Malik"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.3390\/robotics8030065"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2019.00075"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-13-2853-4_4"},{"key":"ref81","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist.","author":"Ross"},{"key":"ref82","article-title":"Safely bridging offline and online reinforcement learning","author":"Xu","year":"2021"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-020-01183-3"},{"key":"ref84","first-page":"1785","article-title":"Better exploration with optimistic actor critic","volume-title":"Proc. 33rd Conf. Neural Inf. Process. Syst.","author":"Ciosek"},{"key":"ref85","article-title":"UCB exploration via Q-ensembles","author":"Chen","year":"2017"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-01560-1"},{"key":"ref87","first-page":"1","article-title":"Meta-learning for batch mode active learning","volume-title":"Proc. 6th Int. Conf. Learn. Representations","author":"Ravi"}],"container-title":["IEEE Transactions on Sustainable Computing"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7274860\/10244170\/10057050-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7274860\/10244170\/10057050.pdf?arnumber=10057050","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,28]],"date-time":"2024-05-28T04:24:50Z","timestamp":1716870290000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10057050\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,1]]},"references-count":87,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tsusc.2023.3251302","relation":{},"ISSN":["2377-3782","2377-3790"],"issn-type":[{"value":"2377-3782","type":"electronic"},{"value":"2377-3790","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,1]]}}}