{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:07:59Z","timestamp":1772755679173,"version":"3.50.1"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006233","name":"National Renewable Energy Laboratory, operated by Alliance for Sustainable Energy, LLC, for the U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-AC36-08GO28308"],"award-info":[{"award-number":["DE-AC36-08GO28308"]}],"id":[{"id":"10.13039\/100006233","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Smart Grid"],"published-print":{"date-parts":[[2022,5]]},"DOI":"10.1109\/tsg.2022.3141625","type":"journal-article","created":{"date-parts":[[2022,1,10]],"date-time":"2022-01-10T20:57:22Z","timestamp":1641848242000},"page":"1976-1987","source":"Crossref","is-referenced-by-count":22,"title":["Two-Stage Reinforcement Learning Policy Search for Grid-Interactive Building Control"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4857-2318","authenticated-orcid":false,"given":"Xiangyu","family":"Zhang","sequence":"first","affiliation":[{"name":"Computational Science Center, National Renewable Energy Laboratory, Golden, CO, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1070-7192","authenticated-orcid":false,"given":"Yue","family":"Chen","sequence":"additional","affiliation":[{"name":"Power Systems Engineering Center, National Renewable Energy Laboratory, Golden, CO, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4489-8388","authenticated-orcid":false,"given":"Andrey","family":"Bernstein","sequence":"additional","affiliation":[{"name":"Power Systems Engineering Center, National Renewable Energy Laboratory, Golden, CO, USA"}]},{"given":"Rohit","family":"Chintala","sequence":"additional","affiliation":[{"name":"Building Technologies and Science Center, National Renewable Energy Laboratory, Golden, CO, USA"}]},{"given":"Peter","family":"Graf","sequence":"additional","affiliation":[{"name":"Computational Science Center, National Renewable Energy Laboratory, Golden, CO, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7746-5955","authenticated-orcid":false,"given":"Xin","family":"Jin","sequence":"additional","affiliation":[{"name":"Building Technologies and Science Center, National Renewable Energy Laboratory, Golden, CO, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6140-1957","authenticated-orcid":false,"given":"David","family":"Biagioni","sequence":"additional","affiliation":[{"name":"Computational Science Center, National Renewable Energy Laboratory, Golden, CO, USA"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2018.07.089"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.epsr.2008.04.002"},{"key":"ref33","first-page":"4155","article-title":"Gridinteractive multi-zone building control using reinforcement learning with global-local policy search","author":"zhang","year":"2021","journal-title":"Proc Amer Control Conf (ACC)"},{"key":"ref32","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv 1412 6980"},{"key":"ref31","first-page":"543","article-title":"A method of solving a convex programming problem with convergence rate O(1\/k&#x02C6;2)","volume":"269","author":"nesterov","year":"1983","journal-title":"Doklady Akademii Nauk"},{"key":"ref30","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","author":"fazel","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref37","article-title":"Combining PPO and Evolutionary Strategies for Better Policy Search.","author":"she","year":"2021"},{"key":"ref36","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-015-9296-2"},{"key":"ref34","article-title":"Evolution strategies as a scalable alternative to reinforcement learning","author":"salimans","year":"2017","journal-title":"arXiv 1703 03864"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/JSYST.2020.3017190"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3074871"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.23919\/ACC45564.2020.9147608"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2015.08.025"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2017.08.093"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2013.11.016"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2020.09.001"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2020.3014055"},{"key":"ref17","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref18","article-title":"Playing Atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv 1312 5602"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794127"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.23919\/ACC45564.2020.9147629"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2014.2322604"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2019.07.029"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2019.03.038"},{"key":"ref6","year":"2021","journal-title":"Commercial Buildings Energy Consumption Survey (CBECS) Table E3 Electricity consumption (BTU) by End Use 2012"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.egyai.2020.100020"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIA.2019.2941179"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2020.2965559"},{"key":"ref7","year":"2021","journal-title":"AC Saver (Summer Saver)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MELE.2016.2614181"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.egypro.2017.03.028"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.2172\/1166884"},{"key":"ref46","article-title":"OpenAI Gym","author":"brockman","year":"2016","journal-title":"arXiv 1606 01540 [cs]"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2016.2517211"},{"key":"ref45","first-page":"3053","article-title":"RLlib: Abstractions for distributed reinforcement learning","volume":"80","author":"liang","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref48","article-title":"DD-PPO: Learning near-perfect pointgoal navigators from 2.5 billion frames","author":"wijmans","year":"2019","journal-title":"arXiv 1911 00357"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3427773.3427872"},{"key":"ref47","article-title":"An empirical model of large-batch training","author":"mccandlish","year":"2018","journal-title":"arXiv 1812 06162"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2016.2640184"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2951106"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3427773.3427865"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1115\/DSCC2015-9933"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2020.2978061"},{"key":"ref44","year":"2021","journal-title":"Austin Weather Data"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062224"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.2172\/1009264"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2834219"}],"container-title":["IEEE Transactions on Smart Grid"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/5165411\/9761268\/9675814-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5165411\/9761268\/09675814.pdf?arnumber=9675814","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,30]],"date-time":"2022-05-30T21:53:47Z","timestamp":1653947627000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9675814\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5]]},"references-count":48,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tsg.2022.3141625","relation":{},"ISSN":["1949-3053","1949-3061"],"issn-type":[{"value":"1949-3053","type":"print"},{"value":"1949-3061","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,5]]}}}