{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T19:47:08Z","timestamp":1775504828477,"version":"3.50.1"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"19","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000144","name":"National Science Foundation through Cyber\u2013Physical Systems","doi-asserted-by":"publisher","award":["1837021"],"award-info":[{"award-number":["1837021"]}],"id":[{"id":"10.13039\/100000144","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Internet Things J."],"published-print":{"date-parts":[[2022,10,1]]},"DOI":"10.1109\/jiot.2022.3164023","type":"journal-article","created":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T20:05:49Z","timestamp":1648843549000},"page":"19160-19173","source":"Crossref","is-referenced-by-count":53,"title":["MBRL-MC: An HVAC Control Approach via Combining Model-Based Deep Reinforcement Learning and Model Predictive Control"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9594-640X","authenticated-orcid":false,"given":"Liangliang","family":"Chen","sequence":"first","affiliation":[{"name":"School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9225-040X","authenticated-orcid":false,"given":"Fei","family":"Meng","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5246-2141","authenticated-orcid":false,"given":"Ying","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.2172\/1581221"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2951106"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2871461"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2013.11.016"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2011.03.005"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.enconman.2014.05.053"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2014.2332873"},{"key":"ref8","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref9","first-page":"1","article-title":"Model-based reinforcement learning for Atari","volume-title":"Proc. 8th Int. Conf. Learn. Represent.","author":"Kaiser"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref11","first-page":"1","article-title":"Value prediction network","volume-title":"Proc. 31st Conf. Neural Inf. Process. Syst.","author":"Oh"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3051400"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062224"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2957289"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2021.110833"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3078462"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref19","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1561\/2200000086"},{"key":"ref21","first-page":"1","article-title":"Efficient model-based reinforcement learning through optimistic policy search and planning","volume-title":"Proc. 34th Conf. Neural Inf. Process. Syst.","author":"Curi"},{"key":"ref22","first-page":"4759","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume-title":"Proc. 32nd Conf. Neural Inf. Process. Syst.","author":"Chua"},{"key":"ref23","article-title":"Benchmarking model-based reinforcement learning","author":"Wang","year":"2019","journal-title":"arXiv:1907.02057"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/122344.122377"},{"key":"ref26","first-page":"1","article-title":"Learning to adapt in dynamic, real-world environments through meta-reinforcement learning","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Nagabandi"},{"key":"ref27","first-page":"2672","article-title":"Generative adversarial nets","volume-title":"Proc. 28th Conf. Neural Inf. Process. Syst.","author":"Goodfellow"},{"key":"ref28","first-page":"1","article-title":"Exploring model-based planning with policy networks","volume-title":"Proc. 8th Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref29","volume":"2","author":"Rawlings","year":"2017","journal-title":"Model Predictive Control: Theory, Computation, and Design"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3360322.3360861"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3408308.3427986"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.2514\/1.G001921"},{"key":"ref33","first-page":"1","article-title":"CEM-RL: Combining evolutionary and gradient-based methods for policy search","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Pourchot"},{"key":"ref34","article-title":"Deep reinforcement learning and the deadly triad","author":"Van Hasselt","year":"2018","journal-title":"arXiv:1812.02648"},{"key":"ref35","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref36","first-page":"29","article-title":"Deep recurrent Q-learning for partially observable MDPS","volume-title":"Proc. AAAI Fall Symp. Ser.","author":"Hausknecht"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636140"},{"key":"ref38","first-page":"1262","article-title":"Discovering and removing exogenous state variables and rewards for reinforcement learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Dietterich"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-13-2853-4_4"},{"key":"ref40","first-page":"3814","article-title":"Data center cooling using model-predictive control","volume-title":"Proc. 32nd Conf. Neural Inf. Process. Syst.","author":"Lazic"},{"key":"ref41","first-page":"1547","article-title":"Path integral policy improvement with covariance matrix adaptation","volume-title":"Proc. 29th Int. Conf. Mach. Learn.","author":"Stulp"},{"key":"ref42","article-title":"The CMA evolution strategy: A tutorial","author":"Hansen","year":"2016","journal-title":"arXiv:1604.00772"},{"key":"ref43","first-page":"617","article-title":"Model-based reinforcement learning via meta-policy optimization","volume-title":"Proc. Conf. Robot Learn.","author":"Clavera"},{"key":"ref44","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","author":"Silver"},{"key":"ref45","volume-title":"Deep Learning","author":"Goodfellow","year":"2016"},{"key":"ref46","article-title":"Importance mixing: Improving sample reuse in evolutionary policy search methods","author":"Pourchot","year":"2018","journal-title":"arXiv:1808.05832"},{"key":"ref47","first-page":"1","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. 2nd Int. Conf. Learn. Represent.","author":"Kingma"},{"key":"ref48","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"}],"container-title":["IEEE Internet of Things Journal"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/6488907\/9899827\/9747916-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6488907\/9899827\/09747916.pdf?arnumber=9747916","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:31:45Z","timestamp":1705537905000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9747916\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,1]]},"references-count":48,"journal-issue":{"issue":"19"},"URL":"https:\/\/doi.org\/10.1109\/jiot.2022.3164023","relation":{},"ISSN":["2327-4662","2372-2541"],"issn-type":[{"value":"2327-4662","type":"electronic"},{"value":"2372-2541","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,1]]}}}