{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:14:44Z","timestamp":1780053284220,"version":"3.54.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T00:00:00Z","timestamp":1686873600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,20]]},"DOI":"10.1145\/3575813.3595202","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T16:18:31Z","timestamp":1686932311000},"page":"242-265","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Rule-based Policy Regularization for Reinforcement Learning-based Building Control"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9316-2150","authenticated-orcid":false,"given":"Hsin-Yu","family":"Liu","sequence":"first","affiliation":[{"name":"Electrical and Computer Engineering, University of California San Diego, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9490-2018","authenticated-orcid":false,"given":"Bharathan","family":"Balaji","sequence":"additional","affiliation":[{"name":"Amazon, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6489-7633","authenticated-orcid":false,"given":"Rajesh","family":"Gupta","sequence":"additional","affiliation":[{"name":"University of California San Diego, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5224-6043","authenticated-orcid":false,"given":"Dezhi","family":"Hong","sequence":"additional","affiliation":[{"name":"Amazon, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,6,16]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Tameem Adel Alexander Rosenberg and Been Kim. 2019. Learning to Explain: An Information-Theoretic Perspective on Model Interpretation. In Advances in Neural Information Processing Systems (NeurIPS). 10027\u201310036."},{"key":"e_1_3_2_1_2_1","volume-title":"Deep reinforcement learning at the edge of the statistical precipice. Advances in neural information processing systems 34","author":"Agarwal Rishabh","year":"2021","unstructured":"Rishabh Agarwal, Max Schwarzer, Pablo\u00a0Samuel Castro, Aaron\u00a0C Courville, and Marc Bellemare. 2021. Deep reinforcement learning at the edge of the statistical precipice. Advances in neural information processing systems 34 (2021), 29304\u201329320."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/0888-613X(92)90020-Z"},{"key":"e_1_3_2_1_5_1","volume-title":"Openai gym. arXiv preprint arXiv:1606.01540","author":"Brockman Greg","year":"2016","unstructured":"Greg Brockman, Vicki Cheung, Ludwig Pettersson, Jonas Schneider, John Schulman, Jie Tang, and Wojciech Zaremba. 2016. Openai gym. arXiv preprint arXiv:1606.01540 (2016)."},{"key":"e_1_3_2_1_6_1","volume-title":"OCTOPUS: Deep reinforcement learning for holistic smart building control. In BuildSys. 326\u2013335.","author":"Ding Xianzhong","year":"2019","unstructured":"Xianzhong Ding, Wan Du, and Alberto Cerpa. 2019. OCTOPUS: Deep reinforcement learning for holistic smart building control. In BuildSys. 326\u2013335."},{"key":"e_1_3_2_1_7_1","volume-title":"D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219","author":"Fu Justin","year":"2020","unstructured":"Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, and Sergey Levine. 2020. D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)."},{"key":"e_1_3_2_1_8_1","volume-title":"A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34","author":"Fujimoto Scott","year":"2021","unstructured":"Scott Fujimoto and Shixiang\u00a0Shane Gu. 2021. A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34 (2021), 20132\u201320145."},{"key":"e_1_3_2_1_9_1","volume-title":"International conference on machine learning. PMLR, 1587\u20131596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587\u20131596."},{"key":"e_1_3_2_1_10_1","volume-title":"ICML. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In ICML. PMLR, 2052\u20132062."},{"key":"e_1_3_2_1_11_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861\u20131870."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scs.2019.101748"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/4284.4286"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"e_1_3_2_1_15_1","volume-title":"Chang Ye, and Jeff Braga.","author":"Huang Shengyi","year":"2021","unstructured":"Shengyi Huang, Rousslan Fernand\u00a0Julien Dossa, Chang Ye, and Jeff Braga. 2021. CleanRL: High-quality Single-file Implementations of Deep Reinforcement Learning Algorithms. arXiv preprint arXiv:2111.08819 (2021)."},{"key":"e_1_3_2_1_16_1","unstructured":"Lu Jiang Tong Xiao and Thomas Huang. 2018. Learning to Explain: A Framework for Machine Learning Explanations. In Advances in Neural Information Processing Systems (NeurIPS). 9810\u20139820."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3486611.3488729"},{"key":"e_1_3_2_1_18_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_19_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma P","year":"2013","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Machine Learning. PMLR, 5774\u20135783","author":"Kostrikov Ilya","year":"2021","unstructured":"Ilya Kostrikov, Rob Fergus, Jonathan Tompson, and Ofir Nachum. 2021. Offline reinforcement learning with fisher divergence critic regularization. In International Conference on Machine Learning. PMLR, 5774\u20135783."},{"key":"e_1_3_2_1_21_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems 33 (2020), 1179\u20131191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","unstructured":"National Renewable\u00a0Energy Laboratory. 2008. TMY3 Datasets."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3034218"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2020.103568"},{"key":"e_1_3_2_1_25_1","volume-title":"Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine learning 8, 3","author":"Lin Long-Ji","year":"1992","unstructured":"Long-Ji Lin. 1992. Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine learning 8, 3 (1992), 293\u2013321."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPS54341.2022.00023"},{"key":"e_1_3_2_1_27_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_28_1","unstructured":"Department of Energy. 2023. Prototype Building Models. https:\/\/www.energycodes.gov\/prototype-building-models#TMY3"},{"key":"e_1_3_2_1_29_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.106836"},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR","author":"Schulman John","year":"2015","unstructured":"John Schulman, Sergey Levine, Pieter Abbeel, Michael Jordan, and Philipp Moritz. 2015. Trust region policy optimization. In International conference on machine learning. PMLR, 1889\u20131897."},{"key":"e_1_3_2_1_32_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_33_1","volume-title":"d3rlpy: An Offline Deep Reinforcement Learning Library. arXiv preprint arXiv:2111.03788","author":"Seno Takuma","year":"2021","unstructured":"Takuma Seno and Michita Imai. 2021. d3rlpy: An Offline Deep Reinforcement Learning Library. arXiv preprint arXiv:2111.03788 (2021)."},{"key":"e_1_3_2_1_34_1","volume-title":"International conference on machine learning. PMLR, 387\u2013395","author":"Silver David","year":"2014","unstructured":"David Silver, Guy Lever, Nicolas Heess, Thomas Degris, Daan Wierstra, and Martin Riedmiller. 2014. Deterministic policy gradient algorithms. In International conference on machine learning. PMLR, 387\u2013395."},{"key":"e_1_3_2_1_35_1","volume-title":"Mastering the game of go without human knowledge. nature 550, 7676","author":"Silver David","year":"2017","unstructured":"David Silver, Julian Schrittwieser, Karen Simonyan, Ioannis Antonoglou, Aja Huang, Arthur Guez, Thomas Hubert, Lucas Baker, Matthew Lai, Adrian Bolton, 2017. Mastering the game of go without human knowledge. nature 550, 7676 (2017), 354\u2013359."},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 2019 International Conference on Robotics and Automation (ICRA). IEEE, 3239\u20133245","author":"Spurek Przemys\u0142aw","year":"2019","unstructured":"Przemys\u0142aw Spurek, Damian Szyma\u0144ski, and Tomasz Tajmajer. 2019. Towards Interpretable Reinforcement Learning Using Attention Augmented Agents. In Proceedings of the 2019 International Conference on Robotics and Automation (ICRA). IEEE, 3239\u20133245."},{"key":"e_1_3_2_1_37_1","volume-title":"International conference on machine learning. PMLR, 997\u20131005","author":"Sui Yanan","year":"2015","unstructured":"Yanan Sui, Alkis Gotovos, Joel Burdick, and Andreas Krause. 2015. Safe exploration for optimization with Gaussian processes. In International conference on machine learning. PMLR, 997\u20131005."},{"key":"e_1_3_2_1_38_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton S","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"e_1_3_2_1_40_1","volume-title":"Safe exploration in finite markov decision processes with gaussian processes. Advances in Neural Information Processing Systems 29","author":"Turchetta Matteo","year":"2016","unstructured":"Matteo Turchetta, Felix Berkenkamp, and Andreas Krause. 2016. Safe exploration in finite markov decision processes with gaussian processes. Advances in Neural Information Processing Systems 29 (2016)."},{"key":"e_1_3_2_1_41_1","volume-title":"The NumPy array: a structure for efficient numerical computation. Computing in science & engineering 13, 2","author":"Der\u00a0Walt Stefan Van","year":"2011","unstructured":"Stefan Van Der\u00a0Walt, S\u00a0Chris Colbert, and Gael Varoquaux. 2011. The NumPy array: a structure for efficient numerical computation. Computing in science & engineering 13, 2 (2011), 22\u201330."},{"key":"e_1_3_2_1_42_1","first-page":"4235","article-title":"Munchausen reinforcement learning","volume":"33","author":"Vieillard Nino","year":"2020","unstructured":"Nino Vieillard, Olivier Pietquin, and Matthieu Geist. 2020. Munchausen reinforcement learning. Advances in Neural Information Processing Systems 33 (2020), 4235\u20134246.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8852110"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2020.115036"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062224"},{"key":"e_1_3_2_1_46_1","volume-title":"Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361","author":"Wu Yifan","year":"2019","unstructured":"Yifan Wu, George Tucker, and Ofir Nachum. 2019. Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361 (2019)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2015.07.050"},{"key":"e_1_3_2_1_48_1","volume-title":"Deep Reinforcement Learning for Smart Building Energy Management: A Survey. arXiv preprint arXiv:2008.05074","author":"Yu Liang","year":"2020","unstructured":"Liang Yu, Shuqi Qin, Meng Zhang, Chao Shen, Tao Jiang, and Xiaohong Guan. 2020. Deep Reinforcement Learning for Smart Building Energy Management: A Survey. arXiv preprint arXiv:2008.05074 (2020)."},{"key":"e_1_3_2_1_49_1","volume-title":"Combo: Conservative offline model-based policy optimization. Advances in neural information processing systems 34","author":"Yu Tianhe","year":"2021","unstructured":"Tianhe Yu, Aviral Kumar, Rafael Rafailov, Aravind Rajeswaran, Sergey Levine, and Chelsea Finn. 2021. Combo: Conservative offline model-based policy optimization. Advances in neural information processing systems 34 (2021), 28954\u201328967."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2022.3164084"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2019.07.029"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2021.3072675"}],"event":{"name":"e-Energy '23: The 14th ACM International Conference on Future Energy Systems","location":"Orlando FL USA","acronym":"e-Energy '23","sponsor":["SIGEnergy ACM Special Interest Group on Energy Systems and Informatics"]},"container-title":["Proceedings of the 14th ACM International Conference on Future Energy Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3575813.3595202","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3575813.3595202","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:11Z","timestamp":1750178771000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3575813.3595202"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,16]]},"references-count":52,"alternative-id":["10.1145\/3575813.3595202","10.1145\/3575813"],"URL":"https:\/\/doi.org\/10.1145\/3575813.3595202","relation":{},"subject":[],"published":{"date-parts":[[2023,6,16]]},"assertion":[{"value":"2023-06-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}