{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T15:44:08Z","timestamp":1778255048914,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"ACE, one of the seven centers in JUMP 2.0, a Semiconductor Research Corporation (SRC) program sponsored by DARPA","award":["#2023-JU-3134"],"award-info":[{"award-number":["#2023-JU-3134"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,29]]},"DOI":"10.1145\/3671127.3698163","type":"proceedings-article","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:30:41Z","timestamp":1730248241000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Adaptive Policy Regularization for Offline-to-Online Reinforcement Learning in HVAC Control"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9316-2150","authenticated-orcid":false,"given":"Hsin-Yu","family":"Liu","sequence":"first","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9490-2018","authenticated-orcid":false,"given":"Bharathan","family":"Balaji","sequence":"additional","affiliation":[{"name":"Amazon, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6489-7633","authenticated-orcid":false,"given":"Rajesh","family":"Gupta","sequence":"additional","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5224-6043","authenticated-orcid":false,"given":"Dezhi","family":"Hong","sequence":"additional","affiliation":[{"name":"Amazon, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Aaron C Courville, and Marc Bellemare.","author":"Agarwal Rishabh","year":"2021","unstructured":"Rishabh Agarwal, Max Schwarzer, Pablo Samuel Castro, Aaron C Courville, and Marc Bellemare. 2021. Deep reinforcement learning at the edge of the statistical precipice. Advances in neural information processing systems 34 (2021), 29304--29320."},{"key":"e_1_3_2_1_2_1","volume-title":"Efficient online reinforcement learning with offline data. arXiv preprint arXiv:2302.02948","author":"Ball Philip J","year":"2023","unstructured":"Philip J Ball, Laura Smith, Ilya Kostrikov, and Sergey Levine. 2023. Efficient online reinforcement learning with offline data. arXiv preprint arXiv:2302.02948 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Efficient online reinforcement learning with offline data. arXiv preprint arXiv:2302.02948","author":"Ball Philip J","year":"2023","unstructured":"Philip J Ball, Laura Smith, Ilya Kostrikov, and Sergey Levine. 2023. Efficient online reinforcement learning with offline data. arXiv preprint arXiv:2302.02948 (2023)."},{"key":"e_1_3_2_1_4_1","volume-title":"OCTOPUS: Deep reinforcement learning for holistic smart building control. In BuildSys. 326--335.","author":"Ding Xianzhong","year":"2019","unstructured":"Xianzhong Ding, Wan Du, and Alberto Cerpa. 2019. OCTOPUS: Deep reinforcement learning for holistic smart building control. In BuildSys. 326--335."},{"key":"e_1_3_2_1_5_1","volume-title":"International Conference on Machine Learning. PMLR, 3061--3071","author":"Fedus William","year":"2020","unstructured":"William Fedus, Prajit Ramachandran, Rishabh Agarwal, Yoshua Bengio, Hugo Larochelle, Mark Rowland, and Will Dabney. 2020. Revisiting fundamentals of experience replay. In International Conference on Machine Learning. PMLR, 3061--3071."},{"key":"e_1_3_2_1_6_1","volume-title":"A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34","author":"Fujimoto Scott","year":"2021","unstructured":"Scott Fujimoto and Shixiang Shane Gu. 2021. A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34 (2021), 20132--20145."},{"key":"e_1_3_2_1_7_1","volume-title":"International conference on machine learning. PMLR, 1587--1596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587--1596."},{"key":"e_1_3_2_1_8_1","volume-title":"ICML. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In ICML. PMLR, 2052--2062."},{"key":"e_1_3_2_1_9_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861--1870."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scs.2019.101748"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3486611.3488729"},{"key":"e_1_3_2_1_13_1","volume-title":"Learning from limited demonstrations. Advances in Neural Information Processing Systems 26","author":"Kim Beomjoon","year":"2013","unstructured":"Beomjoon Kim, Amir-massoud Farahmand, Joelle Pineau, and Doina Precup. 2013. Learning from limited demonstrations. Advances in Neural Information Processing Systems 26 (2013)."},{"key":"e_1_3_2_1_14_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_15_1","volume-title":"Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems 32","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, Matthew Soh, George Tucker, and Sergey Levine. 2019. Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_16_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems 33 (2020), 1179--1191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","unstructured":"National Renewable Energy Laboratory. 2008. TMY3 Datasets."},{"key":"e_1_3_2_1_18_1","volume-title":"Conference on Robot Learning. PMLR, 1702--1712","author":"Lee Seunghyun","year":"2022","unstructured":"Seunghyun Lee, Younggyo Seo, Kimin Lee, Pieter Abbeel, and Jinwoo Shin. 2022. Offline-to-online reinforcement learning via balanced replay and pessimistic q-ensemble. In Conference on Robot Learning. PMLR, 1702--1712."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPS54341.2022.00023"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3563357.3566164"},{"key":"e_1_3_2_1_21_1","volume-title":"Trade-offs and Practical Solutions. arXiv preprint arXiv:2303.17396","author":"Luo Yicheng","year":"2023","unstructured":"Yicheng Luo, Jackie Kay, Edward Grefenstette, and Marc Peter Deisenroth. 2023. Finetuning from Offline Reinforcement Learning: Challenges, Trade-offs and Practical Solutions. arXiv preprint arXiv:2303.17396 (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"Awac: Accelerating online reinforcement learning with offline datasets. arXiv preprint arXiv:2006.09359","author":"Nair Ashvin","year":"2020","unstructured":"Ashvin Nair, Abhishek Gupta, Murtaza Dalal, and Sergey Levine. 2020. Awac: Accelerating online reinforcement learning with offline datasets. arXiv preprint arXiv:2006.09359 (2020)."},{"key":"e_1_3_2_1_23_1","unstructured":"Department of Energy. 2023. Prototype Building Models. https:\/\/www.energycodes.gov\/prototype-building-models#TMY3"},{"key":"e_1_3_2_1_24_1","volume-title":"Deep exploration via bootstrapped DQN. Advances in neural information processing systems 29","author":"Osband Ian","year":"2016","unstructured":"Ian Osband, Charles Blundell, Alexander Pritzel, and Benjamin Van Roy. 2016. Deep exploration via bootstrapped DQN. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_25_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44851-9_35"},{"key":"e_1_3_2_1_27_1","volume-title":"Prioritized Experience Replay. In International Conference on Learning Representations (ICLR).","author":"Schaul Tom","year":"2016","unstructured":"Tom Schaul, John Quan, Ioannis Antonoglou, and David Silver. 2016. Prioritized Experience Replay. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_28_1","volume-title":"Lecture 3: Planning by Dynamic Programming. UCL Course on RL","author":"Silver David","year":"2015","unstructured":"David Silver. 2015. Lecture 3: Planning by Dynamic Programming. UCL Course on RL (2015)."},{"key":"e_1_3_2_1_29_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_30_1","volume-title":"The NumPy array: a structure for efficient numerical computation. Computing in science & engineering 13, 2","author":"Der Walt Stefan Van","year":"2011","unstructured":"Stefan Van Der Walt, S Chris Colbert, and Gael Varoquaux. 2011. The NumPy array: a structure for efficient numerical computation. Computing in science & engineering 13, 2 (2011), 22--30."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360322.3360998"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2020.115036"},{"key":"e_1_3_2_1_33_1","volume-title":"Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361","author":"Wu Yifan","year":"2019","unstructured":"Yifan Wu, George Tucker, and Ofir Nachum. 2019. Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361 (2019)."},{"key":"e_1_3_2_1_34_1","volume-title":"Uncertainty weighted actor-critic for offline reinforcement learning. arXiv preprint arXiv:2105.08140","author":"Wu Yue","year":"2021","unstructured":"Yue Wu, Shuangfei Zhai, Nitish Srivastava, Joshua Susskind, Jian Zhang, Ruslan Salakhutdinov, and Hanlin Goh. 2021. Uncertainty weighted actor-critic for offline reinforcement learning. arXiv preprint arXiv:2105.08140 (2021)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2015.07.050"},{"key":"e_1_3_2_1_36_1","volume-title":"Deep Reinforcement Learning for Smart Building Energy Management: A Survey. arXiv preprint arXiv:2008.05074","author":"Yu Liang","year":"2020","unstructured":"Liang Yu, Shuqi Qin, Meng Zhang, Chao Shen, Tao Jiang, and Xiaohong Guan. 2020. Deep Reinforcement Learning for Smart Building Energy Management: A Survey. arXiv preprint arXiv:2008.05074 (2020)."},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Machine Learning. PMLR, 40452--40474","author":"Yu Zishun","year":"2023","unstructured":"Zishun Yu and Xinhua Zhang. 2023. Actor-critic alignment for offline-to-online reinforcement learning. In International Conference on Machine Learning. PMLR, 40452--40474."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSUSC.2022.3164084"},{"key":"e_1_3_2_1_39_1","volume-title":"Policy Expansion for Bridging Offline-to-Online Reinforcement Learning. arXiv preprint arXiv:2302.00935","author":"Zhang Haichao","year":"2023","unstructured":"Haichao Zhang, We Xu, and Haonan Yu. 2023. Policy Expansion for Bridging Offline-to-Online Reinforcement Learning. arXiv preprint arXiv:2302.00935 (2023)."},{"key":"e_1_3_2_1_40_1","volume-title":"A deeper look at experience replay. arXiv preprint arXiv:1712.01275","author":"Zhang Shangtong","year":"2017","unstructured":"Shangtong Zhang and Richard S Sutton. 2017. A deeper look at experience replay. arXiv preprint arXiv:1712.01275 (2017)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2019.07.029"},{"key":"e_1_3_2_1_42_1","volume-title":"Adaptive behavior cloning regularization for stable offline-to-online reinforcement learning. arXiv preprint arXiv:2210.13846","author":"Zhao Yi","year":"2022","unstructured":"Yi Zhao, Rinu Boney, Alexander Ilin, Juho Kannala, and Joni Pajarinen. 2022. Adaptive behavior cloning regularization for stable offline-to-online reinforcement learning. arXiv preprint arXiv:2210.13846 (2022)."},{"key":"e_1_3_2_1_43_1","volume-title":"Adaptive Policy Learning for Offline-to-Online Reinforcement Learning. arXiv preprint arXiv:2303.07693","author":"Zheng Han","year":"2023","unstructured":"Han Zheng, Xufang Luo, Pengfei Wei, Xuan Song, Dongsheng Li, and Jing Jiang. 2023. Adaptive Policy Learning for Offline-to-Online Reinforcement Learning. arXiv preprint arXiv:2303.07693 (2023)."}],"event":{"name":"BuildSys '24: The 11th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation","location":"Hangzhou China","acronym":"BuildSys '24"},"container-title":["Proceedings of the 11th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3671127.3698163","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3671127.3698163","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T23:24:56Z","timestamp":1762298696000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3671127.3698163"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"references-count":43,"alternative-id":["10.1145\/3671127.3698163","10.1145\/3671127"],"URL":"https:\/\/doi.org\/10.1145\/3671127.3698163","relation":{},"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"2024-10-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}