{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T22:27:39Z","timestamp":1773872859711,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T00:00:00Z","timestamp":1667952000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,9]]},"DOI":"10.1145\/3563357.3566164","type":"proceedings-article","created":{"date-parts":[[2022,12,8]],"date-time":"2022-12-08T13:31:36Z","timestamp":1670506296000},"page":"462-465","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["B2RL"],"prefix":"10.1145","author":[{"given":"Hsin-Yu","family":"Liu","sequence":"first","affiliation":[{"name":"University of California"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaohan","family":"Fu","sequence":"additional","affiliation":[{"name":"University of California"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bharathan","family":"Balaji","sequence":"additional","affiliation":[{"name":"Amazon"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rajesh","family":"Gupta","sequence":"additional","affiliation":[{"name":"University of California"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dezhi","family":"Hong","sequence":"additional","affiliation":[{"name":"Amazon"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,12,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2528282.2528304"},{"key":"e_1_3_2_1_2_1","volume-title":"Openai gym. arXiv preprint arXiv:1606.01540","author":"Brockman Greg","year":"2016","unstructured":"Greg Brockman, Vicki Cheung, Ludwig Pettersson, Jonas Schneider, John Schulman, Jie Tang, and Wojciech Zaremba. 2016. Openai gym. arXiv preprint arXiv:1606.01540 (2016)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Drury B Crawley Linda K Lawrie Frederick C Winkelmann Walter F Buhl Y Joe Huang Curtis O Pedersen Richard K Strand Richard J Liesen Daniel E Fisher Michael J Witte et al. 2001. EnergyPlus: creating a new-generation building energy simulation program. Energy and buildings 33 4 (2001) 319--331.","DOI":"10.1016\/S0378-7788(00)00114-6"},{"key":"e_1_3_2_1_4_1","volume-title":"A global database of thermal comfort field experiments. ASHRAE transactions 104","author":"De Dear Richard J","year":"1998","unstructured":"Richard J De Dear. 1998. A global database of thermal comfort field experiments. ASHRAE transactions 104 (1998), 1141."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Xianzhong Ding Wan Du and Alberto E Cerpa. 2020. MB2C: Model-Based Deep Reinforcement Learning for Multi-zone Building Control. In BuildSys. 50--59.","DOI":"10.1145\/3408308.3427986"},{"key":"e_1_3_2_1_6_1","unstructured":"Povl O Fanger et al. 1970. Thermal comfort. Analysis and applications in environmental engineering. Thermal comfort. Analysis and applications in environmental engineering. (1970)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3538637.3538866"},{"key":"e_1_3_2_1_8_1","volume-title":"D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219","author":"Fu Justin","year":"2020","unstructured":"Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, and Sergey Levine. 2020. D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)."},{"key":"e_1_3_2_1_9_1","volume-title":"A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34","author":"Fujimoto Scott","year":"2021","unstructured":"Scott Fujimoto and Shixiang Shane Gu. 2021. A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34 (2021), 20132--20145."},{"key":"e_1_3_2_1_10_1","volume-title":"ICML. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In ICML. PMLR, 2052--2062."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2992117"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3486611.3488729"},{"key":"e_1_3_2_1_13_1","first-page":"3146","article-title":"Lightgbm: A highly efficient gradient boosting decision tree","volume":"30","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. Lightgbm: A highly efficient gradient boosting decision tree. NIPS 30 (2017), 3146--3154.","journal-title":"NIPS"},{"key":"e_1_3_2_1_14_1","volume-title":"University of Wisconsin-Madison Solar Energy Laboratory. TRNSYS: A transient simulation program. Eng. Experiment Station","author":"Klein SA","year":"1976","unstructured":"SA Klein. 1976. University of Wisconsin-Madison Solar Energy Laboratory. TRNSYS: A transient simulation program. Eng. Experiment Station (1976)."},{"key":"e_1_3_2_1_15_1","volume-title":"Stabilizing off-policy q-learning via bootstrapping error reduction. arXiv preprint arXiv:1906.00949","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, George Tucker, and Sergey Levine. 2019. Stabilizing off-policy q-learning via bootstrapping error reduction. arXiv preprint arXiv:1906.00949 (2019)."},{"key":"e_1_3_2_1_16_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems 33 (2020), 1179--1191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPS54341.2022.00023"},{"key":"e_1_3_2_1_18_1","volume-title":"Provably good batch reinforcement learning without great exploration. arXiv preprint arXiv:2007.08202","author":"Liu Yao","year":"2020","unstructured":"Yao Liu, Adith Swaminathan, Alekh Agarwal, and Emma Brunskill. 2020. Provably good batch reinforcement learning without great exploration. arXiv preprint arXiv:2007.08202 (2020)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.23919\/ACC45564.2020.9147629"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2016.2517211"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/PSCC.2014.7038106"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3390\/app11083518"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.egypro.2017.07.429"},{"key":"e_1_3_2_1_24_1","volume-title":"CityLearn: Standardizing research in multi-agent reinforcement learning for demand response and urban energy management. arXiv preprint arXiv:2012.10504","author":"V\u00e1zquez-Canteli Jos\u00e9 R","year":"2020","unstructured":"Jos\u00e9 R V\u00e1zquez-Canteli, Sourav Dey, Gregor Henze, and Zolt\u00e1n Nagy. 2020. CityLearn: Standardizing research in multi-agent reinforcement learning for demand response and urban energy management. arXiv preprint arXiv:2012.10504 (2020)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2020.115036"},{"key":"e_1_3_2_1_26_1","volume-title":"Building controls virtual test bed","author":"Wetter Michael","unstructured":"Michael Wetter, Philip Haves, and Brian Coffey. 2008. Building controls virtual test bed. Technical Report. Lawrence Berkeley National Laboratory."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2015.07.050"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Chi Zhang Sanmukh R Kuppannagari Rajgopal Kannan and Viktor K Prasanna. 2019. Building HVAC scheduling using reinforcement learning via neural network based model approximation. In BuildSys. 287--296.","DOI":"10.1145\/3360322.3360861"},{"key":"e_1_3_2_1_29_1","volume-title":"Sanmukh Rao Kuppannagari, and Viktor K Prasanna","author":"Zhang Chi","year":"2022","unstructured":"Chi Zhang, Sanmukh Rao Kuppannagari, and Viktor K Prasanna. 2022. Safe Building HVAC Control via Batch Reinforcement Learning. IEEE Transactions on Sustainable Computing (2022)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Zhiang Zhang and Khee Poh Lam. 2018. Practical implementation and evaluation of deep reinforcement learning control for a radiant heating system. In BuildSys.","DOI":"10.1145\/3276774.3276775"}],"event":{"name":"BuildSys '22: The 9th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation","location":"Boston Massachusetts","acronym":"BuildSys '22","sponsor":["SIGEnergy ACM Special Interest Group on Energy Systems and Informatics"]},"container-title":["Proceedings of the 9th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3563357.3566164","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3563357.3566164","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T07:27:46Z","timestamp":1755847666000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3563357.3566164"}},"subtitle":["an open-source dataset for building batch reinforcement learning"],"short-title":[],"issued":{"date-parts":[[2022,11,9]]},"references-count":30,"alternative-id":["10.1145\/3563357.3566164","10.1145\/3563357"],"URL":"https:\/\/doi.org\/10.1145\/3563357.3566164","relation":{},"subject":[],"published":{"date-parts":[[2022,11,9]]},"assertion":[{"value":"2022-12-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}