{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:06:06Z","timestamp":1755907566421,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T00:00:00Z","timestamp":1667952000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,9]]},"DOI":"10.1145\/3563357.3564055","type":"proceedings-article","created":{"date-parts":[[2022,12,8]],"date-time":"2022-12-08T13:31:36Z","timestamp":1670506296000},"page":"40-48","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Constrained differentiable cross-entropy method for safe model-based reinforcement learning"],"prefix":"10.1145","author":[{"given":"Sam","family":"Mottahedi","sequence":"first","affiliation":[{"name":"The Pennsylvania State University"}]},{"given":"Gregory S.","family":"Pavlak","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University"}]}],"member":"320","published-online":{"date-parts":[[2022,12,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364910371999"},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning. PMLR, 22--31","author":"Achiam Joshua","year":"2017","unstructured":"Joshua Achiam, David Held, Aviv Tamar, and Pieter Abbeel. 2017. Constrained policy optimization. In International Conference on Machine Learning. PMLR, 22--31."},{"key":"e_1_3_2_1_3_1","volume-title":"Constrained Markov decision processes with total cost criteria: Lagrangian approach and dual linear program. Mathematical methods of operations research 48, 3","author":"Altman Eitan","year":"1998","unstructured":"Eitan Altman. 1998. Constrained Markov decision processes with total cost criteria: Lagrangian approach and dual linear program. Mathematical methods of operations research 48, 3 (1998), 387--417."},{"volume-title":"Constrained Markov decision processes","author":"Altman Eitan","key":"e_1_3_2_1_4_1","unstructured":"Eitan Altman. 1999. Constrained Markov decision processes. Vol. 7. CRC Press."},{"key":"e_1_3_2_1_5_1","volume-title":"The limited multi-label projection layer. arXiv preprint arXiv:1906.08707","author":"Amos Brandon","year":"2019","unstructured":"Brandon Amos, Vladlen Koltun, and J Zico Kolter. 2019. The limited multi-label projection layer. arXiv preprint arXiv:1906.08707 (2019)."},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Machine Learning. PMLR, 291--302","author":"Amos Brandon","year":"2020","unstructured":"Brandon Amos and Denis Yarats. 2020. The differentiable cross-entropy method. In International Conference on Machine Learning. PMLR, 291--302."},{"key":"e_1_3_2_1_7_1","volume-title":"A learning algorithm for risk-sensitive cost. Mathematics of operations research 33, 4","author":"Basu Arnab","year":"2008","unstructured":"Arnab Basu, Tirthankar Bhattacharyya, and Vivek S Borkar. 2008. A learning algorithm for risk-sensitive cost. Mathematics of operations research 33, 4 (2008), 880--898."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6911(01)00152-9"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360322.3360849"},{"key":"e_1_3_2_1_10_1","volume-title":"Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization. arXiv preprint arXiv:2105.08881","author":"Chen Bingqing","year":"2021","unstructured":"Bingqing Chen, Priya Donti, Kyri Baker, J Zico Kolter, and Mario Berges. 2021. Enforcing Policy Feasibility Constraints through Differentiable Projection for Energy Optimization. arXiv preprint arXiv:2105.08881 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"A lyapunov-based approach to safe reinforcement learning. arXiv preprint arXiv:1805.07708","author":"Chow Yinlam","year":"2018","unstructured":"Yinlam Chow, Ofir Nachum, Edgar Duenez-Guzman, and Mohammad Ghavamzadeh. 2018. A lyapunov-based approach to safe reinforcement learning. arXiv preprint arXiv:1805.07708 (2018)."},{"key":"e_1_3_2_1_12_1","volume-title":"Lyapunov-based safe policy optimization for continuous control. arXiv preprint arXiv:1901.10031","author":"Chow Yinlam","year":"2019","unstructured":"Yinlam Chow, Ofir Nachum, Aleksandra Faust, Edgar Duenez-Guzman, and Mohammad Ghavamzadeh. 2019. Lyapunov-based safe policy optimization for continuous control. arXiv preprint arXiv:1901.10031 (2019)."},{"key":"e_1_3_2_1_13_1","volume-title":"Transfer from simulation to real world through learning deep inverse dynamics model. arXiv preprint arXiv:1610.03518","author":"Christiano Paul","year":"2016","unstructured":"Paul Christiano, Zain Shah, Igor Mordatch, Jonas Schneider, Trevor Blackwell, Joshua Tobin, Pieter Abbeel, and Wojciech Zaremba. 2016. Transfer from simulation to real world through learning deep inverse dynamics model. arXiv preprint arXiv:1610.03518 (2016)."},{"key":"e_1_3_2_1_14_1","volume-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models. arXiv preprint arXiv:1805.12114","author":"Chua Kurtland","year":"2018","unstructured":"Kurtland Chua, Roberto Calandra, Rowan McAllister, and Sergey Levine. 2018. Deep reinforcement learning in a handful of trials using probabilistic dynamics models. arXiv preprint arXiv:1805.12114 (2018)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Roger Cooke et al. 1991. Experts in uncertainty: opinion and subjective probability in science. Oxford University Press on Demand.","DOI":"10.1093\/oso\/9780195064650.001.0001"},{"key":"e_1_3_2_1_16_1","volume-title":"Samba: Safe model-based & active reinforcement learning. arXiv preprint arXiv:2006.09436","author":"Cowen-Rivers Alexander I","year":"2020","unstructured":"Alexander I Cowen-Rivers, Daniel Palenicek, Vincent Moens, Mohammed Abdullah, Aivar Sootla, Jun Wang, and Haitham Ammar. 2020. Samba: Safe model-based & active reinforcement learning. arXiv preprint arXiv:2006.09436 (2020)."},{"key":"e_1_3_2_1_17_1","volume-title":"Safe exploration in continuous action spaces. arXiv preprint arXiv:1801.08757","author":"Dalal Gal","year":"2018","unstructured":"Gal Dalal, Krishnamurthy Dvijotham, Matej Vecerik, Todd Hester, Cosmin Paduraru, and Yuval Tassa. 2018. Safe exploration in continuous action spaces. arXiv preprint arXiv:1801.08757 (2018)."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 28th International Conference on machine learning (ICML-11)","author":"Deisenroth Marc","year":"2011","unstructured":"Marc Deisenroth and Carl E Rasmussen. 2011. PILCO: A model-based and data-efficient approach to policy search. In Proceedings of the 28th International Conference on machine learning (ICML-11). Citeseer, 465--472."},{"key":"e_1_3_2_1_19_1","volume-title":"Gaussian processes for data-efficient learning in robotics and control","author":"Deisenroth Marc Peter","year":"2013","unstructured":"Marc Peter Deisenroth, Dieter Fox, and Carl Edward Rasmussen. 2013. Gaussian processes for data-efficient learning in robotics and control. IEEE transactions on pattern analysis and machine intelligence 37, 2 (2013), 408--423."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3408308.3427986"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:MACH.0000039779.47329.3a"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886795"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 2013 international conference on Autonomous agents and multi-agent systems. 1037--1044","author":"Gehring Clement","year":"2013","unstructured":"Clement Gehring and Doina Precup. 2013. Smart exploration in reinforcement learning using absolute temporal difference errors. In Proceedings of the 2013 international conference on Autonomous agents and multi-agent systems. 1037--1044."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 2555--2565","author":"Hafner Danijar","year":"2019","unstructured":"Danijar Hafner, Timothy Lillicrap, Ian Fischer, Ruben Villegas, David Ha, Honglak Lee, and James Davidson. 2019. Learning latent dynamics for planning from pixels. In International Conference on Machine Learning. PMLR, 2555--2565."},{"key":"e_1_3_2_1_25_1","volume-title":"Risk-sensitive Markov decision processes. Management science 18, 7","author":"Howard Ronald A","year":"1972","unstructured":"Ronald A Howard and James E Matheson. 1972. Risk-sensitive Markov decision processes. Management science 18, 7 (1972), 356--369."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.camwa.2005.11.013"},{"key":"e_1_3_2_1_27_1","volume-title":"Objective mismatch in model-based reinforcement learning. arXiv preprint arXiv:2002.04523","author":"Lambert Nathan","year":"2020","unstructured":"Nathan Lambert, Brandon Amos, Omry Yadan, and Roberto Calandra. 2020. Objective mismatch in model-based reinforcement learning. arXiv preprint arXiv:2002.04523 (2020)."},{"key":"e_1_3_2_1_28_1","unstructured":"Edith LM Law. 2005. Risk-directed exploration in reinforcement learning. (2005)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPS54341.2022.00023"},{"key":"e_1_3_2_1_30_1","volume-title":"Constrained Model-based Reinforcement Learning with Robust Cross-Entropy Method. arXiv preprint arXiv:2010.07968","author":"Liu Zuxin","year":"2020","unstructured":"Zuxin Liu, Hongyi Zhou, Baiming Chen, Sicheng Zhong, Martial Hebert, and Ding Zhao. 2020. Constrained Model-based Reinforcement Learning with Robust Cross-Entropy Method. arXiv preprint arXiv:2010.07968 (2020)."},{"key":"e_1_3_2_1_31_1","volume-title":"Beyond VaR: From measuring risk to managing risk. ALGO research quarterly 1, 2","author":"Mausser Helmut","year":"1998","unstructured":"Helmut Mausser. 1998. Beyond VaR: From measuring risk to managing risk. ALGO research quarterly 1, 2 (1998), 5--20."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1988.194354"},{"key":"e_1_3_2_1_33_1","unstructured":"Teodor Mihai Moldovan and Pieter Abbeel. 2012. Risk Aversion in Markov Decision Processes via Near Optimal Chernoff Bounds.. In NIPS. 3140--3148."},{"key":"e_1_3_2_1_34_1","unstructured":"Tetsuro Morimura Masashi Sugiyama Hisashi Kashima Hirotaka Hachiya and Toshiyuki Tanaka. 2010. Nonparametric return distribution approximation for reinforcement learning. In ICML."},{"key":"e_1_3_2_1_35_1","volume-title":"Deep reinforcement learning for optimal control of space heating. arXiv preprint arXiv:1805.03777","author":"Nagy Adam","year":"2018","unstructured":"Adam Nagy, Hussain Kazmi, Farah Cheaib, and Johan Driesen. 2018. Deep reinforcement learning for optimal control of space heating. arXiv preprint arXiv:1805.03777 (2018)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1050.0216"},{"key":"e_1_3_2_1_37_1","volume-title":"Searching for activation functions. arXiv preprint arXiv:1710.05941","author":"Ramachandran Prajit","year":"2017","unstructured":"Prajit Ramachandran, Barret Zoph, and Quoc V Le. 2017. Searching for activation functions. arXiv preprint arXiv:1710.05941 (2017)."},{"key":"e_1_3_2_1_38_1","volume-title":"Benchmarking safe exploration in deep reinforcement learning. arXiv preprint arXiv:1910.01708 7","author":"Ray Alex","year":"2019","unstructured":"Alex Ray, Joshua Achiam, and Dario Amodei. 2019. Benchmarking safe exploration in deep reinforcement learning. arXiv preprint arXiv:1910.01708 7 (2019)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"},{"key":"e_1_3_2_1_40_1","volume-title":"Supervised actor-critic reinforcement learning. Learning and Approximate Dynamic Programming: Scaling Up to the Real World","author":"Rosenstein Michael T","year":"2004","unstructured":"Michael T Rosenstein, AndrewG Barto, Jennie Si, Andy Barto,Warren Powell, and Donald Wunsch. 2004. Supervised actor-critic reinforcement learning. Learning and Approximate Dynamic Programming: Scaling Up to the Real World (2004), 359--380."},{"volume-title":"Monte-Carlo simulation and machine learning","author":"Rubinstein Reuven Y","key":"e_1_3_2_1_41_1","unstructured":"Reuven Y Rubinstein and Dirk P Kroese. 2013. The cross-entropy method: a unified approach to combinatorial optimization, Monte-Carlo simulation and machine learning. Springer Science & Business Media."},{"key":"e_1_3_2_1_42_1","volume-title":"Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al.","author":"Silver David","year":"2016","unstructured":"David Silver, Aja Huang, Chris J Maddison, Arthur Guez, Laurent Sifre, George Van Den Driessche, Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al. 2016. Mastering the game of Go with deep neural networks and tree search. nature 529, 7587 (2016), 484--489."},{"key":"e_1_3_2_1_43_1","unstructured":"David Silver Thomas Hubert Julian Schrittwieser Ioannis Antonoglou Matthew Lai Arthur Guez Marc Lanctot Laurent Sifre Dharshan Kumaran Thore Graepel et al. 2017. Mastering chess and shogi by self-play with a general reinforcement learning algorithm. arXiv preprint arXiv:1712.01815 (2017)."},{"key":"e_1_3_2_1_44_1","volume-title":"International Conference on Machine Learning. PMLR, 9133--9143","author":"Stooke Adam","year":"2020","unstructured":"Adam Stooke, Joshua Achiam, and Pieter Abbeel. 2020. Responsive safety in reinforcement learning by pid lagrangian methods. In International Conference on Machine Learning. PMLR, 9133--9143."},{"key":"e_1_3_2_1_45_1","volume-title":"Scaling up robust MDPs by reinforcement learning. arXiv preprint arXiv:1306.6189","author":"Tamar Aviv","year":"2013","unstructured":"Aviv Tamar, Huan Xu, and Shie Mannor. 2013. Scaling up robust MDPs by reinforcement learning. arXiv preprint arXiv:1306.6189 (2013)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509832"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2007.4354030"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062224"},{"key":"e_1_3_2_1_50_1","volume-title":"Constrained cross-entropy method for safe reinforcement learning","author":"Wen Min","year":"2020","unstructured":"Min Wen and Ufuk Topcu. 2020. Constrained cross-entropy method for safe reinforcement learning. IEEE Trans. Automat. Control (2020)."},{"key":"e_1_3_2_1_51_1","volume-title":"safe-control-gym: a Unified Benchmark Suite for Safe Learning-based Control and Reinforcement Learning. arXiv preprint arXiv:2109.06325","author":"Yuan Zhaocong","year":"2021","unstructured":"Zhaocong Yuan, Adam W Hall, Siqi Zhou, Lukas Brunke, Melissa Greeff, Jacopo Panerati, and Angela P Schoellig. 2021. safe-control-gym: a Unified Benchmark Suite for Safe Learning-based Control and Reinforcement Learning. arXiv preprint arXiv:2109.06325 (2021)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3276774.3276775"}],"event":{"name":"BuildSys '22: The 9th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation","sponsor":["SIGEnergy ACM Special Interest Group on Energy Systems and Informatics"],"location":"Boston Massachusetts","acronym":"BuildSys '22"},"container-title":["Proceedings of the 9th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3563357.3564055","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3563357.3564055","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T07:28:33Z","timestamp":1755847713000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3563357.3564055"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,9]]},"references-count":52,"alternative-id":["10.1145\/3563357.3564055","10.1145\/3563357"],"URL":"https:\/\/doi.org\/10.1145\/3563357.3564055","relation":{},"subject":[],"published":{"date-parts":[[2022,11,9]]},"assertion":[{"value":"2022-12-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}