{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:55:53Z","timestamp":1753602953798,"version":"3.28.0"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,8]]},"DOI":"10.23919\/acc53348.2022.9867841","type":"proceedings-article","created":{"date-parts":[[2022,9,5]],"date-time":"2022-09-05T16:24:10Z","timestamp":1662395050000},"page":"2703-2708","source":"Crossref","is-referenced-by-count":4,"title":["Embracing Risk in Reinforcement Learning: The Connection between Risk-Sensitive Exponential and Distributionally Robust Criteria"],"prefix":"10.23919","author":[{"given":"Erfaun","family":"Noorani","sequence":"first","affiliation":[{"name":"Institute for System Research (ISR) at the University of Maryland College Park,Department of Electrical and Computer Engineering,College Park,MD,USA"}]},{"given":"John S.","family":"Baras","sequence":"additional","affiliation":[{"name":"Institute for System Research (ISR) at the University of Maryland College Park,Department of Electrical and Computer Engineering,College Park,MD,USA"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1016\/0022-247X(81)90109-8"},{"year":"2019","author":"ke","article-title":"Imitation Learning as f-Divergence Minimization","key":"ref38"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1007\/s007800200072"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1109\/CDC45484.2021.9683645"},{"year":"2018","author":"abdolmaleki","article-title":"Maximum a Posteriori Policy Optimisation","key":"ref31"},{"key":"ref30","first-page":"1889","article-title":"Trust Region Policy Optimization","volume":"37","author":"schulman","year":"2015","journal-title":"Proceedings of The 32nd International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1142\/S0219493711003334"},{"doi-asserted-by":"publisher","key":"ref36","DOI":"10.1007\/s10957-011-9968-2"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1016\/j.physrep.2009.05.002"},{"key":"ref34","first-page":"18","article-title":"Large Deviations Techniques and Applications","volume":"98","author":"dembod","year":"1996","journal-title":"Jahresbericht Deutschen Math -Vereinigung"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1016\/0167-6911(88)90055-2"},{"year":"2014","author":"puterman","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming","key":"ref40"},{"year":"2018","author":"sutton","journal-title":"Reinforcement Learning An Introduction","key":"ref11"},{"year":"1953","author":"morgenstern","journal-title":"Theory of Games and Economic Behavior","key":"ref12"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1007\/BF00122574"},{"key":"ref14","first-page":"3509","article-title":"Algorithms for CVaR Optimization in MDPs","volume":"27","author":"chow","year":"2014","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1007\/978-3-319-11662-4_12"},{"key":"ref16","first-page":"682","article-title":"R2PG: Risk-Sensitive and Reliable Policy Gradient","author":"liu","year":"2018","journal-title":"The Workshops of the The Thirty-Second AAAI Conference on Artificial Intelligence New Orleans Louisiana USA February 2-7 2018"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/IROS40897.2019.8967699"},{"key":"ref18","first-page":"2817","article-title":"Robust Adversarial Reinforcement Learning","author":"pinto","year":"2017","journal-title":"International Conference on Machine Learning"},{"year":"2017","author":"pattanaik","article-title":"Robust Deep Reinforcement Learning with Adversarial Attacks","key":"ref19"},{"key":"ref28","first-page":"1861","article-title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","volume":"80","author":"haarnoja","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref4","first-page":"371","article-title":"Robust and Risk-sensitive Output Feedback Control for Finite State Machines and Hidden Markov Models","volume":"7","author":"baras","year":"1997","journal-title":"Journal of Mathematical Systems Estimation and Control"},{"key":"ref27","first-page":"1352","article-title":"Reinforcement Learning with Deep Energy-Based Policies","author":"haarnoja","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning - Volume 70"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/9.286253"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1137\/S0363012994273337"},{"year":"2017","author":"schulman","article-title":"Proximal Policy Optimization Algorithms","key":"ref29"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/9.388678"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/TAC.1973.1100265"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/9.654887"},{"year":"2008","author":"başar","journal-title":"H-Infinity Optimal Control and Related Minimax Design Problems A Dynamic Game Approach","key":"ref2"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/TAC.1974.1100606"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/978-1-4899-0445-4_5","article-title":"On Making Life and Death Decisions","author":"howard","year":"1980","journal-title":"Societal Risk Assessment"},{"key":"ref46","article-title":"Policy Gradients with Variance Related Risk Criteria","author":"dotan di castro","year":"2012","journal-title":"Proceedings of the 29th International Conference on Machine Learning Edinburgh Scotland UK"},{"year":"2018","author":"derman","article-title":"Soft-robust Actor-critic Policy-gradient","key":"ref20"},{"year":"2013","author":"mnih","article-title":"Playing Atari with Deep Reinforcement Learning","key":"ref45"},{"year":"2019","author":"mankowitz","article-title":"Robust Reinforcement Learning for Continuous Control with Model Misspecification","key":"ref22"},{"doi-asserted-by":"publisher","key":"ref47","DOI":"10.1016\/S0378-4266(02)00270-4"},{"key":"ref21","first-page":"6215","article-title":"Action Robust Reinforcement Learning and Applications in Continuous Control","author":"tessler","year":"2019","journal-title":"International Conference on Machine Learning"},{"year":"2009","author":"koller","journal-title":"Probabilistic Graphical Models Principles and Techniques","key":"ref42"},{"year":"2019","author":"galashov","article-title":"Information Asymmetry in KL-regularized RL","key":"ref24"},{"year":"2008","author":"wainwright","journal-title":"Graphical models exponential families and variational inference","key":"ref41"},{"key":"ref23","first-page":"1369","article-title":"Linearly-solvable Markov Decision Problems","author":"todorov","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref44","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level Control Through Deep Reinforcement Learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref26","first-page":"1433","article-title":"Maximum Entropy Inverse Reinforcement Learning","author":"ziebart","year":"2008","journal-title":"Proceedings of the 23rd National Conference on Artificial Intelligence - Volume 3"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1007\/3-540-45631-7_39"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1080\/09540099108946587"}],"event":{"name":"2022 American Control Conference (ACC)","start":{"date-parts":[[2022,6,8]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2022,6,10]]}},"container-title":["2022 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9866948\/9867142\/09867841.pdf?arnumber=9867841","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T18:39:45Z","timestamp":1670265585000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9867841\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,8]]},"references-count":47,"URL":"https:\/\/doi.org\/10.23919\/acc53348.2022.9867841","relation":{},"subject":[],"published":{"date-parts":[[2022,6,8]]}}}