{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T23:38:06Z","timestamp":1770421086918,"version":"3.49.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100016311","name":"Arm","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016311","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,8]]},"DOI":"10.23919\/acc53348.2022.9867288","type":"proceedings-article","created":{"date-parts":[[2022,9,5]],"date-time":"2022-09-05T20:24:10Z","timestamp":1662409450000},"page":"2697-2702","source":"Crossref","is-referenced-by-count":6,"title":["A Probabilistic Perspective on Risk-sensitive Reinforcement Learning"],"prefix":"10.23919","author":[{"given":"Erfaun","family":"Noorani","sequence":"first","affiliation":[{"name":"University of Maryland College Park,Institute for System Research (ISR),Department of Electrical and Computer Engineering,College Park,MD,USA"}]},{"given":"John S.","family":"Baras","sequence":"additional","affiliation":[{"name":"University of Maryland College Park,Institute for System Research (ISR),Department of Electrical and Computer Engineering,College Park,MD,USA"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11662-4_12"},{"key":"ref11","article-title":"Risk-Sensitive and Efficient Reinforcement Learning Algorithms","author":"tamar","year":"2015"},{"key":"ref12","first-page":"682","article-title":"R2PG: Risk-Sensitive and Reliable Policy Gradient","volume":"ws 18","author":"liu","year":"2018","journal-title":"The Workshops of the The Thirty-Second AAAI Conference on Artificial Intelligence New Orleans Louisiana USA February 2-7 2018"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967699"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1973.1100265"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1974.1100606"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/0022-247X(81)90109-8"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/9.286253"},{"key":"ref18","first-page":"371","article-title":"Robust and Risk-sensitive Output Feedback Control for Finite State Machines and Hidden Markov Models","volume":"7","author":"baras","year":"1997","journal-title":"Journal of Mathematical Systems Estimation and Control"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/9.388678"},{"key":"ref4","first-page":"1352","article-title":"Reinforcement Learning with Deep Energy-Based Policies","author":"haarnoja","year":"0"},{"key":"ref27","article-title":"Policy Gradients With Variance Related Risk Criteria","author":"dotan di castro","year":"2012","journal-title":"Proceedings of the 29th International Conference on Machine Learning"},{"key":"ref3","first-page":"1433","article-title":"Maximum Entropy Inverse Reinforcement Learning","author":"ziebart","year":"2008","journal-title":"Proceedings of the 23rd National Conference 
on Artificial Intelligence - Volume 3"},{"key":"ref6","author":"eysenbach","year":"2019","journal-title":"If MaxEnt RL is the Answer What is the Question?"},{"key":"ref5","first-page":"1861","article-title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","volume":"80","author":"haarnoja","year":"0"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BF00122574"},{"key":"ref7","author":"morgenstern","year":"1953","journal-title":"Theory of Games and Economic Behavior"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1080\/09540099108946587"},{"key":"ref9","first-page":"3509","article-title":"Algorithms for CVaR Optimization in MDPs","volume":"27","author":"chow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref1","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012994273337"},{"key":"ref22","author":"puterman","year":"2014","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/9.654887"},{"key":"ref24","author":"koller","year":"2009","journal-title":"Probabilistic Graphical Models Principles and Techniques"},{"key":"ref23","author":"wainwright","year":"2008","journal-title":"Graphical models exponential families and variational inference"},{"key":"ref26","article-title":"Understanding The Variational Lower Bound","author":"yang","year":"2017"},{"key":"ref25","author":"levine","year":"2018","journal-title":"Reinforcement Learning and Control as Probabilistic Inference Tutorial and Review"}],"event":{"name":"2022 American Control Conference (ACC)","location":"Atlanta, GA, USA","start":{"date-parts":[[2022,6,8]]},"end":{"date-parts":[[2022,6,10]]}},"container-title":["2022 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9866948\/9867142\/09867288.pdf?arnumber=9867288","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,3]],"date-time":"2022-10-03T20:39:08Z","timestamp":1664829548000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9867288\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,8]]},"references-count":27,"URL":"https:\/\/doi.org\/10.23919\/acc53348.2022.9867288","relation":{},"subject":[],"published":{"date-parts":[[2022,6,8]]}}}
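
This record follows the work schema of the public Crossref REST API; the same JSON can be retrieved with a plain GET against https://api.crossref.org/works/{DOI}. Below is a minimal Python sketch of fetching and reading this record. The endpoint is the real Crossref API; the helper name fetch_work is ours, and every field accessed (status, message, title, author, page, event) appears in the record above.

import json
import urllib.request

CROSSREF_WORKS = "https://api.crossref.org/works/"

def fetch_work(doi: str) -> dict:
    """Fetch a Crossref work record and return its 'message' payload."""
    with urllib.request.urlopen(CROSSREF_WORKS + doi) as resp:
        body = json.load(resp)
    # Crossref wraps the record: {"status": "ok", "message": {...}}
    assert body["status"] == "ok"
    return body["message"]

if __name__ == "__main__":
    work = fetch_work("10.23919/acc53348.2022.9867288")
    print(work["title"][0])                 # paper title (title is a list)
    for a in work["author"]:                # authors as given/family pairs
        print(f'  {a["given"]} {a["family"]}')
    print("pages:", work["page"])           # "2697-2702"
    print("venue:", work["event"]["name"])  # 2022 American Control Conference (ACC)

Note that fields like "reference" and "event" are only present for some work types (this one is a proceedings-article), so production code should use .get() rather than direct indexing when iterating over arbitrary DOIs.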