{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T10:53:48Z","timestamp":1770288828120,"version":"3.49.0"},"reference-count":51,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["ECCS (NRI) 18-30639"],"award-info":[{"award-number":["ECCS (NRI) 18-30639"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-EE0009125"],"award-info":[{"award-number":["DE-EE0009125"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004835","name":"Dynamic Research Enterprise for Multidisciplinary Engineering Sciences (DREMES)\u2014collaboration between Zhejiang University and the University of Illinois at Urbana\u2013Champaign","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004835","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1109\/tcyb.2021.3102510","type":"journal-article","created":{"date-parts":[[2021,8,18]],"date-time":"2021-08-18T20:10:50Z","timestamp":1629317450000},"page":"9339-9351","source":"Crossref","is-referenced-by-count":13,"title":["Parameterized MDPs and Reinforcement Learning Problems\u2014A Maximum Entropy Principle-Based Framework"],"prefix":"10.1109","volume":"52","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0568-8276","authenticated-orcid":false,"given":"Amber","family":"Srivastava","sequence":"first","affiliation":[{"name":"Mechanical Science and Engineering Department and Coordinated Science Laboratory, University of Illinois at Urbana&#x2013;Champaign, Urbana, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3259-6027","authenticated-orcid":false,"given":"Srinivasa M.","family":"Salapaka","sequence":"additional","affiliation":[{"name":"Mechanical Science and Engineering Department and Coordinated Science Laboratory, University of Illinois at Urbana&#x2013;Champaign, Urbana, IL, USA"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Handbook of Markov Decision Processes: Methods and Applications","volume":"40","author":"Feinberg","year":"2012"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1995.478953"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.25.4.352"},{"key":"ref4","first-page":"496","article-title":"Linear programming for large-scale Markov decision problems","volume-title":"Proc. JMLR Workshop Conf.","author":"Abbasi-Yadkori"},{"issue":"3","key":"ref5","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.106.620"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.1991.170767"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1021\/ci700023y"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2013.2292054"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2319473"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bth467"},{"key":"ref12","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume-title":"Proc. AAAI","volume":"8","author":"Ziebart"},{"key":"ref13","first-page":"2613","article-title":"Double Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hasselt"},{"key":"ref14","volume-title":"Taming the noise in reinforcement learning via soft updates","author":"Fox","year":"2015"},{"key":"ref15","first-page":"1","article-title":"Soft Q-learning with mutual-information regularization","volume-title":"Proc. ICLR","author":"Grau-Moya"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v24i1.7727"},{"key":"ref17","volume-title":"A unified view of entropy-regularized Markov decision processes","author":"Neu","year":"2017"},{"key":"ref18","first-page":"243","article-title":"An alternative softmax operator for reinforcement learning","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","volume":"70","author":"Asadi"},{"key":"ref19","first-page":"2775","article-title":"Bridging the gap between value and policy based reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Nachum"},{"key":"ref20","volume-title":"SBEED: Convergent reinforcement learning with nonlinear function approximation","author":"Dai","year":"2017"},{"key":"ref21","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref22","volume-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2015.10.011"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2015.7306534"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2015.2483780"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2020.2995831"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00202-1_24"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2016.2633498"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2017.2702343"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/475"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2949596"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2015.01.006"},{"key":"ref33","volume-title":"Deep reinforcement learning in parameterized action space","author":"Hausknecht","year":"2015"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10226"},{"key":"ref35","first-page":"1","article-title":"Hierarchical approaches for reinforcement learning in parameterized action space","volume-title":"Proc. AAAI Spring Symp. Series","author":"Wei"},{"key":"ref36","volume-title":"Parametrized deep Q-networks learning: Reinforcement learning with discrete-continuous hybrid action space","author":"Xiong","year":"2018"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2017.2741342"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2352038"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511790423"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.jlamp.2014.05.001"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2018.8636066"},{"issue":"48","key":"ref42","first-page":"1729","article-title":"A Bayesian approach for learning and planning in partially observable Markov decision processes","volume":"12","author":"Ross","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.jet.2004.12.006"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2017.2775960"},{"key":"ref45","volume-title":"Approximate inference and stochastic optimal control","author":"Rawlik","year":"2010"},{"key":"ref46","first-page":"2411","article-title":"Speedy Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ghavamzadeh"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10303"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2012.6343862"},{"key":"ref50","volume-title":"Mathematics of Information and Coding","volume":"203","author":"Kobayashi","year":"2007"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2011.2166713"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/6221036\/9861400\/9517030-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/9861400\/09517030.pdf?arnumber=9517030","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T23:28:32Z","timestamp":1705015712000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9517030\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9]]},"references-count":51,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2021.3102510","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,9]]}}}