{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T16:50:56Z","timestamp":1762102256795,"version":"3.37.3"},"reference-count":31,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T00:00:00Z","timestamp":1559347200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T00:00:00Z","timestamp":1559347200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T00:00:00Z","timestamp":1559347200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T00:00:00Z","timestamp":1559347200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["DMS-1664644","CNS-1645681","CCF-1527292","IIS-1237022"],"award-info":[{"award-number":["DMS-1664644","CNS-1645681","CCF-1527292","IIS-1237022"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF-12-1-0390"],"award-info":[{"award-number":["W911NF-12-1-0390"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["MURI N00014-16-1-2832"],"award-info":[{"award-number":["MURI 
N00014-16-1-2832"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"name":"IIT Bombay IRCC SEED"},{"name":"INSPIRE faculty fellowship","award":["IFA-14\/ENG-73"],"award-info":[{"award-number":["IFA-14\/ENG-73"]}]},{"name":"Government of India"},{"name":"Lin Guangzhao & the Hu Guozan Graduate Education International Exchange Fund"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2019,6]]},"DOI":"10.1109\/tac.2018.2866455","type":"journal-article","created":{"date-parts":[[2018,8,21]],"date-time":"2018-08-21T18:38:32Z","timestamp":1534876712000},"page":"2298-2309","source":"Crossref","is-referenced-by-count":11,"title":["Learning Policies for Markov Decision Processes From Data"],"prefix":"10.1109","volume":"64","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1807-5487","authenticated-orcid":false,"given":"Manjesh Kumar","family":"Hanawal","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0510-6501","authenticated-orcid":false,"given":"Hao","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4534-6975","authenticated-orcid":false,"given":"Henghui","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Ioannis Ch.","family":"Paschalidis","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Neural Network Learning Theoretical Foundations","year":"2009","author":"anthony","key":"ref31"},{"key":"ref30","first-page":"527","article-title":"Covering number bounds of certain regularized linear function classes","volume":"2","author":"zhang","year":"2002","journal-title":"J Mach Learn Res"},{"key":"ref10","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"Proc Artif Intell 
Statist"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15883-4_25"},{"key":"ref12","article-title":"Learning parameterized skills","author":"da silva","year":"2012","journal-title":"Proc 29th Int Conf Mach Learn"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(99)01327-3"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2006.01.004"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/MED.2012.6265807"},{"key":"ref17","article-title":"Feature selection, ${L}_1$ vs. ${L}_2$ regularization, and rotational invariance","author":"ng","year":"2004","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-014-0819-4"},{"article-title":"Learning decisions: Robustness, uncertainty, and approximation","year":"2004","author":"bagnell","key":"ref19"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-6333-3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.2307\/1427277"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2616384"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-5254-2"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"ref8","article-title":"Apprenticeship learning via inverse reinforcement learning","author":"abbeel","year":"2004","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/nav.21481"},{"key":"ref2","volume":"i and ii","author":"bertsekas","year":"1995","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref9","first-page":"295","article-title":"Apprenticeship learning using inverse reinforcement 
learning and gradient methods","author":"neu","year":"2007","journal-title":"Proc Uncertainty Artif Intell"},{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref20","article-title":"Modelling policies in MDPs in reproducing kernel Hilbert space","author":"lever","year":"2015","journal-title":"Proc Int Conf Artif Intell Statist"},{"journal-title":"Elements of Information Theory","year":"2006","author":"cover","key":"ref22"},{"key":"ref21","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799361"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0024-3795(01)00320-2"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2014.7039948"}],"container-title":["IEEE Transactions on Automatic 
Control"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/9\/8723048\/8443086-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/8723048\/08443086.pdf?arnumber=8443086","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,29]],"date-time":"2022-08-29T22:48:52Z","timestamp":1661813332000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8443086\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6]]},"references-count":31,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tac.2018.2866455","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"type":"print","value":"0018-9286"},{"type":"electronic","value":"1558-2523"},{"type":"electronic","value":"2334-3303"}],"subject":[],"published":{"date-parts":[[2019,6]]}}}