{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T01:01:09Z","timestamp":1768352469180,"version":"3.49.0"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2018,9,1]],"date-time":"2018-09-01T00:00:00Z","timestamp":1535760000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2018,9]]},"DOI":"10.1109\/tac.2017.2775960","type":"journal-article","created":{"date-parts":[[2017,11,20]],"date-time":"2017-11-20T19:06:33Z","timestamp":1511204793000},"page":"2787-2802","source":"Crossref","is-referenced-by-count":46,"title":["Infinite Time Horizon Maximum Causal Entropy Inverse Reinforcement Learning"],"prefix":"10.1109","volume":"63","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0005-9411","authenticated-orcid":false,"given":"Zhengyuan","family":"Zhou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5864-7753","authenticated-orcid":false,"given":"Michael","family":"Bloem","sequence":"additional","affiliation":[]},{"given":"Nicholas","family":"Bambos","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.2514\/6.2014-2026"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2013.2260745"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.2514\/1.51203"},{"key":"ref30","author":"spall","year":"2005","journal-title":"Introduction to Stochastic Search and Optimization Estimation Simulation and Control"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2012.2234824"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390286"},{"key":"ref12","first-page":"1449","article-title":"A game-theoretic approach to apprenticeship learning","author":"syed","year":"2008","journal-title":"Proc 20th Int Conf Adv Neural Inf Process Syst"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2014.7040156"},{"key":"ref14","author":"bertsekas","year":"2012","journal-title":"Dynamic Programming and Optimal Control Approximate Dynamic Programming (Athena Scientific optimization and computation series)"},{"key":"ref15","article-title":"Directed information for channels with feedback","author":"kramer","year":"1998"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1137\/060652105"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"1367","DOI":"10.1214\/009053604000000553","article-title":"Game theory, maximum entropy, minimum discrepancy and robust Bayesian decision theory","volume":"32","author":"gr\u00fcnwald","year":"2004","journal-title":"Ann Stat"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.jet.2004.12.006"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-7970-4"},{"key":"ref28","article-title":"Convergence of ${Q}$\n-learning: A simple proof","author":"melo","year":"2007"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143936"},{"key":"ref27","volume":"1","author":"bertsekas","year":"2005","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/0278364910371999"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton.2011.6120271"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"ref5","first-page":"1433","article-title":"Maximum entropy inverse\n reinforcement learning","author":"ziebart","year":"2008","journal-title":"Proc 23rd Nat Conf Artif Intell"},{"key":"ref8","first-page":"1255","article-title":"Modeling interaction via the principle of maximum causal entropy","author":"ziebart","year":"2010","journal-title":"Proc 27th Int Conf Mach Learn"},{"key":"ref7","first-page":"182","article-title":"Relative\n entropy inverse reinforcement learning","author":"boularias","year":"2011","journal-title":"Proc 14th Int Conf Artificial Intell"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref9","article-title":"Modeling purposeful adaptive behavior with the principle of\n maximum causal entropy","author":"ziebart","year":"2010"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2012.6256507"},{"key":"ref20","article-title":"Bootstrapping apprenticeship learning","author":"boularias","year":"2011","journal-title":"Proceedings of ICAPS Multiagent Planning Workshop"},{"key":"ref22","article-title":"CVX: Matlab software for disciplined convex programming","author":"grant","year":"2009"},{"key":"ref21","author":"cover","year":"2012","journal-title":"Elements of Information Theory"},{"key":"ref24","article-title":"CVXOPT: Python software for convex optimization","author":"andersen","year":"2013"},{"key":"ref23","article-title":"CVXPY: A Python package for modeling convex optimization problems.","author":"rubira","year":"2013"},{"key":"ref26","article-title":"Inverse reinforcement learning with simultaneous estimation of\n rewards and dynamics","author":"herman","year":"2016","journal-title":"Proc Int Conf Artif Intell Stat"},{"key":"ref25","first-page":"1","article-title":"Stochastic models of ground delay program implementation for prediction, simulation, and insight","author":"bloem","year":"2017","journal-title":"J Aerosp Operat"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/8449129\/08115277.pdf?arnumber=8115277","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:28:30Z","timestamp":1642004910000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8115277\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9]]},"references-count":33,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tac.2017.2775960","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"value":"0018-9286","type":"print"},{"value":"1558-2523","type":"electronic"},{"value":"2334-3303","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,9]]}}}