{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:00:11Z","timestamp":1775066411396,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,11,5]],"date-time":"2020-11-05T00:00:00Z","timestamp":1604534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,11,5]],"date-time":"2020-11-05T00:00:00Z","timestamp":1604534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1830421"],"award-info":[{"award-number":["1830421"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2021,4]]},"DOI":"10.1007\/s10458-020-09485-4","type":"journal-article","created":{"date-parts":[[2020,11,5]],"date-time":"2020-11-05T19:02:30Z","timestamp":1604602950000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["I2RL: online inverse reinforcement learning under occlusion"],"prefix":"10.1007","volume":"35","author":[{"given":"Saurabh","family":"Arora","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9042-9131","authenticated-orcid":false,"given":"Prashant","family":"Doshi","sequence":"additional","affiliation":[]},{"given":"Bikramjit","family":"Banerjee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,5]]},"reference":[{"key":"9485_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., & Ng, A.Y. (2004). Apprenticeship learning via inverse reinforcement learning. In Twenty-first international conference on machine learning (ICML), pp. 1\u20138.","DOI":"10.1145\/1015330.1015430"},{"key":"9485_CR2","doi-asserted-by":"crossref","unstructured":"Aghasadeghi, N., & Bretl, T. (2011). Maximum entropy inverse reinforcement learning in continuous state spaces with path integrals. In: 2011 IEEE\/RSJ International conference on intelligent robots and systems, pp. 1561\u20131566.","DOI":"10.1109\/IROS.2011.6094679"},{"key":"9485_CR3","unstructured":"Amin, K., Jiang, N., & Singh, S. (2017). Repeated inverse reinforcement learning. In Advances in neural information processing systems, pp. 1815\u20131824."},{"issue":"5","key":"9485_CR4","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall, B. D., Chernova, S., Veloso, M., & Browning, B. (2009). A survey of robot learning from demonstration. Robotics and Autonomous Systems, 57(5), 469\u2013483.","journal-title":"Robotics and Autonomous Systems"},{"key":"9485_CR5","unstructured":"Arora, S., & Doshi, P. (2018). A survey of inverse reinforcement learning: Challenges, methods and progress. CoRR arXiv:1806.06877"},{"key":"9485_CR6","unstructured":"Arora, S., Doshi, P., & Banerjee, B. (2019). Online inverse reinforcement learning under occlusion. In: Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems, AAMAS \u201919, pp. 1170\u20131178. International Foundation for Autonomous Agents and Multiagent Systems"},{"key":"9485_CR7","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., & Schapire, R.E. (2000). Gambling in a rigged casino: The adversarial multi-armed bandit problem. Electronic Colloquium on Computational Complexity (ECCC) 7(68)."},{"key":"9485_CR8","unstructured":"Babes-Vroman, M., Marivate, V., Subramanian, K., & Littman, M. (2011). Apprenticeship learning about multiple intentions. In 28th International conference on machine learning (ICML), pp. 897\u2013904."},{"key":"9485_CR9","unstructured":"Bogert, K., & Doshi, P. (2014). Multi-robot inverse reinforcement learning under occlusion with interactions. In Proceedings of the 2014 International Conference on Autonomous Agents and Multi-agent Systems, AAMAS \u201914, pp. 173\u2013180."},{"key":"9485_CR10","unstructured":"Bogert, K., & Doshi, P. (2015). Toward estimating others\u2019 transition models under occlusion for multi-robot irl. In 24th International joint conference on artificial intelligence (IJCAI), pp. 1867\u20131873."},{"key":"9485_CR11","unstructured":"Bogert, K., & Doshi, P. (2017). Scaling expectation-maximization for inverse reinforcement learning to multiple robots under occlusion. In Proceedings of the 16th conference on autonomous agents and multiagent systems, AAMAS \u201917, pp. 522\u2013529."},{"key":"9485_CR12","unstructured":"Bogert, K., Lin, J.F.S., Doshi, P., & Kulic, D. (2016). Expectation-maximization for inverse reinforcement learning with hidden data. In 2016 International conference on autonomous agents and multiagent systems, pp. 1034\u20131042."},{"key":"9485_CR13","unstructured":"Boularias, A., Kober, J., & Peters, J. (2011). Relative entropy inverse reinforcement learning. In Proceedings of the fourteenth international conference on artificial intelligence and statistics, AISTATS 2011, Fort Lauderdale, USA, April 11-13, 2011, pp. 182\u2013189"},{"key":"9485_CR14","first-page":"227","volume":"II","author":"A Boularias","year":"2012","unstructured":"Boularias, A., Kr\u00f6mer, O., & Peters, J. (2012). Structured apprenticeship learning. European Conference on Machine Learning and Knowledge Discovery in Databases, Part, II, 227\u2013242.","journal-title":"European Conference on Machine Learning and Knowledge Discovery in Databases, Part"},{"key":"9485_CR15","first-page":"691","volume":"12","author":"J Choi","year":"2011","unstructured":"Choi, J., & Kim, K. E. (2011). Inverse reinforcement learning in partially observable environments. J. Mach. Learn. Res., 12, 691\u2013730.","journal-title":"J. Mach. Learn. Res."},{"key":"9485_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster, A. P., Laird, N. M., & Rubin, D. B. (1977). Maximum likelihood from incomplete data via the em algorithm. Journal of the Royal Statistical Society, Series B (Methodological), 39, 1\u201338.","journal-title":"Journal of the Royal Statistical Society, Series B (Methodological)"},{"key":"9485_CR17","doi-asserted-by":"publisher","first-page":"472","DOI":"10.1007\/978-3-540-27819-1_33","volume-title":"Learning Theory","author":"M Dud\u00edk","year":"2004","unstructured":"Dud\u00edk, M., Phillips, S. J., & Schapire, R. E. (2004). Performance guarantees for regularized maximum entropy density estimation. In J. Shawe-Taylor & Y. Singer (Eds.), Learning Theory (pp. 472\u2013486). Berlin Heidelberg: Springer."},{"key":"9485_CR18","unstructured":"Gerkey, B., Vaughan, R.T., & Howard, A. (2003). The player\/stage project: Tools for multi-robot and distributed sensor systems. In Proceedings of the 11th international conference on advanced robotics, vol.\u00a01."},{"key":"9485_CR19","doi-asserted-by":"crossref","unstructured":"Herman, M., Fischer, V., Gindele, T., & Burgard, W. (2015). Inverse reinforcement learning of behavioral models for online-adapting navigation strategies. In 2015 IEEE international conference on robotics and automation (ICRA), pp. 3215\u20133222. IEEE.","DOI":"10.1109\/ICRA.2015.7139642"},{"key":"9485_CR20","first-page":"4565","volume":"29","author":"J Ho","year":"2016","unstructured":"Ho, J., & Ermon, S. (2016). Generative adversarial imitation learning. Advances in Neural Information Processing Systems (NIPS), 29, 4565\u20134573.","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"issue":"1","key":"9485_CR21","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1631\/jzus.C1010010","volume":"12","author":"Z Jun Jin","year":"2010","unstructured":"Jun Jin, Z., Qian, H., Yi Chen, S., & Liang Zhu, M. (2010). Convergence analysis of an incremental approach to online inverse reinforcement learning. Journal of Zhejiang University-Science C, 12(1), 17\u201324.","journal-title":"Journal of Zhejiang University-Science C"},{"key":"9485_CR22","unstructured":"Kamalaruban, P., Devidze, R., Cevher, V., & Singla, A. (2019). Interactive teaching algorithms for inverse reinforcement learning. arXiv preprint arXiv:1905.11867."},{"key":"9485_CR23","doi-asserted-by":"crossref","unstructured":"Kitani, K.M., Ziebart, B.D., Bagnell, J.A., & Hebert, M. (2012). Activity forecasting. In 12th European conference on computer vision - Volume Part IV, pp. 201\u2013214.","DOI":"10.1007\/978-3-642-33765-9_15"},{"issue":"1","key":"9485_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1006\/inco.1996.2612","volume":"132","author":"J Kivinen","year":"1997","unstructured":"Kivinen, J., & Warmuth, M. K. (1997). Exponentiated gradient versus gradient descent for linear predictors. Information and Computation, 132(1), 1\u201363.","journal-title":"Information and Computation"},{"key":"9485_CR25","unstructured":"Levine, S., Popovi\u0107, Z., & Koltun, V. (2010). Feature construction for inverse reinforcement learning. In Proceedings of the 23rd international conference on neural information processing systems, NIPS\u201910, pp. 1342\u20131350. Curran Associates Inc., USA"},{"key":"9485_CR26","unstructured":"Ng, A., & Russell, S. (2000). Algorithms for inverse reinforcement learning. In Seventeenth international conference on machine learning, pp. 663\u2013670."},{"key":"9485_CR27","doi-asserted-by":"crossref","unstructured":"Osa, T., Pajarinen, J., Neumann, G., Bagnell, J.A., Abbeel, P., & Peters, J. (2018). An algorithmic perspective on imitation learning. Foundations and Trends\u00ae in Robotics 7(2), 1\u2013179.","DOI":"10.1561\/9781680834116"},{"key":"9485_CR28","unstructured":"Ramachandran, D., & Amir, E. (2007). Bayesian inverse reinforcement learning. In 20th international joint conference on artifical intelligence (IJCAI), pp. 2586\u20132591."},{"key":"9485_CR29","first-page":"380","volume":"2","author":"N Ratliff","year":"2007","unstructured":"Ratliff, N., Bagnell, J., & Zinkevich, M. (2007). (online) subgradient methods for structured prediction. Journal of Machine Learning Research - Proceedings Track, 2, 380\u2013387.","journal-title":"Journal of Machine Learning Research - Proceedings Track"},{"key":"9485_CR30","doi-asserted-by":"crossref","unstructured":"Ratliff, N.D., Bagnell, J.A., & Zinkevich, M.A. (2006). Maximum margin planning. In 23rd international conference on machine learning, pp. 729\u2013736.","DOI":"10.1145\/1143844.1143936"},{"key":"9485_CR31","doi-asserted-by":"crossref","unstructured":"Rhinehart, N., & Kitani, K.M. (2017). First-person activity forecasting with online inverse reinforcement learning. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.399"},{"key":"9485_CR32","doi-asserted-by":"crossref","unstructured":"Russell, S. (1998). Learning agents for uncertain environments (extended abstract). In Eleventh annual conference on computational learning theory, pp. 101\u2013103.","DOI":"10.1145\/279943.279964"},{"key":"9485_CR33","unstructured":"Steinhardt, J., & Liang, P. (2014). Adaptivity and optimism: An improved exponentiated gradient algorithm. In 31st International conference on machine learning, pp. 1593\u20131601."},{"key":"9485_CR34","doi-asserted-by":"crossref","unstructured":"Trivedi, M., & Doshi, P. (2018). Inverse learning of robot behavior for collaborative planning. In 2018 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp. 1\u20139.","DOI":"10.1109\/IROS.2018.8593745"},{"key":"9485_CR35","doi-asserted-by":"crossref","unstructured":"Wang, S., Rosenfeld, R., Zhao, Y., & Schuurmans, D. (2002). The Latent Maximum Entropy Principle. In IEEE international symposium on information theory, pp. 131\u2013131.","DOI":"10.1109\/ISIT.2002.1023403"},{"key":"9485_CR36","doi-asserted-by":"crossref","unstructured":"Wang, S., & Schuurmans Yunxin Zhao, D. (2012). The Latent Maximum Entropy Principle. ACM Transactions on Knowledge Discovery from Data 6(8).","DOI":"10.1145\/2297456.2297460"},{"key":"9485_CR37","unstructured":"Wulfmeier, M., & Posner, I. (2015). Maximum Entropy Deep Inverse Reinforcement Learning. arXiv preprint."},{"key":"9485_CR38","unstructured":"Ziebart, B.D., Maas, A., Bagnell, J.A., & Dey, A.K. (2008). Maximum entropy inverse reinforcement learning. In 23rd national conference on artificial intelligence - Volume 3, pp. 1433\u20131438."},{"key":"9485_CR39","doi-asserted-by":"crossref","unstructured":"Ziebart, B.D., Ratliff, N., Gallagher, G., Mertz, C., Peterson, K., Bagnell, J.A., Hebert, M., Dey, A.K., & Srinivasa, S. (2009). Planning-based prediction for pedestrians. In: Proceedings of the 2009 IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS\u201909, pp. 3931\u20133936. IEEE Press, Piscataway, NJ, USA.","DOI":"10.1109\/IROS.2009.5354147"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-020-09485-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-020-09485-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-020-09485-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,21]],"date-time":"2021-04-21T09:31:07Z","timestamp":1618997467000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-020-09485-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,5]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,4]]}},"alternative-id":["9485"],"URL":"https:\/\/doi.org\/10.1007\/s10458-020-09485-4","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,11,5]]},"assertion":[{"value":"22 October 2020","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 November 2020","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"4"}}