{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:26:45Z","timestamp":1740101205630,"version":"3.37.3"},"reference-count":53,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"ONR","doi-asserted-by":"publisher","award":["N00014-18-1-2828"],"award-info":[{"award-number":["N00014-18-1-2828"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,23]]},"DOI":"10.1109\/iros47612.2022.9981874","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:38:15Z","timestamp":1672083495000},"page":"7429-7435","source":"Crossref","is-referenced-by-count":0,"title":["WFA-IRL: Inverse Reinforcement Learning of Autonomous Behaviors Encoded as Weighted Finite Automata"],"prefix":"10.1109","author":[{"given":"Tianyu","family":"Wang","sequence":"first","affiliation":[{"name":"University of California San Diego,Department of Electrical and Computer Engineering,La Jolla,CA,USA,92093"}]},{"given":"Nikolay","family":"Atanasov","sequence":"additional","affiliation":[{"name":"University of California San Diego,Department of Electrical and Computer Engineering,La Jolla,CA,USA,92093"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Algorithms for inverse reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Ng","year":"2000"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143936"},{"key":"ref3","article-title":"Maximum entropy inverse reinforcement learning","volume-title":"AAAI Conference on Artificial Intelligence","author":"Ziebart","year":"2008"},{"key":"ref4","first-page":"19","article-title":"Nonlinear inverse reinforcement learning with gaussian processes","volume":"24","author":"Levine","year":"2011","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759328"},{"key":"ref6","article-title":"Guided cost learning: Deep inverse optimal control via policy optimization","volume-title":"International Conference on Machine Learning","author":"Finn","year":"2016"},{"key":"ref7","article-title":"Learning task specifications from demonstrations","author":"Vazquez-Chanlatte","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/0278364918784350"},{"volume-title":"Minimalistic grid-world environment for openai gym","year":"2018","author":"Chevalier-Boisvert","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref11","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","author":"Kulkarni","year":"2016","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"volume-title":"Learning abstract options","year":"2018","author":"Riemer","key":"ref13"},{"key":"ref14","article-title":"Multi-level discovery of deep options","author":"Fox","year":"2017","journal-title":"arXiv preprint"},{"key":"ref15","article-title":"CompILE: Compositional imitation learning and execution","volume-title":"International Conference on Machine Learning","author":"Kipf","year":"2019"},{"key":"ref16","article-title":"Deep imitation learning for bimanual robotic manipulation","author":"Xie","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3233\/AIC-150682"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3342355"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-98938-9_10"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2008.08.008"},{"volume-title":"Principles of Model Checking","year":"2008","author":"Baier","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2007.363946"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2009.2030225"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2005.1582935"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509503"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487554"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.71"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3424307"},{"volume-title":"Bayesian inference of temporal task specifications from demonstrations","year":"2018","author":"Shah","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.097"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-01492-5"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/11523468_42"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197199"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2011.6161122"},{"volume-title":"Dynamic Programming and Optimal Control","year":"1995","author":"Bertsekas","key":"ref35"},{"key":"ref36","article-title":"Apprenticeship learning using inverse reinforcement learning and gradient methods","volume-title":"Conference on Uncertainty in Artificial Intelligence","author":"Neu","year":"2007"},{"key":"ref37","article-title":"Bayesian inverse reinforcement learning","volume-title":"International Joint Conference on Artifical Intelligence","author":"Ramachandran","year":"2007"},{"key":"ref38","article-title":"Deep recurrent q-learning for partially observable mdps","volume-title":"AAAI Fall Symposium Series","author":"Hausknecht","year":"2015"},{"key":"ref39","article-title":"Learning to navigate in complex environments","volume-title":"International Conference on Learning Representations","author":"Mirowski","year":"2017"},{"key":"ref40","article-title":"Spectral learning of general weighted automata via constrained matrix completion","author":"Balle","year":"2012","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref41","article-title":"Connecting weighted automata and recurrent neural networks through spectral learning","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Rabusseau","year":"2019"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-73235-5"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-6264-0"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/BF01386390"},{"key":"ref45","article-title":"ARA*: Anytime a* with provable bounds on sub-optimality","author":"Likhachev","year":"2004","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1177\/0278364911406761"},{"key":"ref47","article-title":"WFA-IRL: Inverse reinforcement learning of autonomous behaviors encoded as weighted finite automata","author":"Wang","year":"2021","journal-title":"arXiv preprint"},{"key":"ref48","article-title":"Scikit-splearn: a toolbox for the spectral learning of weighted automata compatible with scikit-learn","volume-title":"Conference francophone sur lApprentissage Aurtomatique","author":"Arrivault","year":"2017"},{"key":"ref49","first-page":"661","article-title":"Efficient reductions for imitation learning","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Ross","year":"2010"},{"key":"ref50","article-title":"Generative adversarial imitation learning","author":"Ho","year":"2016","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"The imitation library for imitation learning and inverse reinforcement learning","year":"2020","author":"Wang","key":"ref51"},{"key":"ref52","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref53","article-title":"Discriminator-actor-critic: Addressing sample inefficiency and reward bias in adversarial imitation learning","volume-title":"International Conference on Learning Representations","author":"Kostrikov","year":"2019"}],"event":{"name":"2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2022,10,23]]},"location":"Kyoto, Japan","end":{"date-parts":[[2022,10,27]]}},"container-title":["2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9981026\/9981028\/09981874.pdf?arnumber=9981874","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T08:38:41Z","timestamp":1709368721000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9981874\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/iros47612.2022.9981874","relation":{},"subject":[],"published":{"date-parts":[[2022,10,23]]}}}