{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T19:44:11Z","timestamp":1726083851087},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030430887"},{"type":"electronic","value":"9783030430894"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-43089-4_43","type":"book-chapter","created":{"date-parts":[[2020,5,6]],"date-time":"2020-05-06T16:04:08Z","timestamp":1588781048000},"page":"672-687","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SWIRL: A SequentialWindowed Inverse Reinforcement Learning Algorithm for Robot Tasks With Delayed Rewards"],"prefix":"10.1007","author":[{"given":"Sanjay","family":"Krishnan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Animesh","family":"Garg","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard","family":"Liaw","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brijen","family":"Thananjeyan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lauren","family":"Miller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Florian T.","family":"Pokorny","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ken","family":"Goldberg","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,5,7]]},"reference":[{"key":"43_CR1","doi-asserted-by":"crossref","unstructured":"Argall, B.D., Chernova, S., Veloso, M., Browning, B.: A Survey of Robot Learning from Demonstration. Robotics and Autonomous Systems 57(5) (2009) 469\u2013483","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"43_CR2","unstructured":"Kolter, J.Z., Abbeel, P., Ng, A.Y.: Hierarchical apprenticeship learning with application to quadruped locomotion. In: NIPS. (2007) 769\u2013776"},{"key":"43_CR3","doi-asserted-by":"crossref","unstructured":"Coates, A., Abbeel, P., Ng, A.Y.: Learning for control from multiple demonstrations. In: ICML, ACM (2008)","DOI":"10.1145\/1390156.1390175"},{"key":"43_CR4","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: ICML, ACM (2004) 1","DOI":"10.1145\/1015330.1015430"},{"key":"43_CR5","unstructured":"Ng, A.Y., Russell, S.J., et al.: Algorithms for inverse reinforcement learning. In: Icml. (2000) 663\u2013670"},{"key":"43_CR6","unstructured":"Krishnan*, S., Garg*, A., Patil, S., Lea, C., Hager, G., Abbeel, P., Goldberg, K., (*denotes equal contribution): Transition State Clustering: Unsupervised Surgical Trajectory Segmentation For Robot Learning. In: International Symposium of Robotics Research, Springer STAR (2015)"},{"key":"43_CR7","doi-asserted-by":"crossref","unstructured":"Murali*, A., Garg*, A., Krishnan*, S., Pokorny, F.T., Abbeel, P., Darrell, T., Goldberg, K., (*denotes equal contribution): TSC-DL: Unsupervised Trajectory Segmentation of Multi-Modal Surgical Demonstrations with Deep Learning. In: IEEE Int. Conf. on Robotics and Automation (ICRA). (2016)","DOI":"10.1109\/ICRA.2016.7487607"},{"key":"43_CR8","unstructured":"Ng, A.Y., Harada, D., Russell, S.J.: Policy invariance under reward transformations: Theory and application to reward shaping. In: ICML. (1999) 278\u2013287"},{"key":"43_CR9","unstructured":"Judah, K., Fern, A.P., Tadepalli, P., Goetschalckx, R.: Imitation learning with demonstrations and shaping rewards. In: AAAI. (2014) 1890\u20131896"},{"key":"43_CR10","unstructured":"Ijspeert, A., Nakanishi, J., Schaal, S.: Learning attractor landscapes for learning motor primitives. In: Neural Information Processing Systems (NIPS). (2002) 1523\u20131530"},{"key":"43_CR11","doi-asserted-by":"crossref","unstructured":"Pastor, P., Hoffmann, H., Asfour, T., Schaal, S.: Learning and generalization of motor skills by learning from demonstration. In: IEEE ICRA. (2009)","DOI":"10.1109\/ROBOT.2009.5152385"},{"key":"43_CR12","doi-asserted-by":"crossref","unstructured":"Manschitz, S., Kober, J., Gienger, M., Peters, J.: Learning movement primitive attractor goals and sequential skills from kinesthetic demonstrations. Robotics and Autonomous Systems (2015)","DOI":"10.1016\/j.robot.2015.07.005"},{"key":"43_CR13","doi-asserted-by":"crossref","unstructured":"Niekum, S., Osentoski, S., Konidaris, G., Barto, A.: Learning and generalization of complex tasks from unstructured demonstrations. In: Int. Conf. on Intelligent Robots and Systems (IROS), IEEE (2012)","DOI":"10.1109\/IROS.2012.6386006"},{"key":"43_CR14","doi-asserted-by":"crossref","unstructured":"Calinon, S.: Skills learning in robots by interaction with users and environment. In: IEEE Int. Conf. on Ubiquitous Robots and Ambient Intelligence (URAI). (2014)","DOI":"10.1109\/URAI.2014.7057522"},{"key":"43_CR15","unstructured":"Konidaris, G., Kuindersma, S., Grupen, R., Barto, A.: Robot Learning from Demonstration by Constructing Skill Trees. Int. Journal of Robotics Research 31(3) (2011) 360\u2013375"},{"key":"43_CR16","doi-asserted-by":"crossref","unstructured":"Ranchod, P., Rosman, B., Konidaris, G.: Nonparametric bayesian reward segmentation for skill discovery using inverse reinforcement learning. In: IEEE\/RSJ Int. Conf. on Intelligent Robots and Systems (IROS), IEEE (2015)","DOI":"10.1109\/IROS.2015.7353414"},{"key":"43_CR17","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the maxq value function decomposition. J. Artif. Intell. Res.(JAIR) 13 (2000) 227\u2013303"},{"key":"43_CR18","doi-asserted-by":"crossref","unstructured":"Moldovan, T., Levine, S., Jordan, M., Abbeel, P.: Optimism-driven exploration for nonlinear systems. In: Int. Conf. on Robotics and Automation (ICRA). (2015)","DOI":"10.1109\/ICRA.2015.7139645"},{"key":"43_CR19","unstructured":"Khansari-Zadeh, S.M., Billard, A.: Learning stable nonlinear dynamical systems with gaussian mixture models. Robotics, IEEE Transactions on 27(5) (2011) 943\u2013957"},{"key":"43_CR20","doi-asserted-by":"crossref","unstructured":"Kruger, V., Herzog, D., Baby, S., Ude, A., Kragic, D.: Learning actions from observations. Robotics & Automation Magazine, IEEE 17(2) (2010) 30\u201343","DOI":"10.1109\/MRA.2010.936961"},{"key":"43_CR21","unstructured":"Kulis, B., Jordan, M.I.: Revisiting k-means: New algorithms via bayesian nonparametrics. arXiv preprint \narXiv:1111.0352\n\n (2011)"},{"key":"43_CR22","unstructured":"Mika, S., Sch\u00f6lkopf, B., Smola, A.J., M\u00fcller, K., Scholz, M., R\u00e4tsch, G.: Kernel PCA and de-noising in feature spaces. In: NIPS. (1998) 536\u2013542"},{"key":"43_CR23","unstructured":"Ziebart, B.D., Maas, A.L., Bagnell, J.A., Dey, A.K.: Maximum entropy inverse reinforcement learning. In: AAAI. (2008)"},{"key":"43_CR24","unstructured":"Ziebart, B., Dey, A., Bagnell, J.A.: Probabilistic pointing target prediction via inverse optimal control. In: UIST, ACM (2012) 1\u201310"},{"key":"43_CR25","unstructured":"Krishnan, S., Garg, A., Liaw, R., Miller, L., Pokorny, F.T., Goldberg, K.: Hirl: Hierarchical inverse reinforcement learning for long-horizon tasks with delayed rewards. arXiv preprint \narXiv:1604.06508\n\n (2016)"},{"key":"43_CR26","doi-asserted-by":"crossref","unstructured":"Murali*, A., Sen*, S., Kehoe, B., Garg, A., McFarland, S., Patil, S., Boyd, W., Lim, S., Abbeel, P., Goldberg, K., (*denotes equal contribution): Learning by Observation for Surgical Subtasks: Multilateral Cutting of 3D Viscoelastic and 2D Orthotropic Tissue Phantoms. In: IEEE Int. Conf. on Robotics and Automation (ICRA). (2015)","DOI":"10.1109\/ICRA.2015.7139344"}],"container-title":["Springer Proceedings in Advanced Robotics","Algorithmic Foundations of Robotics XII"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-43089-4_43","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,6]],"date-time":"2020-05-06T16:17:00Z","timestamp":1588781820000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-43089-4_43"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030430887","9783030430894"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-43089-4_43","relation":{},"ISSN":["2511-1256","2511-1264"],"issn-type":[{"type":"print","value":"2511-1256"},{"type":"electronic","value":"2511-1264"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 May 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}