{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:19:36Z","timestamp":1740097176511,"version":"3.37.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319148021"},{"type":"electronic","value":"9783319148038"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-14803-8_18","type":"book-chapter","created":{"date-parts":[[2015,1,9]],"date-time":"2015-01-09T20:03:21Z","timestamp":1420833801000},"page":"226-242","source":"Crossref","is-referenced-by-count":1,"title":["Learning Options for an MDP from\u00a0Demonstrations"],"prefix":"10.1007","author":[{"given":"Marco","family":"Tamassia","sequence":"first","affiliation":[]},{"given":"Fabio","family":"Zambetta","sequence":"additional","affiliation":[]},{"given":"William","family":"Raffe","sequence":"additional","affiliation":[]},{"given":"Xiaodong","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"18_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1015330.1015430","volume-title":"Proceedings of the 21st International Conference on Machine Learning, ICML 2004","author":"P. Abbeel","year":"2004","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the 21st International Conference on Machine Learning, ICML 2004, pp. 1\u20138. ACM, New York (2004), \n                    \n                      http:\/\/doi.acm.org\/10.1145\/1015330.1015430\n                    \n                    \n                  , doi:10.1145\/1015330.1015430"},{"key":"18_CR2","first-page":"28","volume-title":"Proceedings of the Fifteenth International Conference on Machine Learning, ICML 1998","author":"J. 
Baxter","year":"1998","unstructured":"Baxter, J., Tridgell, A., Weaver, L.: Knightcap: A chess program that learns by combining TD(lambda) with game-tree search. In: Proceedings of the Fifteenth International Conference on Machine Learning, ICML 1998, pp. 28\u201336. Morgan Kaufmann Publishers Inc., San Francisco (1998), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=645527.657300"},{"issue":"0","key":"18_CR3","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1016\/j.artint.2014.07.003","volume":"216","author":"L.C. Cobo","year":"2014","unstructured":"Cobo, L.C., Subramanian, K., Isbell Jr., C.L., Lanterman, A.D., Thomaz, A.L.: Abstraction from demonstration for efficient reinforcement learning in high-dimensional domains. Artificial Intelligence\u00a0216(0), 103 (2014), \n                    \n                      http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0004370214000861\n                    \n                    \n                  , doi:10.1016\/j.artint.2014.07.003","journal-title":"Artificial Intelligence"},{"key":"18_CR4","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1145\/1102351.1102454","volume-title":"Proceedings of the 22nd International Conference on Machine learning, ICML 2005","author":"\u00d6. \u015eim\u015fek","year":"2005","unstructured":"\u015eim\u015fek, \u00d6., Wolfe, A.P., Barto, A.G.: Identifying useful subgoals in reinforcement learning by local graph partitioning. In: Proceedings of the 22nd International Conference on Machine learning, ICML 2005, pp. 816\u2013823. ACM, New York (2005), \n                    \n                      http:\/\/doi.acm.org\/10.1145\/1102351.1102454\n                    \n                    \n                  , doi:10.1145\/1102351.1102454"},{"key":"18_CR5","unstructured":"Ester, M., Kriegel, H.-P., Sander, J., Xu, X.: A density-based algorithm for discovering clusters in large spatial databases with noise. 
In: Simoudis, E., Fayyad, U., Han, J. (eds.) Proceedings of the Second International Conference on Knowledge Discovery and Data Mining, vol.\u00a096, pp. 226\u2013231. AAAI Press (1996)"},{"issue":"6","key":"18_CR6","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1145\/367766.368168","volume":"5","author":"R.W. Floyd","year":"1962","unstructured":"Floyd, R.W.: Algorithm 97: Shortest path. Communications of the ACM\u00a05(6), 345\u2013349 (1962), \n                    \n                      http:\/\/doi.acm.org\/10.1145\/367766.368168\n                    \n                    \n                  , doi:10.1145\/367766.368168","journal-title":"Communications of the ACM"},{"key":"18_CR7","first-page":"299","volume-title":"Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS 2008","author":"N.K. Jong","year":"2008","unstructured":"Jong, N.K., Hester, T., Stone, P.: The utility of temporal abstraction in reinforcement learning. In: Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS 2008, vol.\u00a01, pp. 299\u2013306. International Foundation for Autonomous Agents and Multiagent Systems, Richland (2008), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=1402383.1402429"},{"key":"18_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1007\/978-3-642-29946-9_28","volume-title":"Recent Advances in Reinforcement Learning","author":"E. Klein","year":"2012","unstructured":"Klein, E., Geist, M., Pietquin, O.: Batch, off-policy and model-free apprenticeship learning. In: Sanner, S., Hutter, M. (eds.) EWRL 2011. LNCS, vol.\u00a07188, pp. 285\u2013296. 
Springer, Heidelberg (2012), \n                    \n                      http:\/\/dx.doi.org\/10.1007\/978-3-642-29946-9_28"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Kober, J., Peters, J.: Reinforcement learning in robotics: A survey. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning. Adaptation, Learning, and Optimization, vol.\u00a012, pp. 579\u2013610. Springer, Heidelberg (2012), \n                    \n                      http:\/\/dx.doi.org\/10.1007\/978-3-642-27645-3_18\n                    \n                    \n                  , doi:10.1007\/978-3-642-27645-3_18","DOI":"10.1007\/978-3-642-27645-3_18"},{"key":"18_CR10","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1145\/1015330.1015355","volume-title":"Proceedings of the 21st International Conference on Machine Learning, ICML 2004","author":"S. Mannor","year":"2004","unstructured":"Mannor, S., Menache, I., Hoze, A., Klein, U.: Dynamic abstraction in reinforcement learning via clustering. In: Proceedings of the 21st International Conference on Machine Learning, ICML 2004, pp. 71\u201378. ACM, New York (2004), \n                    \n                      http:\/\/doi.acm.org\/10.1145\/1015330.1015355\n                    \n                    \n                  , doi:10.1145\/1015330.1015355"},{"key":"18_CR11","first-page":"361","volume-title":"Proceedings of the Eighteenth International Conference on Machine Learning, ICML 2001","author":"A. McGovern","year":"2001","unstructured":"McGovern, A., Barto, A.G.: Automatic discovery of subgoals in reinforcement learning using diverse density. In: Proceedings of the Eighteenth International Conference on Machine Learning, ICML 2001, pp. 361\u2013368. 
Morgan Kaufmann Publishers Inc., San Francisco (2001), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=645530.655681"},{"key":"18_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1007\/978-3-642-15883-4_11","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"A. Lacasse","year":"2010","unstructured":"Lacasse, A., Laviolette, F., Marchand, M., Turgeon-Boutin, F.: Learning with randomized majority votes. In: Balc\u00e1zar, J.L., Bonchi, F., Gionis, A., Sebag, M. (eds.) ECML PKDD 2010, Part II. LNCS, vol.\u00a06322, pp. 162\u2013177. Springer, Heidelberg (2010), \n                    \n                      http:\/\/dx.doi.org\/10.1007\/978-3-642-15883-4_25"},{"key":"18_CR13","series-title":"Springer Tracts in Advanced Robotics","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1007\/11552246_35","volume-title":"Experimental Robotics IX","author":"A. Ng","year":"2006","unstructured":"Ng, A., Coates, A., Diel, M., Ganapathi, V., Schulte, J., Tse, B., Berger, E., Liang, E.: Autonomous inverted helicopter flight via reinforcement learning. In: Ang Jr, M.H., Khatib, O. (eds.) Experimental Robotics IX. Springer Tracts in Advanced Robotics, vol.\u00a021, pp. 363\u2013372. Springer, Heidelberg (2006), \n                    \n                      http:\/\/dx.doi.org\/10.1007\/11552246_35"},{"key":"18_CR14","first-page":"663","volume-title":"Proceedings of the Seventeenth International Conference on Machine Learning, ICML 2000","author":"A.Y. Ng","year":"2000","unstructured":"Ng, A.Y., Russell, S.J.: Algorithms for inverse reinforcement learning. In: Proceedings of the Seventeenth International Conference on Machine Learning, ICML 2000, pp. 663\u2013670. 
Morgan Kaufmann Publishers Inc., San Francisco (2000), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=645529.657801"},{"key":"18_CR15","first-page":"2825","volume":"12","author":"F. Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M., Duchesnay, E.: Scikit-learn: Machine learning in python. The Journal of Machine Learning Research\u00a012, 2825\u20132830 (2011), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=1953048.2078195","journal-title":"The Journal of Machine Learning Research"},{"key":"18_CR16","first-page":"2586","volume-title":"Proceedings of the 20th International Joint Conference on Artifical Intelligence, IJCAI 2007","author":"D. Ramachandran","year":"2007","unstructured":"Ramachandran, D., Amir, E.: Bayesian inverse reinforcement learning. In: Proceedings of the 20th International Joint Conference on Artifical Intelligence, IJCAI 2007, pp. 2586\u20132591. Morgan Kaufmann Publishers Inc, San Francisco (2007), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=1625275.1625692"},{"key":"18_CR17","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1145\/1015330.1015353","volume-title":"Proceedings of the 21st International Conference on Machine Learning, ICML 2004","author":"\u00d6. \u015eim\u015fek","year":"2004","unstructured":"\u015eim\u015fek, \u00d6., Barto, A.G.: Using relative novelty to identify useful temporal abstractions in reinforcement learning. In: Proceedings of the 21st International Conference on Machine Learning, ICML 2004, pp. 95\u2013102. 
ACM, New York (2004), \n                    \n                      http:\/\/doi.acm.org\/10.1145\/1015330.1015353\n                    \n                    \n                  , doi:10.1145\/1015330.1015353"},{"key":"18_CR18","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1007\/3-540-45622-8_16","volume-title":"Abstraction, Reformulation, and Approximation","author":"M. Stolle","year":"2002","unstructured":"Stolle, M., Precup, D.: Learning options in reinforcement learning. In: Koenig, S., Holte, R. (eds.) SARA 2002. LNCS (LNAI), vol.\u00a02371, pp. 212\u2013223. Springer, Heidelberg (2002), \n                    \n                      http:\/\/dx.doi.org\/10.1007\/3-540-45622-8_16"},{"key":"18_CR19","first-page":"537","volume-title":"Proceedings of the Eighteenth International Conference on Machine Learning, ICML 2001","author":"P. Stone","year":"2001","unstructured":"Stone, P., Sutton, R.S.: Scaling reinforcement learning toward robocup soccer. In: Proceedings of the Eighteenth International Conference on Machine Learning, ICML 2001, pp. 537\u2013544. Morgan Kaufmann Publishers Inc., San Francisco (2001), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=645530.655674"},{"key":"18_CR20","volume-title":"Introduction to Reinforcement Learning","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Introduction to Reinforcement Learning, 1st edn. MIT Press, Cambridge (1998)","edition":"1"},{"key":"18_CR21","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. 
Artificial Intelligence\u00a0112(1\u20132), 181\u2013211 (1999), \n                    \n                      http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0004370299000521\n                    \n                    \n                  , doi:\n                    \n                      http:\/\/dx.doi.org\/10.1016\/S0004-3702(99)00052-1"},{"issue":"2","key":"18_CR22","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1109\/TAMD.2010.2050205","volume":"2","author":"C. Vigorito","year":"2010","unstructured":"Vigorito, C., Barto, A.: Intrinsically motivated hierarchical skill learning in structured environments. IEEE Transactions on Autonomous Mental Development\u00a02(2), 132\u2013143 (2010), doi:10.1109\/TAMD.2010.2050205","journal-title":"IEEE Transactions on Autonomous Mental Development"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"van der Walt, S., Colbert, S.C., Varoquaux, G.: The numpy array: A structure for efficient numerical computation. Computing in Science & Engineering\u00a013(2), 22\u201330 (2011), \n                    \n                      http:\/\/scitation.aip.org\/content\/aip\/journal\/cise\/13\/2\/10.1109\/MCSE.2011.37\n                    \n                    \n                  , doi: \n                    \n                      http:\/\/dx.doi.org\/10.1109\/MCSE.2011.37","DOI":"10.1109\/MCSE.2011.37"},{"key":"18_CR24","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. thesis, University of Cambridge (1989)"},{"key":"18_CR25","unstructured":"Ziebart, B.D., Maas, A., Bagnell, J.A., Dey, A.K.: Maximum entropy inverse reinforcement learning. In: Proceedings of the 23rd National Conference on Artificial Intelligence - Volume 3, AAAI 2008, pp. 1433\u20131438. 
AAAI Press (2008), \n                    \n                      http:\/\/dl.acm.org\/citation.cfm?id=1620270.1620297"}],"container-title":["Lecture Notes in Computer Science","Artificial Life and Computational Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-14803-8_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,28]],"date-time":"2019-05-28T23:10:51Z","timestamp":1559085051000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-14803-8_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319148021","9783319148038"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-14803-8_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}