{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T04:22:12Z","timestamp":1754108532306},"publisher-location":"Berlin, Heidelberg","reference-count":29,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642158827"},{"type":"electronic","value":"9783642158834"}],"license":[{"start":{"date-parts":[[2010,1,1]],"date-time":"2010-01-01T00:00:00Z","timestamp":1262304000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-15883-4_25","type":"book-chapter","created":{"date-parts":[[2010,8,17]],"date-time":"2010-08-17T05:29:46Z","timestamp":1282022986000},"page":"385-401","source":"Crossref","is-referenced-by-count":9,"title":["Learning from Demonstration Using MDP Induced Metrics"],"prefix":"10.1007","author":[{"given":"Francisco S.","family":"Melo","sequence":"first","affiliation":[]},{"given":"Manuel","family":"Lopes","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"unstructured":"Abbeel, P.: Apprenticeship learning and reinforcement learning with application to robotic control. Ph.D. thesis, Dep. Computer Science, Stanford Univ (2008)","key":"25_CR1"},{"doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.: Apprenticeship learning via inverse reinforcement learning. In: Proc. 21st Int. Conf. Machine Learning, pp. 1\u20138 (2004)","key":"25_CR2","DOI":"10.1145\/1015330.1015430"},{"issue":"5","key":"25_CR3","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"B. Argall","year":"2009","unstructured":"Argall, B., Chernova, S., Veloso, M.: A survey of robot learning from demonstration. 
Robotics and Autonomous Systems\u00a057(5), 469\u2013483 (2009)","journal-title":"Robotics and Autonomous Systems"},{"key":"25_CR4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.2584","volume":"34","author":"S. Chernova","year":"2009","unstructured":"Chernova, S., Veloso, M.: Interactive policy learning through confidence-based autonomy. J. Artificial Intelligence Research\u00a034, 1\u201325 (2009)","journal-title":"J. Artificial Intelligence Research"},{"unstructured":"Fern, A., Yoon, S., Givan, R.: Approximate policy iteration with a policy language bias. In: Adv. Neural Information Proc. Systems 16 (2003)","key":"25_CR5"},{"key":"25_CR6","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1613\/jair.1700","volume":"25","author":"A. Fern","year":"2006","unstructured":"Fern, A., Yoon, S., Givan, R.: Approximate policy iteration with a policy language bias: Solving relational Markov decision processes. J. Artificial Intelligence Research\u00a025, 75\u2013118 (2006)","journal-title":"J. Artificial Intelligence Research"},{"unstructured":"Ferns, N., Panangaden, P., Precup, D.: Metrics for finite Markov decision processes. In: Proc. 20th Conf. Uncertainty in Artificial Intelligence, pp. 162\u2013169 (2004)","key":"25_CR7"},{"unstructured":"Ferns, N., Panangaden, P., Precup, D.: Metrics for Markov decision processes with infinite state-spaces. In: Proc. 21st Conf. Uncertainty in Artificial Intelligence, pp. 201\u2013208 (2005)","key":"25_CR8"},{"key":"25_CR9","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1016\/S0004-3702(02)00376-4","volume":"147","author":"R. Givan","year":"2003","unstructured":"Givan, R., Dean, T., Greig, M.: Equivalence notions and model minimization in Markov Decision Processes. Artificial Intelligence\u00a0147, 163\u2013223 (2003)","journal-title":"Artificial Intelligence"},{"unstructured":"Lagoudakis, M., Parr, R.: Reinforcement learning as classification: Leveraging modern classifiers. In: Proc. 20th Int. 
Conf. Machine Learning, pp. 424\u2013431 (2003)","key":"25_CR10"},{"doi-asserted-by":"crossref","unstructured":"Langford, J., Zadrozny, B.: Relating reinforcement learning performance to classification performance. In: Proc. 22nd Int. Conf. Machine Learning, pp. 473\u2013480 (2005)","key":"25_CR11","DOI":"10.1145\/1102351.1102411"},{"unstructured":"Lazaric, A., Ghavamzadeh, M., Munos, R.: Analysis of a classification-based policy iteration algorithm. In: Proc. 27th Int. Conf. Machine Learning (to appear, 2010)","key":"25_CR12"},{"doi-asserted-by":"crossref","unstructured":"Lopes, M., Melo, F., Montesano, L., Santos-Victor, J.: Abstraction levels for robotic imitation: Overview and computational approaches. In: From Motor Learning to Interaction Learning in Robots, pp. 313\u2013355 (2010)","key":"25_CR13","DOI":"10.1007\/978-3-642-05181-4_14"},{"doi-asserted-by":"crossref","unstructured":"Montesano, L., Lopes, M.: Learning grasping affordances from local visual descriptors. In: Proc. 8th Int. Conf. Development and Learning, pp. 1\u20136 (2009)","key":"25_CR14","DOI":"10.1109\/DEVLRN.2009.5175529"},{"unstructured":"Neu, G., Szepesv\u00e1ri, C.: Apprenticeship learning using inverse reinforcement learning and gradient methods. In: Proc. 23rd Conf. Uncertainty in Artificial Intelligence, pp. 295\u2013302 (2007)","key":"25_CR15"},{"doi-asserted-by":"crossref","unstructured":"Neu, G., Szepesv\u00e1ri, C.: Training parsers by inverse reinforcement learning. Machine Learning (2009) (accepted)","key":"25_CR16","DOI":"10.1007\/s10994-009-5110-1"},{"unstructured":"Ng, A., Russell, S.: Algorithms for inverse reinforcement learning. In: Proc. 17th Int. Conf. Machine Learning, pp. 663\u2013670 (2000)","key":"25_CR17"},{"issue":"1","key":"25_CR18","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1162\/neco.1991.3.1.88","volume":"3","author":"D. 
Pomerleau","year":"1991","unstructured":"Pomerleau, D.: Efficient training of artificial neural networks for autonomous navigation. Neural Computation\u00a03(1), 88\u201397 (1991)","journal-title":"Neural Computation"},{"unstructured":"Ramachandran, D., Amir, E.: Bayesian inverse reinforcement learning. In: Proc. 20th Int. Joint Conf. Artificial Intelligence, pp. 2586\u20132591 (2007)","key":"25_CR19"},{"doi-asserted-by":"crossref","unstructured":"Ratliff, N., Bagnell, J., Zinkevich, M.: Maximum margin planning. In: Proc. 23rd Int. Conf. Machine Learning, pp. 729\u2013736 (2006)","key":"25_CR20","DOI":"10.1145\/1143844.1143936"},{"unstructured":"Ravindran, B., Barto, A.: Approximate homomorphisms: A framework for non-exact minimization in Markov decision processes. In: Proc. 5th Int. Conf. Knowledge-Based Computer Systems (2004)","key":"25_CR21"},{"doi-asserted-by":"crossref","unstructured":"Saunders, J., Nehaniv, C., Dautenhahn, K.: Teaching robots by moulding behavior and scaffolding the environment. In: Proc. 1st Annual Conf. Human-Robot Interaction (2006)","key":"25_CR22","DOI":"10.1145\/1121241.1121263"},{"key":"25_CR23","volume-title":"Learning with kernels: Support vector machines, regularization, optimization and beyond","author":"B. Sch\u00f6lkopf","year":"2002","unstructured":"Sch\u00f6lkopf, B., Smola, A.: Learning with kernels: Support vector machines, regularization, optimization and beyond. MIT Press, Cambridge (2002)"},{"unstructured":"Settles, B.: Active learning literature survey. Tech. Rep. CS Tech. Rep.\u00a01648, Univ. Wisconsin-Madison (2009)","key":"25_CR24"},{"unstructured":"Syed, U., Schapire, R.: A game-theoretic approach to apprenticeship learning. In: Adv. Neural Information Proc. Systems, vol.\u00a020, pp. 1449\u20131456 (2008)","key":"25_CR25"},{"doi-asserted-by":"crossref","unstructured":"Syed, U., Schapire, R., Bowling, M.: Apprenticeship learning using linear programming. In: Proc. 25th Int. Conf. Machine Learning, pp. 
1032\u20131039 (2008)","key":"25_CR26","DOI":"10.1145\/1390156.1390286"},{"unstructured":"Taylor, J., Precup, D., Panangaden, P.: Bounding performance loss in approximate MDP homomorphisms. In: Adv. Neural Information Proc. Systems, pp. 1649\u20131656 (2008)","key":"25_CR27"},{"unstructured":"Zhu, J., Hastie, T.: Kernel logistic regression and the import vector machine. In: Adv. Neural Information Proc. Systems, pp. 1081\u20131088 (2002)","key":"25_CR28"},{"unstructured":"Ziebart, B., Maas, A., Bagnell, J., Dey, A.: Maximum entropy inverse reinforcement learning. In: Proc. 23rd AAAI Conf. Artificial Intelligence, pp. 1433\u20131438 (2008)","key":"25_CR29"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-15883-4_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,19]],"date-time":"2019-05-19T17:18:14Z","timestamp":1558286294000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-15883-4_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642158827","9783642158834"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-15883-4_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2010]]}}}