{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T14:48:23Z","timestamp":1773154103550,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642237799","type":"print"},{"value":"9783642237805","type":"electronic"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-23780-5_30","type":"book-chapter","created":{"date-parts":[[2011,8,17]],"date-time":"2011-08-17T17:54:37Z","timestamp":1313603677000},"page":"312-327","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Preference-Based Policy Iteration: Leveraging Preference Learning for Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Weiwei","family":"Cheng","sequence":"first","affiliation":[]},{"given":"Johannes","family":"F\u00fcrnkranz","sequence":"additional","affiliation":[]},{"given":"Eyke","family":"H\u00fcllermeier","sequence":"additional","affiliation":[]},{"given":"Sang-Hyeun","family":"Park","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"30_CR1","first-page":"835","volume":"13","author":"A.G. Barto","year":"1983","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.: Neuron-like elements that can solve difficult learning control problems. IEEE Transaction on Systems, Man and Cybernetics\u00a013, 835\u2013846 (1983)","journal-title":"IEEE Transaction on Systems, Man and Cybernetics"},{"issue":"11","key":"30_CR2","doi-asserted-by":"publisher","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","volume":"45","author":"S. Bhatnagar","year":"2009","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Natural actor-critic algorithms. Automatica\u00a045(11), 2471\u20132482 (2009)","journal-title":"Automatica"},{"issue":"3","key":"30_CR3","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/s10994-008-5069-3","volume":"72","author":"C. Dimitrakakis","year":"2008","unstructured":"Dimitrakakis, C., Lagoudakis, M.G.: Rollout sampling approximate policy iteration. Machine Learning\u00a072(3), 157\u2013171 (2008)","journal-title":"Machine Learning"},{"key":"30_CR4","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1613\/jair.1700","volume":"25","author":"A. Fern","year":"2006","unstructured":"Fern, A., Yoon, S.W., Givan, R.: Approximate policy iteration with a policy language bias: Solving relational markov decision processes. Journal of Artificial Intelligence Research\u00a025, 75\u2013118 (2006)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"30_CR5","volume-title":"Preference Learning","year":"2010","unstructured":"F\u00fcrnkranz, J., H\u00fcllermeier, E. (eds.): Preference Learning. Springer, Heidelberg (2010)"},{"key":"30_CR6","unstructured":"Gabillon, V., Lazaric, A., Ghavamzadeh, M.: Rollout allocation strategies for classification-based policy iteration. In: Auer, P., Kaski, S., Szepesv\u00e0ri, C. (eds.) Proceedings of the ICML 2010 Workshop on Reinforcement Learning and Search in Very Large Spaces (2010)"},{"issue":"1","key":"30_CR7","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1145\/1656274.1656278","volume":"11","author":"M. Hall","year":"2009","unstructured":"Hall, M., Frank, E., Holmes, G., Pfahringer, B., Reutemann, P., Witten, I.: The weka data mining software: An update. SIGKDD Explorations\u00a011(1), 10\u201318 (2009)","journal-title":"SIGKDD Explorations"},{"key":"30_CR8","doi-asserted-by":"publisher","first-page":"1897","DOI":"10.1016\/j.artint.2008.08.002","volume":"172","author":"E. H\u00fcllermeier","year":"2008","unstructured":"H\u00fcllermeier, E., F\u00fcrnkranz, J., Cheng, W., Brinker, K.: Label ranking by learning pairwise preferences. Artificial Intelligence\u00a0172, 1897\u20131916 (2008)","journal-title":"Artificial Intelligence"},{"key":"30_CR9","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1145\/1390156.1390214","volume-title":"Proceedings of the 25th International Conference on Machine Learning (ICML 2008)","author":"K. Kersting","year":"2008","unstructured":"Kersting, K., Driessens, K.: Non-parametric policy gradients: a unified treatment of propositional and relational domains. In: Cohen, W.W., McCallum, A., Roweis, S.T. (eds.) Proceedings of the 25th International Conference on Machine Learning (ICML 2008), pp. 456\u2013463. ACM, Helsinki (2008)"},{"issue":"4","key":"30_CR10","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V.R. Konda","year":"2003","unstructured":"Konda, V.R., Tsitsiklis, J.N.: On actor-critic algorithms. SIAM Journal of Control and Optimization\u00a042(4), 1143\u20131166 (2003)","journal-title":"SIAM Journal of Control and Optimization"},{"key":"30_CR11","first-page":"424","volume-title":"Proceedings of the 20th International Conference on Machine Learning (ICML 2003)","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Reinforcement learning as classification: Leveraging modern classifiers. In: Fawcett, T.E., Mishra, N. (eds.) Proceedings of the 20th International Conference on Machine Learning (ICML 2003), pp. 424\u2013431. AAAI Press, Washington, DC (2003)"},{"key":"30_CR12","first-page":"9","volume":"3","author":"R.S. Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the methods of temporal differences. Machine Learning\u00a03, 9\u201344 (1988)","journal-title":"Machine Learning"},{"key":"30_CR13","first-page":"1057","volume-title":"Advances in Neural Information Processing Systems 12 (NIPS-1999)","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Solla, S.A., Leen, T.K., M\u00fcller, K.-R. (eds.) Advances in Neural Information Processing Systems 12 (NIPS-1999), pp. 1057\u20131063. MIT Press, Denver (1999)"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"Vembu, S., G\u00e4rtner, T.: Label ranking algorithms: A survey. In: F\u00fcrnkranz and H\u00fcllermeier [5], pp. 45\u201364.","DOI":"10.1007\/978-3-642-14125-6_3"},{"key":"30_CR15","first-page":"279","volume":"8","author":"C.J. Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Machine Learning\u00a08, 279\u2013292 (1992)","journal-title":"Machine Learning"},{"key":"30_CR16","first-page":"229","volume":"8","author":"R.J. Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"},{"key":"30_CR17","first-page":"3295","volume":"28","author":"Y. Zhao","year":"2009","unstructured":"Zhao, Y., Kosorok, M., Zeng, D.: Reinforcement learning design for cancer clinical trials. Statistics in Medicine\u00a028, 3295\u20133315 (2009)","journal-title":"Statistics in Medicine"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-23780-5_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:19:19Z","timestamp":1630455559000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-23780-5_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642237799","9783642237805"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-23780-5_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}