{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T13:34:10Z","timestamp":1742996050485,"version":"3.40.3"},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319231136"},{"type":"electronic","value":"9783319231143"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-23114-3_36","type":"book-chapter","created":{"date-parts":[[2015,8,27]],"date-time":"2015-08-27T09:01:33Z","timestamp":1440666093000},"page":"573-577","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Sequential Decision Making Under Uncertainty Using Ordinal Preferential Information"],"prefix":"10.1007","author":[{"given":"Hugo","family":"Gilbert","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,8,28]]},"reference":[{"unstructured":"Abbeel, P., Ng, A.: Apprenticeship Learning via Inverse Reinforcement Learning. In: Proceedings of Twenty-first International Conference on Machine Learning. ICML 2004, ACM, New York, NY, USA (2004). http:\/\/doi.acm.org\/10.1145\/1015330.1015430","key":"36_CR1"},{"unstructured":"Akrour, R., Schoenauer, M., Sebag, M.: APRIL: Active preference-learning based reinforcement learning. In: CoRR (2012). http:\/\/arxiv.org\/abs\/1208.0984","key":"36_CR2"},{"doi-asserted-by":"crossref","unstructured":"Bain, M., Sammut, C.: A Framework for Behavioural Cloning. In: Machine Intelligence vol. 15, pp. 103\u2013129. Oxford University Press (1996)","key":"36_CR3","DOI":"10.1093\/oso\/9780198538677.003.0006"},{"unstructured":"Busa-fekete, R., Sznyi, B., Weng, P., Cheng, W., Hullermeier, E.: Preference-based Evolutionary Direct Policy Search (2014)","key":"36_CR4"},{"key":"36_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1007\/978-3-642-23780-5_30","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"W Cheng","year":"2011","unstructured":"Cheng, W., F\u00fcrnkranz, J., H\u00fcllermeier, E., Park, S.-H.: Preference-based policy iteration: leveraging preference learning for reinforcement learning. In: Gunopulos, D., Hofmann, T., Malerba, D., Vazirgiannis, M. (eds.) ECML PKDD 2011, Part I. LNCS, vol. 6911, pp. 312\u2013327. Springer, Heidelberg (2011)"},{"issue":"2","key":"36_CR6","doi-asserted-by":"publisher","first-page":"830","DOI":"10.1214\/aos\/1176350378","volume":"15","author":"P Fishburn","year":"1987","unstructured":"Fishburn, P., LaValle, I.: A nonlinear, nontransitive and additive-probability model for decisions under uncertainty. Ann. Statist. 15(2), 830\u2013844 (1987)","journal-title":"Ann. Statist."},{"doi-asserted-by":"crossref","unstructured":"Gilbert, H., Spanjaard, O., Viappiani, P., Weng, P.: Reducing the number of queries in interactive value iteration. In: ADT (2015)","key":"36_CR7","DOI":"10.1007\/978-3-319-23114-3_9"},{"unstructured":"Gilbert, H., Spanjaard, O., Viappiani, P., Weng, P.: Solving MDPs with skew symmetric bilinear utility functions. In: IJCAI (2015)","key":"36_CR8"},{"issue":"1\u20132","key":"36_CR9","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/S0004-3702(00)00047-3","volume":"122","author":"R Givan","year":"2000","unstructured":"Givan, R., Leach, S., Dean, T.: Bounded-parameter Markov decision processes. Artif. Intell. 122(1\u20132), 71\u2013109 (2000)","journal-title":"Artif. Intell."},{"key":"36_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/978-3-642-23291-6_8","volume-title":"Case-Based Reasoning Research and Development","author":"E H\u00fcllermeier","year":"2011","unstructured":"H\u00fcllermeier, E., Schlegel, P.: Preference-Based CBR: first steps toward a methodological framework. In: Ram, A., Wiratunga, N. (eds.) ICCBR 2011. LNCS, vol. 6880, pp. 77\u201391. Springer, Heidelberg (2011)"},{"key":"36_CR11","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M Puterman","year":"1994","unstructured":"Puterman, M.: Markov Decision Processes: Discrete Stochastic Dynamic Programming, 1st edn. John Wiley & Sons Inc, New York (1994)","edition":"1"},{"issue":"1","key":"36_CR12","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1111\/j.1467-937X.2009.00564.x","volume":"77","author":"M Rostek","year":"2010","unstructured":"Rostek, M.: Quantile Maximization in Decision Theory. Rev. Econ. Stud. 77(1), 339\u2013371 (2010)","journal-title":"Rev. Econ. Stud."},{"key":"36_CR13","volume-title":"Reinforcement learning: An introduction","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement learning: An introduction, vol. 116. Cambridge University Press, Cambridge (1998)"},{"unstructured":"Weng, P.: Ordinal decision models for Markov decision processes. In: ECAI 2012\u201320th European Conference on Artificial Intelligence. Including Prestigious Applications of Artificial Intelligence (PAIS-2012) System Demonstrations Track, Montpellier, France, 27\u201331 August, 2012. pp. 828\u2013833 (2012)","key":"36_CR14"},{"unstructured":"Weng, P., Busa-Fekete, R., H\u00fcllermeier, E.: Interactive Q-learning with ordinal rewards and unreliable tutor. In: ECML\/PKDD Workshop Reinforcement Learning with Generalized Feedback (September 2013). http:\/\/www-desir.lip6.fr\/weng\/pub\/ecml2013-ws.pdf","key":"36_CR15"},{"unstructured":"Weng, P., Zanuttini, B.: Interactive Value Iteration for Markov Decision Processes with Unknown Rewards. In: Rossi, F. (ed.) IJCAI. IJCAI\/AAAI (2013)","key":"36_CR16"},{"issue":"1","key":"36_CR17","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1006\/jmaa.1998.6015","volume":"225","author":"S Yu","year":"1998","unstructured":"Yu, S., Lin, Y., Yan, P.: Optimization models for the first arrival target distribution function in discrete time. J. Math. Anal. Appl. 225(1), 193\u2013223 (1998). http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0022247X98960152","journal-title":"J. Math. Anal. Appl."},{"doi-asserted-by":"crossref","unstructured":"Yue, Y., Broder, J., Kleinberg, R., Joachims, T.: The K-armed Dueling Bandits Problem. Journal of Computer and System Sciences (2012). (in press)","key":"36_CR18","DOI":"10.1016\/j.jcss.2011.12.028"}],"container-title":["Lecture Notes in Computer Science","Algorithmic Decision Theory"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-23114-3_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T19:29:37Z","timestamp":1718047777000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-23114-3_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319231136","9783319231143"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-23114-3_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"28 August 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}