{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T13:55:32Z","timestamp":1743083732568,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319493961"},{"type":"electronic","value":"9783319493978"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-49397-8_20","type":"book-chapter","created":{"date-parts":[[2016,11,9]],"date-time":"2016-11-09T06:20:42Z","timestamp":1478672442000},"page":"231-242","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["From Preference-Based to Multiobjective Sequential Decision-Making"],"prefix":"10.1007","author":[{"given":"Paul","family":"Weng","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,11,10]]},"reference":[{"issue":"13","key":"20_CR1","doi-asserted-by":"publisher","first-page":"1608","DOI":"10.1177\/0278364910371999","volume":"29","author":"P Abbeel","year":"2010","unstructured":"Abbeel, P., Coates, A., Ng, A.Y.: Autonomous helicopter aerobatics through apprenticeship learning. Int. J. Rob. Res. 29(13), 1608\u20131639 (2010)","journal-title":"Int. J. Rob. Res."},{"key":"20_CR2","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1007\/978-3-642-33486-3_8","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"R Akrour","year":"2012","unstructured":"Akrour, R., Schoenauer, M., Sebag, M.: APRIL: active preference learning-based reinforcement learning. In: Flach, P.A., Bie, T., Cristianini, N. (eds.) ECML PKDD 2012. LNCS (LNAI), vol. 7524, pp. 116\u2013131. Springer, Heidelberg (2012). doi:10.1007\/978-3-642-33486-3_8"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Barrett, L., Narayanan, S.: Learning all optimal policies with multiple criteria. In: ICML (2008)","DOI":"10.1145\/1390156.1390162"},{"key":"20_CR4","unstructured":"Busa-Fekete, R., Sz\u00f6renyi, B., Weng, P., Cheng, W., H\u00fcllermeier, E.: Preference-based reinforcement learning. In: European Workshop on Reinforcement Learning, Dagstuhl Seminar (2013)"},{"key":"20_CR5","unstructured":"Busa-Fekete, R., Sz\u00f6renyi, B., Weng, P., Cheng, W., H\u00fcllermeier, E.: Top-k selection based on adaptive sampling of noisy preferences. In: International Conference on Marchine Learning (ICML) (2013)"},{"issue":"3","key":"20_CR6","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1007\/s10994-014-5458-8","volume":"97","author":"R Busa-Fekete","year":"2014","unstructured":"Busa-Fekete, R., Szorenyi, B., Weng, P., Cheng, W., H\u00fcllermeier, E.: Preference-based reinforcement learning: evolutionary direct policy search using a preference-based Racing algorithm. Mach. Learn. 97(3), 327\u2013351 (2014)","journal-title":"Mach. Learn."},{"key":"20_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1007\/11672142_26","volume-title":"STACS 2006","author":"K Chatterjee","year":"2006","unstructured":"Chatterjee, K., Majumdar, R., Henzinger, T.A.: Markov decision processes with multiple objectives. In: Durand, B., Thomas, W. (eds.) STACS 2006. LNCS, vol. 3884, pp. 325\u2013336. Springer, Heidelberg (2006). doi:10.1007\/11672142_26"},{"key":"20_CR8","unstructured":"Dud\u00edk, M., Hofmann, K., Schapire, R.E., Slivkins, A., Zoghi, M.: Contextual dueling bandits. In: COLT (2015)"},{"issue":"1","key":"20_CR9","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/s10994-012-5313-8","volume":"89","author":"J F\u00fcrnkranz","year":"2012","unstructured":"F\u00fcrnkranz, J., H\u00fcllermeier, E., Cheng, W., Park, S.: Preference-based reinforcement learning: a formal framework and a policy iteration algorithm. Mach. Learn. 89(1), 123\u2013156 (2012)","journal-title":"Mach. Learn."},{"key":"20_CR10","unstructured":"G\u00e1bor, Z., Kalm\u00e1r, Z., Szepesv\u00e1ri, C.: Multicriteria reinforcement learning. In: Proceedings of International Conference of Machine Learning (1998)"},{"key":"20_CR11","series-title":"Lecture Notes in Computer Science (LNCS)","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/978-3-319-23114-3_9","volume-title":"Algorithmic Decision Theory","author":"H Gilbert","year":"2015","unstructured":"Gilbert, H., Spanjaard, O., Viappiani, P., Weng, P.: Reducing the number of queries in interactive value iteration. In: Walsh, T. (ed.) ADT 2015. (LNAI), vol. 9346, pp. 139\u2013152. Springer, Heidelberg (2015). doi:10.1007\/978-3-319-23114-3_9"},{"key":"20_CR12","unstructured":"Gilbert, H., Spanjaard, O., Viappiani, P., Weng, P.: Solving MDPs with skew symmetric bilinear utility functions. In: IJCAI, pp. 1989\u20131995 (2015)"},{"key":"20_CR13","unstructured":"Gretton, C., Price, D., Thiebaux, S.: Implementation and comparison of solution methods for decision processes with non-Markovian rewards. In: UAI, vol. 19, pp. 289\u2013296 (2003)"},{"key":"20_CR14","unstructured":"Lizotte, D.J., Bowling, M., Murphy, S.A.: Efficient reinforcement learning with multiple reward functions for randomized controlled trial analysis. In: ICML (2010)"},{"key":"20_CR15","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., Petersen, S., Beattie, C., Sadik, A., Antonoglou, I., King, H., Kumaran, D., Wierstra, D., Legg, S., Hassabis, D.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"20_CR16","unstructured":"Ng, A., Russell, S.: Algorithms for inverse reinforcement learning. In: ICML. Morgan Kaufmann (2000)"},{"key":"20_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1007\/978-3-642-24873-3_15","volume-title":"Algorithmic Decision Theory","author":"W Ogryczak","year":"2011","unstructured":"Ogryczak, W., Perny, P., Weng, P.: On minimizing ordered weighted regrets in multiobjective Markov decision processes. In: Brafman, R.I., Roberts, F.S., Tsouki\u00e0s, A. (eds.) ADT 2011. LNCS (LNAI), vol. 6992, pp. 190\u2013204. Springer, Heidelberg (2011). doi:10.1007\/978-3-642-24873-3_15"},{"key":"20_CR18","doi-asserted-by":"publisher","first-page":"1021","DOI":"10.1142\/S0219622013400075","volume":"12","author":"W Ogryczak","year":"2013","unstructured":"Ogryczak, W., Perny, P., Weng, P.: A compromise programming approach to multiobjective Markov decision processes. Int. J. Inf. Technol. Decis. Making 12, 1021\u20131053 (2013)","journal-title":"Int. J. Inf. Technol. Decis. Making"},{"key":"20_CR19","unstructured":"Perny, P., Weng, P.: On finding compromise solutions in multiobjective Markov decision processes. In: Multidisciplinary Workshop on Advances in Preference Handling (MPREF) @ European Conference on Artificial Intelligence (ECAI) (2010)"},{"key":"20_CR20","unstructured":"Perny, P., Weng, P., Goldsmith, J., Hanna, J.: Approximation of Lorenz-optimal solutions in multiobjective Markov decision processes. In: International Conference on Uncertainty in Artificial Intelligence (UAI) (2013)"},{"key":"20_CR21","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M Puterman","year":"1994","unstructured":"Puterman, M.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, Hoboken (1994)"},{"key":"20_CR22","unstructured":"Regan, K., Boutilier, C.: Eliciting additive reward functions for Markov decision processes. In: IJCAI, pp. 2159\u20132164 (2011)"},{"key":"20_CR23","unstructured":"Regan, K., Boutilier, C.: Robust online optimization of reward-uncertain MDPs. In: IJCAI, pp. 2165\u20132171 (2011)"},{"key":"20_CR24","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"D Roijers","year":"2013","unstructured":"Roijers, D., Vamplew, P., Whiteson, S., Dazeley, R.: A survey of multi-objective sequential decision-making. J. Artif. Intell. Res. 48, 67\u2013113 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"20_CR25","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1007\/BF02591870","volume":"26","author":"R Steuer","year":"1983","unstructured":"Steuer, R., Choo, E.U.: An interactive weighted Tchebycheff procedure for multiple objective programming. Math. Program. 26, 326\u2013344 (1983)","journal-title":"Math. Program."},{"key":"20_CR26","first-page":"2413","volume":"10","author":"AL Strehl","year":"2009","unstructured":"Strehl, A.L., Littman, M.L.: Reinforcement learning in finite MDPs: PAC analysis. J. Mach. Learn. Res. 10, 2413\u20132444 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"20_CR27","volume-title":"Reinforcement Learning: An Introduction","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"issue":"3","key":"20_CR28","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro, G.: Temporal difference learning and TD-Gammon. Commun. ACM 38(3), 58\u201368 (1995)","journal-title":"Commun. ACM"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Weng, P.: Markov decision processes with ordinal rewards: Reference point-based preferences. International Conference on Automated Planning and Scheduling (ICAPS), vol. 21, pp. 282\u2013289 (2011)","DOI":"10.1609\/icaps.v21i1.13448"},{"key":"20_CR30","unstructured":"Weng, P.: Ordinal decision models for Markov decision processes. In: European Conference on Artificial Intelligence (ECAI), vol. 20, pp. 828\u2013833 (2012)"},{"key":"20_CR31","unstructured":"Weng, P., Zanuttini, B.: Interactive value iteration for Markov decision processes with unknown rewards. In: IJCAI (2013)"},{"key":"20_CR32","unstructured":"Weng, P., Busa-Fekete, R., H\u00fcllermeier, E.: Interactive Q-learning with ordinal rewards and unreliable tutor. In: ECML\/PKDD Workshop Reinforcement Learning with Generalized Feedback, September 2013"},{"key":"20_CR33","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1016\/0022-247X(82)90122-6","volume":"89","author":"D White","year":"1982","unstructured":"White, D.: Multi-objective infinite-horizon discounted Markov decision processes. J. Math. Anal. Appls. 89, 639\u2013647 (1982)","journal-title":"J. Math. Anal. Appls."},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Wray, K.H., Zilberstein, S., Mouaddib, A.I.: Multi-objective MDPs with conditional lexicographic reward preferences. In: AAAI (2015)","DOI":"10.1609\/aaai.v29i1.9647"},{"issue":"5","key":"20_CR35","doi-asserted-by":"publisher","first-page":"1538","DOI":"10.1016\/j.jcss.2011.12.028","volume":"78","author":"Y Yue","year":"2012","unstructured":"Yue, Y., Broder, J., Kleinberg, R., Joachims, T.: The k-armed dueling bandits problem. J. Comput. Syst. Sci. 78(5), 1538\u20131556 (2012)","journal-title":"J. Comput. Syst. Sci."}],"container-title":["Lecture Notes in Computer Science","Multi-disciplinary Trends in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-49397-8_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T15:49:59Z","timestamp":1710344999000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-49397-8_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319493961","9783319493978"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-49397-8_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"10 November 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MIWAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Multi-disciplinary Trends in Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chiang Mai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Thailand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 December 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 December 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miwai2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}