{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T20:02:43Z","timestamp":1772654563764,"version":"3.50.1"},"reference-count":23,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1109\/adprl.2014.7010622","type":"proceedings-article","created":{"date-parts":[[2015,1,20]],"date-time":"2015-01-20T02:48:03Z","timestamp":1421722083000},"page":"1-6","source":"Crossref","is-referenced-by-count":22,"title":["Model-based multi-objective reinforcement learning"],"prefix":"10.1109","author":[{"given":"Marco A.","family":"Wiering","sequence":"first","affiliation":[]},{"given":"Maikel","family":"Withagen","sequence":"additional","affiliation":[]},{"given":"Madalina M","family":"Drugan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Efficient reinforcement learning with multiple reward functions for randomized clinical trial analysis","author":"lizotte","year":"2010","journal-title":"Proceedings of the Twenty-Seventh International Conference on Machine Learning (ICML)"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1109\/IJCNN.2014.6889390","article-title":"Exploration vs exploitation in the multi-objective multi-armed bandit problem","author":"yahyaa","year":"2014","journal-title":"Proceedings of International Joint Conference of Neural Networks (IJCNN)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2013.6707036"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2014.6889484"},{"key":"ref14","article-title":"Multi-criteria reinforcement learning","author":"gabor","year":"1998","journal-title":"Proceedings of the Fifteenth International Conference on Machine Learning"},{"key":"ref15","article-title":"Hypervolume-based multi-objective reinforcement learning","author":"van moffaert","year":"2013","journal-title":"Proc of Evolutionary Multiobjective Optimization (EMO)"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-013-5369-0"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993104"},{"key":"ref18","article-title":"Explorations in efficient reinforcement learning","author":"wiering","year":"1999"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368183"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1613\/jair.3987","article-title":"A survey of multi-objective sequential decision-making","volume":"48","author":"roijers","year":"2013","journal-title":"J Artif Intell Res (JAIR)"},{"key":"ref3","article-title":"Learning from delayed rewards","author":"watkins","year":"1989"},{"key":"ref6","article-title":"Learning sets of Pareto optimal policies","author":"van moffaert","year":"2014","journal-title":"Thirteenth International Conference on Autonomous Agents and Multiagent Systems-Adaptive Learning Agents Workshop (ALA)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-010-5232-5"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-540-79159-1","author":"eichfelder","year":"2008","journal-title":"Adaptive Scalarization Methods in Multiobjective Optimization"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2003.810758"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6615007"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/0022-247X(82)90122-6"},{"key":"ref22","first-page":"77","article-title":"Constrained Markov decision processes as multi-objective problems","author":"thomas","year":"1983","journal-title":"Multi-Objective Decision Making"},{"key":"ref21","author":"bellman","year":"1957","journal-title":"Dynamic Programming"},{"key":"ref23","first-page":"205","article-title":"Vector valued Markovian decision processes within countable state space","author":"furukawa","year":"1980","journal-title":"Recent Developments in Markov Decision Processes"}],"event":{"name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","location":"Orlando, FL, USA","start":{"date-parts":[[2014,12,9]]},"end":{"date-parts":[[2014,12,12]]}},"container-title":["2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7000183\/7010603\/07010622.pdf?arnumber=7010622","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,29]],"date-time":"2020-08-29T03:55:31Z","timestamp":1598673331000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7010622\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/adprl.2014.7010622","relation":{},"subject":[],"published":{"date-parts":[[2014,12]]}}}