{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T04:03:07Z","timestamp":1749787387425,"version":"3.41.0"},"reference-count":13,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,7]]},"DOI":"10.1109\/ijcnn.2016.7727695","type":"proceedings-article","created":{"date-parts":[[2016,11,8]],"date-time":"2016-11-08T21:15:56Z","timestamp":1478639756000},"page":"3837-3844","source":"Crossref","is-referenced-by-count":4,"title":["Advantage based value iteration for Markov decision processes with unknown rewards"],"prefix":"10.1109","author":[{"given":"Pegah","family":"Alizadeh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yann","family":"Chevaleyre","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Francois","family":"Levy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Robust online optimization of reward-uncertain mdps","author":"regan","year":"2011","journal-title":"Twenty-Second Joint Conference on Artificial Intel-ligence (IJCAI 2011)"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1609\/icaps.v21i1.13448","article-title":"Markov decision processes with ordinal rewards: Reference point-based preferences","volume":"21","author":"weng","year":"2011","journal-title":"International Conference on Automated Planning and Scheduling"},{"key":"ref12","article-title":"Interactive value iteration for markov decision processes with unknown rewards","author":"weng","year":"2013","journal-title":"IJCAI"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2009.5400796"},{"journal-title":"On the sample complexity of reinforcement learning","year":"2003","author":"kakade","key":"ref4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273525"},{"key":"ref6","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"Proc 17th International Conf on Machine Learning"},{"key":"ref5","first-page":"536","article-title":"Planning in the presence of cost functions controlled by an adversary","author":"macmahan","year":"2003","journal-title":"ICML-03"},{"key":"ref8","article-title":"Regret-based reward elicitation for markov decision processes","author":"regan","year":"2009","journal-title":"The 25th Conference on Uncertainty in Artificial Intelligence"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.21236\/ADA280862","author":"baird","year":"1993","journal-title":"Advantage updating"},{"key":"ref1","article-title":"Apprenticeship learning via inverse reinforce-ment learning","author":"abbeel","year":"2004","journal-title":"Proceedings of the 21st International Conference on Machine Learning"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v24i1.7740","article-title":"Robust policy computation in reward-uncertain mdps using nondominated policies","author":"regan","year":"2010","journal-title":"Twenty-fourth AAAI Conference on Artificial Intelligence (AAAI-10)"}],"event":{"name":"2016 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2016,7,24]]},"location":"Vancouver, BC, Canada","end":{"date-parts":[[2016,7,29]]}},"container-title":["2016 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7593175\/7726591\/07727695.pdf?arnumber=7727695","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,12]],"date-time":"2025-06-12T05:01:36Z","timestamp":1749704496000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7727695\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,7]]},"references-count":13,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2016.7727695","relation":{},"subject":[],"published":{"date-parts":[[2016,7]]}}}