{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T04:51:12Z","timestamp":1770958272023,"version":"3.50.1"},"reference-count":22,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,8]]},"DOI":"10.1109\/devlrn.2013.6652533","type":"proceedings-article","created":{"date-parts":[[2013,11,11]],"date-time":"2013-11-11T14:56:15Z","timestamp":1384181775000},"page":"1-6","source":"Crossref","is-referenced-by-count":15,"title":["Reinforcement learning with state-dependent discount factor"],"prefix":"10.1109","author":[{"given":"Naoto","family":"Yoshida","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eiji","family":"Uchibe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kenji","family":"Doya","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.2307\/3215024"},{"key":"22","first-page":"394","article-title":"On the complexity of solving Markov decision problems","author":"littman","year":"1995","journal-title":"Proceedings of the Eleventh Conference on Uncertainty in Artificial Intelligence"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288330"},{"key":"18","article-title":"Adaptive step-size for online temporal difference learning","author":"dabney","year":"2012","journal-title":"Twenty-Sixth AAAI Conference on Artificial Intelligence"},{"key":"15","article-title":"Optimality criteria in reinforcement learning","author":"mahadevan","year":"1996","journal-title":"Proceedings of the AAAI Fall Symposium on Learning Complex Behaviors in Adaptive Intelligent Systems"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114727"},{"key":"13","article-title":"Convergence of Q-learning: A simple proof","author":"melo","year":"0","journal-title":"Institute of Systems and Robotics"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1007\/s12035-012-8232-6"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2010.2051031"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"21","author":"hansen","year":"2005","journal-title":"The CMA evolution strategy A tutorial"},{"key":"3","author":"watkins","year":"1989","journal-title":"Learning from delayed rewards"},{"key":"20","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","author":"ng","year":"1999","journal-title":"Machine Learning Proceedings of the Sixteenth International Conference"},{"key":"2","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","article-title":"A neural substrate of prediction and reward","volume":"275","author":"schultz","year":"1997","journal-title":"Science"},{"key":"1","first-page":"369","article-title":"Markov decision processes with statedependent discount factors and unbounded rewards\/costs","volume":"39","author":"wei","year":"2011","journal-title":"Oper Res Lett"},{"key":"10","first-page":"2601","article-title":"Where do rewards come from?","author":"singh","year":"2009","journal-title":"Proceedings of the 31st Annual Conference of the Cognitive Science Society"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(02)00044-8"},{"key":"6","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(02)00056-4"},{"key":"9","article-title":"Temporal difference updating without a learning rate","author":"hutter","year":"2008","journal-title":"Conference on Advances in Neural Information Processing Systems (NIPS 2007)"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(02)00228-9"}],"event":{"name":"2013 IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL)","location":"Osaka, Japan","start":{"date-parts":[[2013,8,18]]},"end":{"date-parts":[[2013,8,22]]}},"container-title":["2013 IEEE Third Joint International Conference on Development and Learning and Epigenetic Robotics (ICDL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6645681\/6652520\/06652533.pdf?arnumber=6652533","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T21:45:22Z","timestamp":1498081522000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6652533\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,8]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/devlrn.2013.6652533","relation":{},"subject":[],"published":{"date-parts":[[2013,8]]}}}