{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:40:34Z","timestamp":1773348034971,"version":"3.50.1"},"publisher-location":"Cham","reference-count":14,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319700861","type":"print"},{"value":"9783319700878","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-70087-8_81","type":"book-chapter","created":{"date-parts":[[2017,10,23]],"date-time":"2017-10-23T21:10:19Z","timestamp":1508793019000},"page":"789-800","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Average Reward Optimization with Multiple Discounting Reinforcement Learners"],"prefix":"10.1007","author":[{"given":"Chris","family":"Reinke","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eiji","family":"Uchibe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kenji","family":"Doya","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,24]]},"reference":[{"issue":"4","key":"81_CR1","doi-asserted-by":"publisher","first-page":"560","DOI":"10.1287\/mnsc.45.4.560","volume":"45","author":"TK Das","year":"1999","unstructured":"Das, T.K., Gosavi, A., Mahadevan, S., Marchalleck, N.: Solving Semi-Markov decision problems using average reward reinforcement learning. Manage. Sci. 45(4), 560\u2013574 (1999)","journal-title":"Manage. Sci."},{"issue":"1\u20132","key":"81_CR2","first-page":"1","volume":"2","author":"MP Deisenroth","year":"2011","unstructured":"Deisenroth, M.P., Neumann, G., Peters, J.: A survey on policy search for robotics. Found. Trends Robot. 2(1\u20132), 1\u2013142 (2011)","journal-title":"Found. Trends Robot."},{"issue":"3","key":"81_CR3","doi-asserted-by":"publisher","first-page":"654","DOI":"10.1016\/S0377-2217(02)00874-3","volume":"155","author":"A Gosavi","year":"2004","unstructured":"Gosavi, A.: Reinforcement learning for long-run average cost. Eur. J. Oper. Res. 155(3), 654\u2013674 (2004)","journal-title":"Eur. J. Oper. Res."},{"issue":"10","key":"81_CR4","doi-asserted-by":"publisher","first-page":"e7362","DOI":"10.1371\/journal.pone.0007362","volume":"4","author":"Z Kurth-Nelson","year":"2009","unstructured":"Kurth-Nelson, Z., Redish, A.D.: Temporal-difference reinforcement learning with distributed representations. PLoS One 4(10), e7362 (2009)","journal-title":"PLoS One"},{"key":"81_CR5","unstructured":"Mahadevan, S., Marchalleck, N., Das, T.K., Gosavi, A.: Self-improving factory simulation using continuous-time average-reward reinforcement learning. In: Proceedings of the 14th International Conference on Machine Learning, pp. 202\u2013210 (1997)"},{"key":"81_CR6","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming, 1st edn. Wiley, New York (1994)","edition":"1"},{"key":"81_CR7","doi-asserted-by":"crossref","unstructured":"Reinke, C., Uchibe, E., Doya, K.: Maximizing the average reward in episodic reinforcement learning tasks. In: 2015 International Conference on Intelligent Informatics and Biomedical Sciences (ICIIBMS), pp. 420\u2013421. IEEE (2015)","DOI":"10.1109\/ICIIBMS.2015.7439495"},{"key":"81_CR8","doi-asserted-by":"crossref","unstructured":"Schwartz, A.: A reinforcement learning method for maximizing undiscounted rewards. In: Proceedings of the Tenth International Conference on Machine Learning, vol. 298, pp. 298\u2013305 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50045-9"},{"key":"81_CR9","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. Cambridge University Press, Cambridge (1998)"},{"issue":"12","key":"81_CR10","doi-asserted-by":"publisher","first-page":"e1333","DOI":"10.1371\/journal.pone.0001333","volume":"2","author":"SC Tanaka","year":"2007","unstructured":"Tanaka, S.C., Schweighofer, N., Asahi, S., Shishida, K., Okamoto, Y., Yamawaki, S., Doya, K.: Serotonin differentially regulates short-and long-term prediction of rewards in the ventral and dorsal striatum. PLoS One 2(12), e1333 (2007)","journal-title":"PLoS One"},{"issue":"3","key":"81_CR11","first-page":"185","volume":"16","author":"JN Tsitsiklis","year":"1994","unstructured":"Tsitsiklis, J.N.: Asynchronous stochastic approximation and Q-learning. Mach. Learn. 16(3), 185\u2013202 (1994)","journal-title":"Mach. Learn."},{"issue":"3\u20134","key":"81_CR12","first-page":"279","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"81_CR13","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. thesis, University of Cambridge, England (1989)"},{"key":"81_CR14","doi-asserted-by":"crossref","unstructured":"Yang, S., Gao, Y., An, B., Wang, H., Chen, X.: Efficient average reward reinforcement learning using constant shifting values. In: Thirtieth AAAI Conference on Artificial Intelligence (2016)","DOI":"10.1609\/aaai.v30i1.10285"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-70087-8_81","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T15:25:26Z","timestamp":1710343526000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-70087-8_81"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319700861","9783319700878"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-70087-8_81","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"24 October 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 November 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.iconip2017.org\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}