{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T02:04:50Z","timestamp":1760061890161,"version":"3.40.3"},"publisher-location":"Cham","reference-count":8,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319684444"},{"type":"electronic","value":"9783319684451"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-68445-1_33","type":"book-chapter","created":{"date-parts":[[2017,10,23]],"date-time":"2017-10-23T20:40:36Z","timestamp":1508791236000},"page":"282-290","source":"Crossref","is-referenced-by-count":0,"title":["Geometry of Policy Improvement"],"prefix":"10.1007","author":[{"given":"Guido","family":"Mont\u00fafar","sequence":"first","affiliation":[]},{"given":"Johannes","family":"Rauh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,24]]},"reference":[{"key":"33_CR1","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1007\/978-94-007-4792-0_20","volume-title":"Advances in Cognitive Neurodynamics (III)","author":"N Ay","year":"2013","unstructured":"Ay, N., Mont\u00fafar, G., Rauh, J.: Selection criteria for neuromanifolds of stochastic dynamics. In: Yamaguchi, Y. (ed.) Advances in Cognitive Neurodynamics (III), pp. 147\u2013154. Springer, Dordrecht (2013). doi:\n10.1007\/978-94-007-4792-0_20"},{"key":"33_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1007\/11894841_21","volume-title":"Algorithmic Learning Theory","author":"M Hutter","year":"2006","unstructured":"Hutter, M.: General discounting versus average reward. In: Balc\u00e1zar, J.L., Long, P.M., Stephan, F. (eds.) ALT 2006. LNCS, vol. 4264, pp. 244\u2013258. Springer, Heidelberg (2006). doi:\n10.1007\/11894841_21"},{"key":"33_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1007\/3-540-44581-1_40","volume-title":"Computational Learning Theory","author":"S Kakade","year":"2001","unstructured":"Kakade, S.: Optimizing average reward using discounted rewards. In: Helmbold, D., Williamson, B. (eds.) COLT 2001. LNCS, vol. 2111, pp. 605\u2013615. Springer, Heidelberg (2001). doi:\n10.1007\/3-540-44581-1_40"},{"key":"33_CR4","unstructured":"Mont\u00fafar, G., Ghazi-Zahedi, K., Ay, N.: Geometry and determinism of optimal stationary control in partially observable Markov decision processes. \narXiv:1503.07206\n\n (2015)"},{"key":"33_CR5","volume-title":"Introduction to Stochastic Dynamic Programming","author":"SM Ross","year":"1983","unstructured":"Ross, S.M.: Introduction to Stochastic Dynamic Programming. Academic Press Inc., Cambridge (1983)"},{"key":"33_CR6","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"33_CR7","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems 12, pp. 1057\u20131063. MIT Press (2000)"},{"issue":"2","key":"33_CR8","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1023\/A:1017980312899","volume":"49","author":"JN Tsitsiklis","year":"2002","unstructured":"Tsitsiklis, J.N., Van Roy, B.: On average versus discounted reward temporal-difference learning. Mach. Learn. 49(2), 179\u2013191 (2002)","journal-title":"Mach. Learn."}],"container-title":["Lecture Notes in Computer Science","Geometric Science of Information"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-68445-1_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,10,24]],"date-time":"2017-10-24T19:27:30Z","timestamp":1508873250000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-68445-1_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319684444","9783319684451"],"references-count":8,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-68445-1_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}