{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T03:06:33Z","timestamp":1775271993734,"version":"3.50.1"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030041908","type":"print"},{"value":"9783030041915","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-04191-5_7","type":"book-chapter","created":{"date-parts":[[2018,11,15]],"date-time":"2018-11-15T07:12:31Z","timestamp":1542265951000},"page":"92-105","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Tuning the Discount Factor in Order to Reach Average Optimality on Deterministic MDPs"],"prefix":"10.1007","author":[{"given":"Filipo Studzinski","family":"Perotto","sequence":"first","affiliation":[]},{"given":"Laurent","family":"Vercouter","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,11,16]]},"reference":[{"key":"7_CR1","volume-title":"Dynamic Programming","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: Dynamic Programming. Princeton University Press, Princeton (1957)"},{"key":"7_CR2","volume-title":"Dynamic Programming and Optimal Control","author":"DP Bertsekas","year":"2005","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, 3rd edn. Athena Scientific, Belmont (2005)","edition":"3"},{"issue":"2","key":"7_CR3","doi-asserted-by":"publisher","first-page":"719","DOI":"10.1214\/aoms\/1177704593","volume":"33","author":"D Blackwell","year":"1962","unstructured":"Blackwell, D.: Discrete dynamic programming. Ann. Math. Stat. 33(2), 719\u2013726 (1962)","journal-title":"Ann. Math. Stat."},{"issue":"2","key":"7_CR4","doi-asserted-by":"publisher","first-page":"496","DOI":"10.1109\/TAC.2007.914282","volume":"53","author":"XR Cao","year":"2008","unstructured":"Cao, X.R., Zhang, J.: The $$n^{th}$$-order bias optimality for multichain Markov decision processes. Trans. Autom. Control 53(2), 496\u2013508 (2008)","journal-title":"Trans. Autom. Control"},{"issue":"4","key":"7_CR5","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1145\/1027084.1027085","volume":"9","author":"A Dasdan","year":"2004","unstructured":"Dasdan, A.: Experimental analysis of the fastest optimum cycle ratio and mean algorithms. Trans. Des. Autom. Electr. Syst. 9(4), 385\u2013418 (2004)","journal-title":"Trans. Des. Autom. Electr. Syst."},{"key":"7_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"551","DOI":"10.1007\/978-3-642-14162-1_46","volume-title":"Automata, Languages and Programming","author":"J Fearnley","year":"2010","unstructured":"Fearnley, J.: Exponential lower bounds for policy iteration. In: Abramsky, S., Gavoille, C., Kirchner, C., Meyer auf der Heide, F., Spirakis, P.G. (eds.) ICALP 2010. LNCS, vol. 6199, pp. 551\u2013562. Springer, Heidelberg (2010). 
https:\/\/doi.org\/10.1007\/978-3-642-14162-1_46"},{"issue":"3","key":"7_CR7","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1016\/j.orl.2013.02.002","volume":"41","author":"EA Feinberg","year":"2013","unstructured":"Feinberg, E.A., Huang, J.: Strong polynomiality of policy iterations for average-cost MDPs modeling replacement and maintenance problems. Oper. Res. Lett. 41(3), 249\u2013251 (2013)","journal-title":"Oper. Res. Lett."},{"issue":"2","key":"7_CR8","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/j.orl.2017.12.007","volume":"46","author":"EA Feinberg","year":"2018","unstructured":"Feinberg, E.A., Huang, J.: Reduction of total-cost and average-cost MDPs with weakly continuous transition probabilities to discounted MDPs. Oper. Res. Lett. 46(2), 179\u2013184 (2018)","journal-title":"Oper. Res. Lett."},{"issue":"1","key":"7_CR9","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/B:MACH.0000019802.64038.6c","volume":"55","author":"A Gosavi","year":"2004","unstructured":"Gosavi, A.: A reinforcement learning algorithm based on policy iteration for average reward: empirical results with yield management and convergence analysis. Mach. Learn. 55(1), 5\u201329 (2004)","journal-title":"Mach. Learn."},{"issue":"1","key":"7_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2432622.2432623","volume":"60","author":"TD Hansen","year":"2013","unstructured":"Hansen, T.D., Miltersen, P.B., Zwick, U.: Strategy iteration is strongly polynomial for 2-player turn-based stochastic games with a constant discount factor. J. ACM 60(1), 1\u201316 (2013)","journal-title":"J. ACM"},{"key":"7_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/978-3-642-17517-6_37","volume-title":"Algorithms and Computation","author":"TD Hansen","year":"2010","unstructured":"Hansen, T.D., Zwick, U.: Lower bounds for Howard\u2019s algorithm for finding minimum mean-cost cycles. In: Cheong, O., Chwa, K.-Y., Park, K. (eds.) ISAAC 2010. LNCS, vol. 6506, pp. 415\u2013426. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-17517-6_37"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Hordijk, A., Yushkevich, A.: Blackwell optimality. In: Feinberg, E.A., Shwartz, A. (eds.) The Handbook of Markov Decision Processes: Methods and Applications, chap. 8, pp. 231\u2013268. Kluwer (2002)","DOI":"10.1007\/978-1-4615-0805-2_8"},{"key":"7_CR13","volume-title":"Dynamic Programming and Markov Processes","author":"R Howard","year":"1960","unstructured":"Howard, R.: Dynamic Programming and Markov Processes. MIT Press, Cambridge (1960)"},{"key":"7_CR14","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-1-4615-0805-2_2","volume-title":"International Series in Operations Research & Management Science","author":"Lodewijk Kallenberg","year":"2003","unstructured":"Kallenberg, L.: Finite state and action MDPS. In: Feinberg, E.A., Shwartz, A. (eds.) Handbook of Markov Decision Processes. International Series in Operations Research and Management Science, vol. 40, pp. 21\u201387. Springer, Boston (2003). https:\/\/doi.org\/10.1007\/978-1-4615-0805-2_2"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Lewis, M.E., Puterman, M.L.: Bias optimality. In: Feinberg, E.A., Shwartz, A. (eds.) The Handbook of Markov Decision Processes: Methods and Applications, chap. 3, pp. 89\u2013111. 
Kluwer (2002)","DOI":"10.1007\/978-1-4615-0805-2_3"},{"key":"7_CR16","unstructured":"Littman, M.L., Dean, T.L., Kaelbling, L.P.: On the complexity of solving Markov decision problems. In: Proceedings of the 11th UAI, p. 394402 (1994)"},{"issue":"1\u20133","key":"7_CR17","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1023\/A:1018064306595","volume":"22","author":"S Mahadevan","year":"1996","unstructured":"Mahadevan, S.: Average reward reinforcement learning: foundations, algorithms, and empirical results. Mach. Learn. 22(1\u20133), 159\u2013195 (1996)","journal-title":"Mach. Learn."},{"key":"7_CR18","unstructured":"Mahadevan, S.: Sensitive discount optimality: unifying discounted and average reward reinforcement learning. In: Saitta, L. (ed.) Proceedings of the 13th ICML, pp. 328\u2013336. Morgan Kaufmann (1996)"},{"issue":"4","key":"7_CR19","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1561\/2200000003","volume":"1","author":"S Mahadevan","year":"2009","unstructured":"Mahadevan, S.: Learning representation and control in Markov decision processes: new frontiers. Found. Trends Mach. Learn. 1(4), 403\u2013565 (2009)","journal-title":"Found. Trends Mach. Learn."},{"issue":"3","key":"7_CR20","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1287\/moor.12.3.441","volume":"12","author":"C Papadimitriou","year":"1987","unstructured":"Papadimitriou, C., Tsitsiklis, J.N.: The complexity of Markov decision processes. Math. Oper. Res. 12(3), 441\u2013450 (1987)","journal-title":"Math. Oper. Res."},{"key":"7_CR21","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M Puterman","year":"1994","unstructured":"Puterman, M.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York (1994)"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Puterman, M., Patrick, J.: Dynamic programming. In: Sammut, C., Webb, G. (eds.) Encyclopedia of Machine Learning, pp. 298\u2013308. Springer (2010)","DOI":"10.1007\/978-0-387-30164-8_237"},{"key":"7_CR23","unstructured":"Kalyanakrishnan, S., Mall, U., Goyal, R.: Batch-switching policy iteration. In: Proceedings of the 25th IJCAI. AAAI Press (2016)"},{"issue":"3","key":"7_CR24","doi-asserted-by":"publisher","first-page":"758","DOI":"10.1287\/moor.2015.0753","volume":"41","author":"B Scherrer","year":"2016","unstructured":"Scherrer, B.: Improved and generalized upper bounds on the complexity of policy iteration. Math. Oper. Res. 41(3), 758\u2013774 (2016)","journal-title":"Math. Oper. Res."},{"key":"7_CR25","unstructured":"Sigaud, O., Buffet, O. (eds.): Markov Decision Processes in Artificial Intelligence. iSTE - Wiley (2010)"},{"key":"7_CR26","volume-title":"Introduction to Reinforcement Learning","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Introduction to Reinforcement Learning. MIT Press, Cambridge (1998)"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Tadepalli, P.: Average-reward reinforcement learning. In: Sammut, C., Webb, G. (eds.) Encyclopedia of Machine Learning, pp. 64\u201368. Springer (2010)","DOI":"10.1007\/978-0-387-30164-8_49"},{"issue":"1\u20132","key":"7_CR28","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1016\/S0004-3702(98)00002-2","volume":"100","author":"P Tadepalli","year":"1998","unstructured":"Tadepalli, P., Ok, D.: Model-based average reward reinforcement learning. Artif. Int. 
100(1\u20132), 177\u2013224 (1998)","journal-title":"Artif. Int."},{"key":"7_CR29","unstructured":"Tokic, M., Fessler, J., Ertel, W.: The crawler, a class room demonstrator for reinforcement learning. In: Lane, C., Guesgen, H. (eds.) Proceedings of the 22th FLAIRS, pp. 160\u2013165. AAAI Press, Menlo Park (2009)"},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"Uther, W.: Markov decision processes. In: Sammut, C., Webb, G. (eds.) Encyclopedia of Machine Learning, pp. 642\u2013646. Springer (2010)","DOI":"10.1007\/978-0-387-30164-8_512"},{"issue":"5","key":"7_CR31","doi-asserted-by":"publisher","first-page":"1635","DOI":"10.1214\/aoms\/1177697379","volume":"40","author":"A Veinott","year":"1969","unstructured":"Veinott, A.: Discrete dynamic programming with sensitive discount optimality criteria. Ann. Math. Stat. 40(5), 1635\u20131660 (1969)","journal-title":"Ann. Math. Stat."},{"key":"7_CR32","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-642-27645-3_1","volume-title":"Adaptation, Learning, and Optimization","author":"Martijn van Otterlo","year":"2012","unstructured":"van Otterlo, M., Wiering, M.: Reinforcement learning and Markov decision processes. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning. Adaptation, Learning, and Optimization, vol. 12, pp. 3\u201342. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-27645-3_1"},{"key":"7_CR33","doi-asserted-by":"crossref","unstructured":"Yang, S., Gao, Y., An, B., Wang, H., Chen, X.: Efficient average reward reinforcement learning using constant shifting values. In: Proceedings of the 30th AAAI. AAAI Press\/The MIT Press (2016)","DOI":"10.1609\/aaai.v30i1.10285"},{"issue":"4","key":"7_CR34","doi-asserted-by":"publisher","first-page":"593603","DOI":"10.1287\/moor.1110.0516","volume":"36","author":"Y Ye","year":"2011","unstructured":"Ye, Y.: The simplex and policy-iteration methods are strongly polynomial for the Markov decision problem with a fixed discount rate. Math. Oper. Res. 36(4), 593603 (2011)","journal-title":"Math. Oper. 
Res."}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence XXXV"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-04191-5_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T02:23:07Z","timestamp":1775269387000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-04191-5_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030041908","9783030041915"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-04191-5_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"SGAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Innovative Techniques and Applications of Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Cambridge","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"38","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"sgai2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.bcs-sgai.org\/ai2018\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}