{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T06:46:15Z","timestamp":1769582775429,"version":"3.49.0"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319573502","type":"print"},{"value":"9783319573519","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-57351-9_1","type":"book-chapter","created":{"date-parts":[[2017,4,10]],"date-time":"2017-04-10T11:08:53Z","timestamp":1491822533000},"page":"3-14","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["On Generalized Bellman Equations and Temporal-Difference Learning"],"prefix":"10.1007","author":[{"given":"Huizhen","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashique Rupam","family":"Mahmood","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard S.","family":"Sutton","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,4,11]]},"reference":[{"key":"1_CR1","volume-title":"Neuro-Dynamic Programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"1_CR2","unstructured":"Boyan, J.A.: Least-squares temporal difference learning. In: Proceedings of the 16th International Conference Machine Learning (ICML) (1999)"},{"key":"1_CR3","first-page":"809","volume":"15","author":"C Dann","year":"2014","unstructured":"Dann, C., Neumann, G., Peters, J.: Policy evaluation with temporal differences: a survey and comparison. J. Mach. Learn. Res. 15, 809\u2013883 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR4","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511755347","volume-title":"Real Analysis and Probability","author":"RM Dudley","year":"2002","unstructured":"Dudley, R.M.: Real Analysis and Probability. Cambridge University Press, Cambridge (2002)"},{"key":"1_CR5","first-page":"289","volume":"15","author":"M Geist","year":"2014","unstructured":"Geist, M., Scherrer, B.: Off-policy learning with eligibility traces: a survey. J. Mach. Learn. Res. 15, 289\u2013333 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR6","doi-asserted-by":"publisher","first-page":"1367","DOI":"10.1287\/mnsc.35.11.1367","volume":"35","author":"PW Glynn","year":"1989","unstructured":"Glynn, P.W., Iglehart, D.L.: Importance sampling for stochastic simulations. Manag. Sci. 35, 1367\u20131392 (1989)","journal-title":"Manag. Sci."},{"key":"1_CR7","volume-title":"Stochastic Approximation and Recursive Algorithms and Applications","author":"HJ Kushner","year":"2003","unstructured":"Kushner, H.J., Yin, G.G.: Stochastic Approximation and Recursive Algorithms and Applications, 2nd edn. Springer, New York (2003)","edition":"2"},{"key":"1_CR8","unstructured":"Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., Petrik, M.: Finite-sample analysis of proximal gradient TD algorithms. In: The 31st Conference on Uncertainty in Artificial Intelligence (UAI) (2015)"},{"key":"1_CR9","unstructured":"Maei, H.R.: Gradient temporal-difference learning algorithms. Ph.D. thesis, University of Alberta (2011)"},{"key":"1_CR10","unstructured":"Mahadevan, S., Liu, B., Thomas, P., Dabney, W., Giguere, S., Jacek, N., Gemp, I., Liu, J.: Proximal reinforcement learning (2014). \n                      arXiv:1405.6757"},{"key":"1_CR11","unstructured":"Mahmood, A.R., van Hasselt, H., Sutton, R.S.: Weighted importance sampling for off-policy learning with linear function approximation. In: Advances in Neural Information Processing Systems (NIPS), vol. 27 (2014)"},{"key":"1_CR12","unstructured":"Mahmood, A.R., Yu, H., Sutton, R.S.: Multi-step off-policy learning without importance-sampling ratios (2017). \n                      arXiv:1702.03006"},{"key":"1_CR13","doi-asserted-by":"publisher","first-page":"1409","DOI":"10.1137\/0327073","volume":"27","author":"S Meyn","year":"1989","unstructured":"Meyn, S.: Ergodic theorems for discrete time stochastic systems using a stochastic Lyapunov function. SIAM J. Control Optim. 27, 1409\u20131439 (1989)","journal-title":"SIAM J. Control Optim."},{"key":"1_CR14","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511626630","volume-title":"Markov Chains and Stochastic Stability","author":"S Meyn","year":"2009","unstructured":"Meyn, S., Tweedie, R.L.: Markov Chains and Stochastic Stability, 2nd edn. Cambridge University Press, Cambridge (2009)","edition":"2"},{"key":"1_CR15","unstructured":"Munos, R., Stepleton, T., Harutyunyan, A., Bellemare, M.G.: Safe and efficient off-policy reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS), vol. 29 (2016)"},{"key":"1_CR16","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511526237","volume-title":"General Irreducible Markov Chains and Non-Negative Operators","author":"E Nummelin","year":"1984","unstructured":"Nummelin, E.: General Irreducible Markov Chains and Non-Negative Operators. Cambridge University Press, Cambridge (1984)"},{"key":"1_CR17","unstructured":"Precup, D., Sutton, R.S., Dasgupta, S.: Off-policy temporal-difference learning with function approximation. In: The 18th International Conference on Machine Learning (ICML) (2001)"},{"key":"1_CR18","unstructured":"Precup, D., Sutton, R.S., Singh, S.: Eligibility traces for off-policy policy evaluation. In: The 17th International Conference on Machine Learning (ICML) (2000)"},{"key":"1_CR19","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York (1994)"},{"issue":"1","key":"1_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/974734.974735","volume":"14","author":"RS Randhawa","year":"2004","unstructured":"Randhawa, R.S., Juneja, S.: Combining importance sampling and temporal difference control variates to simulate Markov chains. ACM Trans. Model. Comput. Simul. 14(1), 1\u201330 (2004)","journal-title":"ACM Trans. Model. Comput. Simul."},{"key":"1_CR21","volume-title":"Real and Complex Analysis","author":"W Rudin","year":"1966","unstructured":"Rudin, W.: Real and Complex Analysis. McGraw-Hill, New York (1966)"},{"key":"1_CR22","unstructured":"Scherrer, B.: Should one compute the temporal difference fix point or minimize the Bellman residual? In: The 27th International Conference on Machine Learning (ICML) (2010)"},{"key":"1_CR23","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the methods of temporal differences. Mach. Learn. 3, 9\u201344 (1988)","journal-title":"Mach. Learn."},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: TD models: modeling the world at a mixture of time scales. In: The 12th International Conference on Machine Learning (ICML) (1995)","DOI":"10.1016\/B978-1-55860-377-6.50072-4"},{"key":"1_CR25","volume-title":"Reinforcement Learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning. MIT Press, Cambridge (1998)"},{"issue":"73","key":"1_CR26","first-page":"1","volume":"17","author":"RS Sutton","year":"2016","unstructured":"Sutton, R.S., Mahmood, A.R., White, M.: An emphatic approach to the problem of off-policy temporal-difference learning. J. Mach. Learn. Res. 17(73), 1\u201329 (2016)","journal-title":"J. Mach. Learn. Res."},{"issue":"5","key":"1_CR27","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"JN Tsitsiklis","year":"1997","unstructured":"Tsitsiklis, J.N., Van Roy, B.: An analysis of temporal-difference learning with function approximation. IEEE Trans. Autom. Control 42(5), 674\u2013690 (1997)","journal-title":"IEEE Trans. Autom. Control"},{"key":"1_CR28","first-page":"1977","volume":"12","author":"T Ueno","year":"2011","unstructured":"Ueno, T., Maeda, S., Kawanabe, M., Ishii, S.: Generalized TD learning. J. Mach. Learn. Res. 12, 1977\u20132020 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR29","doi-asserted-by":"publisher","first-page":"3310","DOI":"10.1137\/100807879","volume":"50","author":"H Yu","year":"2012","unstructured":"Yu, H.: Least squares temporal difference methods: an analysis under general conditions. SIAM J. Control Optim. 50, 3310\u20133343 (2012)","journal-title":"SIAM J. Control Optim."},{"key":"1_CR30","unstructured":"Yu, H.: On convergence of emphatic temporal-difference learning. In: The 28th Annual Conference on Learning Theory (COLT) (2015). \n                      arXiv:1506.02582"},{"issue":"220","key":"1_CR31","first-page":"1","volume":"17","author":"H Yu","year":"2016","unstructured":"Yu, H.: Weak convergence properties of constrained emphatic temporal-difference learning with constant and slowly diminishing stepsize. J. Mach. Learn. Res. 17(220), 1\u201358 (2016)","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"1_CR32","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1287\/moor.1100.0441","volume":"35","author":"H Yu","year":"2010","unstructured":"Yu, H., Bertsekas, D.P.: Error bounds for approximations from projected linear equations. Math. Oper. Res. 35(2), 306\u2013329 (2010)","journal-title":"Math. Oper. Res."},{"key":"1_CR33","unstructured":"Yu, H., Bertsekas, D.P.: Weighted Bellman equations and their applications in approximate dynamic programming. LIDS Technical report 2876, MIT (2012)"}],"container-title":["Lecture Notes in Computer Science","Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-57351-9_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T02:05:00Z","timestamp":1558317900000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-57351-9_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319573502","9783319573519"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-57351-9_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"11 April 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Canadian AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canadian Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Edmonton","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 May 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 May 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"canadianai2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/aigicrv.org\/2017\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}