{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T04:29:59Z","timestamp":1743136199306,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434204"},{"type":"electronic","value":"9783031434211"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43421-1_26","type":"book-chapter","created":{"date-parts":[[2023,9,17]],"date-time":"2023-09-17T20:37:24Z","timestamp":1694983044000},"page":"439-454","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["On the\u00a0Distributional Convergence of\u00a0Temporal Difference Learning"],"prefix":"10.1007","author":[{"given":"Jie","family":"Dai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuguang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,18]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"Baird, L.: Residual algorithms: reinforcement learning with function approximation. In: Machine Learning, pp. 30\u201337 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Bertsekas, D.P.: A new class of incremental gradient methods for least squares problems. SIAM J. Optim. 7(4), 913\u2013926 (1997)","DOI":"10.1137\/S1052623495287022"},{"key":"26_CR3","unstructured":"Bhandari, J., Russo, D., Singal, R.: A finite time analysis of temporal difference learning with linear function approximation. In: Conference on learning theory (2018)"},{"key":"26_CR4","unstructured":"Borkar, V.S.: Stochastic approximation: a dynamical systems viewpoint, vol. 48. Springer (2009)"},{"key":"26_CR5","unstructured":"Brosse, N., Durmus, A., Moulines, E.: The promises and pitfalls of stochastic gradient langevin dynamics. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"26_CR6","unstructured":"Can, B., Gurbuzbalaban, M., Zhu, L.: Accelerated linear convergence of stochastic momentum methods in Wasserstein distances. In: Proceedings of the 36th International Conference on Machine Learning, vol. 97, pp. 891\u2013901. PMLR, 09\u201315 Jun 2019"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Dalal, G., Sz\u00f6r\u00e9nyi, B., Thoppe, G., Mannor, S.: Finite sample analyses for td(0) with function approximation. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.12079"},{"issue":"3","key":"26_CR8","doi-asserted-by":"publisher","first-page":"1348","DOI":"10.1214\/19-AOS1850","volume":"48","author":"A Dieuleveut","year":"2020","unstructured":"Dieuleveut, A., Durmus, A., Bach, F.: Bridging the gap between constant step size stochastic gradient descent and markov chains. Ann. Stat. 48(3), 1348\u20131382 (2020)","journal-title":"Ann. Stat."},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Duchi, J.C., Agarwal, A., Johansson, M., Jordan, M.I.: Ergodic mirror descent. SIAM J. Optim. 22(4), 1549\u20131578 (2012)","DOI":"10.1137\/110836043"},{"key":"26_CR10","unstructured":"Gitman, I., Lang, H., Zhang, P., Xiao, L.: Understanding the role of momentum in stochastic gradient methods. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"issue":"4","key":"26_CR11","doi-asserted-by":"publisher","first-page":"1141","DOI":"10.1137\/21M1389808","volume":"3","author":"A Gupta","year":"2021","unstructured":"Gupta, A., Haskell, W.B.: Convergence of recursive stochastic algorithms using wasserstein divergence. SIAM J. Math. Data Sci. 3(4), 1141\u20131167 (2021)","journal-title":"SIAM J. Math. Data Sci."},{"issue":"4","key":"26_CR12","doi-asserted-by":"publisher","first-page":"967","DOI":"10.1137\/19M1258104","volume":"2","author":"A Gupta","year":"2020","unstructured":"Gupta, A., Chen, H., Pi, J., Tendolkar, G.: Some limit properties of markov chains induced by recursive stochastic algorithms. SIAM J. Math. Data Sci. 2(4), 967\u20131003 (2020)","journal-title":"SIAM J. Math. Data Sci."},{"key":"26_CR13","unstructured":"Hu, B., Syed, U.: Characterizing the exact behaviors of temporal difference learning algorithms using markov jump linear system theory. In: Advances in Neural Information Processing Systems, pp. 8477\u20138488, Vancouver, Canada, December 2019"},{"issue":"3","key":"26_CR14","doi-asserted-by":"publisher","first-page":"1157","DOI":"10.1137\/08073038X","volume":"20","author":"B Johansson","year":"2010","unstructured":"Johansson, B., Rabi, M., Johansson, M.: A randomized incremental subgradient method for distributed optimization in networked systems. SIAM J. Optim. 20(3), 1157\u20131170 (2010)","journal-title":"SIAM J. Optim."},{"key":"26_CR15","unstructured":"Lakshminarayanan, C., Szepesvari, C.: Linear stochastic approximation: how far does constant step-size and iterate averaging go? In: International Conference on Artificial Intelligence and Statistics, pp. 1347\u20131355 (2018)"},{"key":"26_CR16","unstructured":"Lee, D., He, N.: Target-based temporal-difference learning. In: International Conference on Machine Learning, pp. 3713\u20133722. PMLR (2019)"},{"key":"26_CR17","first-page":"1","volume":"18","author":"S Mandt","year":"2017","unstructured":"Mandt, S., Hoffman, M.D., Blei, D.M.: Stochastic gradient descent as approximate bayesian inference. J. Mach. Learn. Res. 18, 1\u201335 (2017)","journal-title":"J. Mach. Learn. Res."},{"key":"26_CR18","unstructured":"Meyn, S.P.: Markov Chains and Stochastic Stability. Markov Chains and Stochastic Stability (1999)"},{"key":"26_CR19","doi-asserted-by":"publisher","unstructured":"Nlar, E.: Probability and stochastics. Probability and Stochastics (2011). https:\/\/doi.org\/10.1007\/978-0-387-87859-1","DOI":"10.1007\/978-0-387-87859-1"},{"issue":"2","key":"26_CR20","doi-asserted-by":"publisher","first-page":"691","DOI":"10.1137\/080726380","volume":"20","author":"SS Ram","year":"2009","unstructured":"Ram, S.S., Nedi\u0107, A., Veeravalli, V.V.: Incremental stochastic subgradient algorithms for convex optimization. SIAM J. Optim. 20(2), 691\u2013717 (2009)","journal-title":"SIAM J. Optim."},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 400\u2013407 (1951)","DOI":"10.1214\/aoms\/1177729586"},{"key":"26_CR22","unstructured":"Romoff, J., et al.: Tdprop: does adaptive optimization with jacobi preconditioning help temporal difference learning? In: Proceedings of the 20th International Conference on Autonomous Agents and MultiAgent Systems, pp. 1082\u20131090 (2021)"},{"issue":"3","key":"26_CR23","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1137\/130912839","volume":"58","author":"V Simoncini","year":"2016","unstructured":"Simoncini, V.: Computational methods for linear matrix equations. SIAM Rev. 58(3), 377\u2013441 (2016)","journal-title":"SIAM Rev."},{"key":"26_CR24","unstructured":"Srikant, R., Ying, L.: Finite-time error bounds for linear stochastic approximation and TD learning. In: COLT (2019)"},{"key":"26_CR25","unstructured":"Sun, T., Li, D., Wang, B.: Adaptive random walk gradient descent for decentralized optimization. In: International Conference on Machine Learning, pp. 20790\u201320809. PMLR (2022)"},{"key":"26_CR26","doi-asserted-by":"crossref","unstructured":"Sun, T., Shen, H., Chen, T., Li, D.: Adaptive temporal difference learning with linear function approximation. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 8812\u20138824 (2021)","DOI":"10.1109\/TPAMI.2021.3119645"},{"key":"26_CR27","unstructured":"Sun, T., Sun, Y., Yin, W.: On markov chain gradient descent. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"issue":"1","key":"26_CR28","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/BF00115009","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the methods of temporal differences. Mach. Learn. 3(1), 9\u201344 (1988)","journal-title":"Mach. Learn."},{"key":"26_CR29","unstructured":"Sutton, R.S., Barto, A.G., et al.: Introduction to reinforcement learning, vol. 2. MIT Press, Cambridge (1998)"},{"key":"26_CR30","doi-asserted-by":"crossref","unstructured":"Tsitsiklis, J.N., Roy, B.V.: An analysis of temporal-difference learning with function approximation. IEEE Trans. Autom. Control (1997)","DOI":"10.1109\/9.580874"},{"key":"26_CR31","doi-asserted-by":"publisher","unstructured":"Villani, C.: Optimal Transport: Old and New, vol. 338. Springer, Berlin (2009). https:\/\/doi.org\/10.1007\/978-3-540-71050-9","DOI":"10.1007\/978-3-540-71050-9"},{"key":"26_CR32","first-page":"10460","volume":"35","author":"H Xiong","year":"2021","unstructured":"Xiong, H., Tengyu, X., Liang, Y., Zhang, W.: Non-asymptotic convergence of adam-type reinforcement learning algorithms under markovian sampling. Proc. AAAI Conf. Artif. Intell. 35, 10460\u201310468 (2021)","journal-title":"Proc. AAAI Conf. Artif. Intell."}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43421-1_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,17]],"date-time":"2023-09-17T20:45:25Z","timestamp":1694983525000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43421-1_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434204","9783031434211"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43421-1_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"18 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Our paper is devoted to the theoretical aspect of general stochastic algorithm, which does not present any foreseeable societal consequence.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}