{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T02:02:58Z","timestamp":1780452178866,"version":"3.54.1"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,1,15]],"date-time":"2022-01-15T00:00:00Z","timestamp":1642204800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,15]],"date-time":"2022-01-15T00:00:00Z","timestamp":1642204800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Math. Control Signals Syst."],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s00498-021-00310-1","type":"journal-article","created":{"date-parts":[[2022,1,15]],"date-time":"2022-01-15T00:05:35Z","timestamp":1642205135000},"page":"217-271","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":31,"title":["Unified reinforcement Q-learning for mean field game and control problems"],"prefix":"10.1007","volume":"34","author":[{"given":"Andrea","family":"Angiuli","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jean-Pierre","family":"Fouque","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4380-1399","authenticated-orcid":false,"given":"Mathieu","family":"Lauri\u00e8re","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,1,15]]},"reference":[{"key":"310_CR1","unstructured":"Anahtarci B, Kariksiz CD, Saldi N (2020) Q-learning in regularized mean-field games. arXiv preprint arXiv:2003.12151"},{"key":"310_CR2","doi-asserted-by":"crossref","unstructured":"Angiuli A, Fouque J-P, Lauri\u00e8re M (2021) Reinforcement learning for mean field games, with applications to economics. To appear in the Handbook on Machine Learning in Financial Markets: A guide to contemporary practises, editors: A. Capponi and C.-A. Lehalle, Cambridge University Press.","DOI":"10.1007\/s00498-021-00310-1"},{"key":"310_CR3","unstructured":"Bellman RE, Dreyfus SE (2015) Applied dynamic programming, vol 2050. Princeton University Press"},{"key":"310_CR4","doi-asserted-by":"crossref","unstructured":"Bensoussan A, Frehse J, Chi PYS (2013) Mean field games and mean field type control theory. Springer Briefs in Mathematics, Springer, New York","DOI":"10.1007\/978-1-4614-8508-7"},{"key":"310_CR5","doi-asserted-by":"crossref","unstructured":"Borkar VS (1997) Stochastic approximation with two time scales. Syst Control Lett 29(5):291\u2013294","DOI":"10.1016\/S0167-6911(97)90015-3"},{"key":"310_CR6","unstructured":"Borkar VS (2008) Stochastic approximation. Cambridge University Press, Cambridge, Hindustan Book Agency, New Delhi. A dynamical systems viewpoint"},{"key":"310_CR7","doi-asserted-by":"crossref","unstructured":"Cardaliaguet P, Hadikhanloo S (2017) Learning in Mean Field Games: the Fictitious Play. COCV 23:569\u2013591.","DOI":"10.1051\/cocv\/2016004"},{"key":"310_CR8","doi-asserted-by":"crossref","unstructured":"Carmona R, Delarue F (2018) Probabilistic theory of mean field games with applications I\u2013II. Springer","DOI":"10.1007\/978-3-319-56436-4"},{"key":"310_CR9","unstructured":"Carmona R, Mathieu L (2019) Convergence analysis of machine learning algorithms for the numerical solution of mean field control and games: I\u2013the ergodic case. arXiv preprint arXiv:1907.05980"},{"key":"310_CR10","unstructured":"Carmona R, Lauri\u00e8re M (2019) Convergence analysis of machine learning algorithms for the numerical solution of mean field control and games: II\u2013the finite horizon case. arXiv preprint arXiv:1908.01613"},{"key":"310_CR11","unstructured":"Carmona R, Lauri\u00e8re M, Zongjun T (2019) Linear-quadratic mean-field reinforcement learning: convergence of policy gradient methods. Preprint"},{"key":"310_CR12","unstructured":"Carmona R, Lauri\u00e8re M, Zongjun T (2019) Mean-field MDP and mean-field Q-learning: model-free mean-field reinforcement learning. Preprint"},{"key":"310_CR13","doi-asserted-by":"crossref","unstructured":"Elie R, Perolat J, Lauri\u00e8re M, Geist M, Pietquin O (2020) On the convergence of model free learning in mean field games. In: Proceedings of AAAI","DOI":"10.1609\/aaai.v34i05.6203"},{"issue":"Dec","key":"310_CR14","first-page":"1","volume":"5","author":"Mansour Y Even-DE","year":"2003","unstructured":"Even-DE Mansour Y (2003) Learning rates for q-learning. J Mach Learn Res 5(Dec):1\u201325","journal-title":"J Mach Learn Res"},{"key":"310_CR15","doi-asserted-by":"crossref","unstructured":"Fouque JP, Zhang Z (2020) Deep learning methods for mean field control problems with delay. Front Appl Math Stat 6(11)","DOI":"10.3389\/fams.2020.00011"},{"key":"310_CR16","unstructured":"Fu Z, Yang Z, Chen Y, Wang Z (2019) Actor-critic provably finds nash equilibria of linear-quadratic mean-field games. arXiv preprint arXiv:1910.07498"},{"key":"310_CR17","unstructured":"Gu H, Guo X, Wei X, Xu R (2019) Dynamic programming principles for learning MFCS. arXiv preprint arXiv:1911.07314"},{"key":"310_CR18","unstructured":"Gu H, Guo X, Wei X, Xu R (2020) Mean-field controls with Q-learning for cooperative MARL: convergence and complexity analysis. arXiv preprint arXiv:2002.04131"},{"key":"310_CR19","unstructured":"Guo X, Hu A, Xu R, Zhang J (2019) Learning mean-field games. In: Advances in neural information processing systems, pp 4966\u20134976"},{"key":"310_CR20","unstructured":"Han J, Hu R (2020) Deep fictitious play for finding Markovian Nash equilibrium in multi-agent games. arXiv:1912.01809"},{"key":"310_CR21","doi-asserted-by":"crossref","unstructured":"Huang M, Caines PE, Malham\u00e9 RP (2007) Large-population cost-coupled LQG problems with nonuniform agents: individual-mass behavior and decentralized $$\\epsilon $$-Nash equilibria. IEEE Trans Autom Control 52(9):1560\u20131571","DOI":"10.1109\/TAC.2007.904450"},{"key":"310_CR22","doi-asserted-by":"crossref","unstructured":"Huang M, Malham\u00e9 RP, Caines PE (2006) Large population stochastic dynamic games: closed-loop McKean\u2013Vlasov systems and the Nash certainty equivalence principle. Commun Inf Syst 6(3):221\u2013251","DOI":"10.4310\/CIS.2006.v6.n3.a5"},{"key":"310_CR23","doi-asserted-by":"crossref","unstructured":"Lasry J-M, Lions P-L (2007) Mean field games. Jpn J Math 2(1):229\u2013260","DOI":"10.1007\/s11537-007-0657-8"},{"key":"310_CR24","doi-asserted-by":"crossref","unstructured":"Mguni D, Jennings J, de\u00a0Cote EM (2018) Decentralised learning in systems with many, many strategic agents. In: Thirty-second AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.11586"},{"key":"310_CR25","unstructured":"Motte M, Pham H (2019) Mean-field Markov decision processes with common noise and open-loop controls. arXiv preprint arXiv:1912.07883"},{"key":"310_CR26","unstructured":"Perrin S, P\u00e9rolat J, Lauri\u00e8re M, Geist M, Elie R, Olivier P (2020) Fictitious play for mean field games: continuous time analysis and applications. In preparation"},{"key":"310_CR27","unstructured":"Subramanian J, Mahajan A (2019) Reinforcement learning in stationary mean-field games. In: Proceedings. 18th international conference on autonomous agents and multiagent systems"},{"key":"310_CR28","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT press"},{"key":"310_CR29","unstructured":"Watkins CJCH (1989) Learning from delayed rewards. PhD thesis, King\u2019s College, Cambridge"},{"key":"310_CR30","unstructured":"Xie Q, Yang Z, Wang Z, Minca A (2020) Provable fictitious play for general mean-field games. arXiv preprint arXiv:2010.04211"},{"key":"310_CR31","unstructured":"Yang J, Ye X, Trivedi R, Xu H, & Zha H (2018) Deep mean field games for learning optimal behavior policy of large populations. In International Conference on Learning Representations."},{"key":"310_CR32","unstructured":"Yang Y, Luo R, Li M, Zhou M, Zhang W, Wang J (2018) Mean field multi-agent reinforcement learning. In: International conference on machine learning, pp 5567\u20135576"}],"container-title":["Mathematics of Control, Signals, and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00498-021-00310-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00498-021-00310-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00498-021-00310-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,23]],"date-time":"2023-01-23T01:44:20Z","timestamp":1674438260000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00498-021-00310-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,15]]},"references-count":32,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["310"],"URL":"https:\/\/doi.org\/10.1007\/s00498-021-00310-1","relation":{},"ISSN":["0932-4194","1435-568X"],"issn-type":[{"value":"0932-4194","type":"print"},{"value":"1435-568X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,15]]},"assertion":[{"value":"15 October 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}