{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T13:35:11Z","timestamp":1768311311370,"version":"3.49.0"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1109\/cdc57313.2025.11312444","type":"proceedings-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T18:19:56Z","timestamp":1768241996000},"page":"6338-6345","source":"Crossref","is-referenced-by-count":0,"title":["Population-aware Online Mirror Descent for Mean-Field Games with Common Noise by Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Zida","family":"Wu","sequence":"first","affiliation":[{"name":"UCLA"}]},{"given":"Mathieu","family":"Lauriere","sequence":"additional","affiliation":[{"name":"New York University Shanghai"}]},{"given":"Matthieu","family":"Geist","sequence":"additional","affiliation":[{"name":"Earth Species Project"}]},{"given":"Olivier","family":"Pietquin","sequence":"additional","affiliation":[{"name":"Earth Species Project"}]},{"given":"Ankur","family":"Mehta","sequence":"additional","affiliation":[{"name":"UCLA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2831228"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2005.864190"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2007.895842"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1049\/ip-sen:19971023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2018.2857475"},{"key":"ref6","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume":"30","author":"Lowe","year":"2017","journal-title":"Advances in neural information processing systems"},{"issue":"1","key":"ref7","first-page":"7234","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"The Journal of Machine Learning Research"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s11537-007-0657-8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2007.904450"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-56436-4"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-8508-7"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313433"},{"key":"ref13","article-title":"Learning mean-field games","volume":"32","author":"Guo","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref14","first-page":"1909","article-title":"Approximately solving mean field games via entropy-regularized deep reinforcement learning","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Cui"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s13235-022-00450-2"},{"issue":"1","key":"ref16","first-page":"374","article-title":"Iterative solution of games by fictitious play","volume":"13","author":"Brown","year":"1951","journal-title":"Act. Anal. Prod Allocation"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.jet.2005.12.010"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1051\/cocv\/2016004"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.matpur.2019.02.006"},{"key":"ref20","first-page":"13199","article-title":"Fictitious play for mean field games: Continuous time analysis and applications","volume":"33","author":"Perrin","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","first-page":"12078","article-title":"Scalable deep rein-forcement learning algorithms for mean field games","volume-title":"International Conference on Machine Learning","author":"Lauri\u00e8re"},{"key":"ref22","article-title":"Learning in anonymous nonatomic games with applications to first-order mean field games","author":"Hadikhanloo","year":"2017"},{"key":"ref23","article-title":"Learning in mean field games","volume-title":"PhD thesis","author":"Hadikhanloo","year":"2018"},{"key":"ref24","first-page":"1028","article-title":"Scaling mean field games by online mirror descent","volume-title":"Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems","author":"Perolat"},{"key":"ref25","first-page":"4235","article-title":"Munchausen reinforcement APPENDIX learning","volume":"33","author":"Vieillard","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref26","article-title":"Learning in mean field games: A survey","author":"Lauri\u00e8re","year":"2022"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctvckq7qf"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i9.21173"},{"key":"ref29","article-title":"Population-aware online mirror descent for mean-field games by deep reinforcement learning","author":"Wu","year":"2024"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/66"},{"key":"ref31","first-page":"12163","article-title":"Leverage the average: an analysis of kl regularization in reinforcement learning","volume":"33","author":"Vieillard","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref33","article-title":"Prioritized experience replay","author":"Schaul","year":"2015"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1214\/15-AOP1060"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-024-06547-6"},{"key":"ref37","article-title":"Concave utility reinforcement learning: the mean-field game viewpoint","author":"Geist","year":"2021"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s11579-012-0089-y"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s10957-015-0819-4"}],"event":{"name":"2025 IEEE 64th Conference on Decision and Control (CDC)","location":"Rio de Janeiro, Brazil","start":{"date-parts":[[2025,12,9]]},"end":{"date-parts":[[2025,12,12]]}},"container-title":["2025 IEEE 64th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11311984\/11311968\/11312444.pdf?arnumber=11312444","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T08:14:01Z","timestamp":1768292041000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11312444\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,9]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/cdc57313.2025.11312444","relation":{},"subject":[],"published":{"date-parts":[[2025,12,9]]}}}