{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T16:19:32Z","timestamp":1774628372722,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T00:00:00Z","timestamp":1657584000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"DSTA","award":["031017-00016"],"award-info":[{"award-number":["031017-00016"]}]},{"name":"TUBITAK BIDEB 2232-B","award":["121C124"],"award-info":[{"award-number":["121C124"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,7,12]]},"DOI":"10.1145\/3490486.3538289","type":"proceedings-article","created":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T13:29:36Z","timestamp":1657718976000},"page":"919-936","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Fictitious Play in Markov Games with Single Controller"],"prefix":"10.1145","author":[{"given":"Muhammed O.","family":"Sayin","sequence":"first","affiliation":[{"name":"Bilkent University, Ankara, Turkey"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kaiqing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Asuman","family":"Ozdaglar","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2598476"},{"key":"e_1_3_2_1_2_1","volume-title":"Dynamic Games and Applications in Economics","author":"Basar T.","unstructured":"T. Basar . 1986. Dynamic Games and Applications in Economics . Vol. 265 . Springer Science & Business Media . T. Basar. 1986. Dynamic Games and Applications in Economics. Vol. 265. Springer Science & Business Media."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"T. Basar and G. J. Olsder. 1998. Dynamic Noncooperative Game Theory .SIAM.  T. Basar and G. J. Olsder. 1998. Dynamic Noncooperative Game Theory .SIAM.","DOI":"10.1137\/1.9781611971132"},{"key":"e_1_3_2_1_4_1","volume-title":"Best-Response Dynamics and Fictitious Play in Identical Interest Stochastic Games. arXiv preprint arXiv:2111.04317","author":"Baudin L.","year":"2021","unstructured":"L. Baudin . 2021. Best-Response Dynamics and Fictitious Play in Identical Interest Stochastic Games. arXiv preprint arXiv:2111.04317 ( 2021 ). L. Baudin. 2021. Best-Response Dynamics and Fictitious Play in Identical Interest Stochastic Games. arXiv preprint arXiv:2111.04317 (2021)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012904439301"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jet.2004.02.003"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jet.2008.01.007"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(00)00039-4"},{"key":"e_1_3_2_1_9_1","first-page":"374","article-title":"Iterative solution of games by fictitious play","volume":"13","author":"Brown G. W.","year":"1951","unstructured":"G. W. Brown . 1951 . Iterative solution of games by fictitious play . Activity Analysis of Production and Allocation , Vol. 13 , 1 (1951), 374 -- 376 . G. W. Brown. 1951. Iterative solution of games by fictitious play. Activity Analysis of Production and Allocation, Vol. 13, 1 (1951), 374--376.","journal-title":"Activity Analysis of Production and Allocation"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.geb.2013.07.001"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"N. Cesa-Bianchi and G. Lugosi. 2006. Prediction Learning and Games .Cambridge University Press.  N. Cesa-Bianchi and G. Lugosi. 2006. Prediction Learning and Games .Cambridge University Press.","DOI":"10.1017\/CBO9780511546921"},{"key":"e_1_3_2_1_13_1","unstructured":"C. Daskalakis D. J. Foster and N. Golowich. 2020. Independent Policy Gradient Methods for Competitive Reinforcement Learning. In Advances in Neural Information Processing Systems .  C. Daskalakis D. J. Foster and N. Golowich. 2020. Independent Policy Gradient Methods for Competitive Reinforcement Learning. In Advances in Neural Information Processing Systems ."},{"key":"e_1_3_2_1_14_1","volume-title":"IEEE Annual Symposium on Foundations of Computer Science. IEEE, 11--20","author":"Daskalakis C.","unstructured":"C. Daskalakis and Q. Pan . 2014. A counter-example to Karlin's strong conjecture for fictitious play . In IEEE Annual Symposium on Foundations of Computer Science. IEEE, 11--20 . C. Daskalakis and Q. Pan. 2014. A counter-example to Karlin's strong conjecture for fictitious play. In IEEE Annual Symposium on Foundations of Computer Science. IEEE, 11--20."},{"key":"e_1_3_2_1_15_1","unstructured":"D. Ding C. Wei K. Zhang and M. Jovanovic. 2022. Independent Policy Gradient for Large-Scale Markov Potential Games: Sharper Rates Function Approximation and Game-Agnostic Convergence. arXiv preprint arXiv:2202.04129 (2022).  D. Ding C. Wei K. Zhang and M. Jovanovic. 2022. Independent Policy Gradient for Large-Scale Markov Potential Games: Sharper Rates Function Approximation and Game-Agnostic Convergence. arXiv preprint arXiv:2202.04129 (2022)."},{"key":"e_1_3_2_1_16_1","volume-title":"IEEE International Conference on Communications. IEEE, 1--6.","author":"Eldosouky A.","unstructured":"A. Eldosouky , W. Saad , and D. Niyato . 2016. Single controller stochastic games for optimized moving target defense . In IEEE International Conference on Communications. IEEE, 1--6. A. Eldosouky, W. Saad, and D. Niyato. 2016. Single controller stochastic games for optimized moving target defense. In IEEE International Conference on Communications. IEEE, 1--6."},{"key":"e_1_3_2_1_17_1","unstructured":"J. Filar and K. Vrieze. 2012. Competitive Markov Decision Processes .Springer Science & Business Media.  J. Filar and K. Vrieze. 2012. Competitive Markov Decision Processes .Springer Science & Business Media."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.32917\/hmj\/1206139508"},{"key":"e_1_3_2_1_19_1","unstructured":"R. Fox S. McAleer W. Overman and I. Panageas. 2021. Independent Natural Policy Gradient Always Converges in Markov Potential Games. arXiv preprint arXiv:2110.10614 (2021).  R. Fox S. McAleer W. Overman and I. Panageas. 2021. Independent Natural Policy Gradient Always Converges in Markov Potential Games. arXiv preprint arXiv:2110.10614 (2021)."},{"key":"e_1_3_2_1_20_1","unstructured":"Z. Gao Q. Ma T. Basar and J. R. Birge. 2021. Finite-Sample Analysis of Decentralized Q-Learning for Stochastic Games. arXiv preprint arXiv:2112.07859 (2021).  Z. Gao Q. Ma T. Basar and J. R. Birge. 2021. Finite-Sample Analysis of Decentralized Q-Learning for Stochastic Games. arXiv preprint arXiv:2112.07859 (2021)."},{"key":"e_1_3_2_1_21_1","volume-title":"IEEE Conference on Decision and Control. IEEE, 7075--7080","author":"Guan P.","unstructured":"P. Guan , M. Raginsky , R. Willett , and D. Zois . 2016. Regret minimization algorithms for single-controller zero-sum stochastic games . In IEEE Conference on Decision and Control. IEEE, 7075--7080 . P. Guan, M. Raginsky, R. Willett, and D. Zois. 2016. Regret minimization algorithms for single-controller zero-sum stochastic games. In IEEE Conference on Decision and Control. IEEE, 7075--7080."},{"key":"e_1_3_2_1_22_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Heliou A.","year":"2017","unstructured":"A. Heliou , J. Cohen , and P. Mertikopoulos . 2017. Learning with bandit feedback in potential games . Advances in Neural Information Processing Systems , Vol. 30 ( 2017 ). A. Heliou, J. Cohen, and P. Mertikopoulos. 2017. Learning with bandit feedback in potential games. Advances in Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1111\/1468-0262.00376"},{"key":"e_1_3_2_1_24_1","unstructured":"C. Jin Q. Liu Y. Wang and T. Yu. 2021. V-Learning--A Simple Efficient Decentralized Algorithm for Multiagent RL. arXiv preprint arXiv:2110.14555 (2021).  C. Jin Q. Liu Y. Wang and T. Yu. 2021. V-Learning--A Simple Efficient Decentralized Algorithm for Multiagent RL. arXiv preprint arXiv:2110.14555 (2021)."},{"key":"e_1_3_2_1_25_1","unstructured":"S. Leonardos W. Overman I. Panageas and G. Piliouras. 2021. Global convergence of multi-agent policy gradient in Markov potential games. arXiv preprint arXiv:2106.01969 (2021).  S. Leonardos W. Overman I. Panageas and G. Piliouras. 2021. Global convergence of multi-agent policy gradient in Markov potential games. arXiv preprint arXiv:2106.01969 (2021)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012903437976"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.geb.2005.08.005"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jet.2020.105095"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"W. Mao and T. Basar. 2022. Provably efficient reinforcement learning in decentralized general-sum Markov games. Dynamic Games and Applications (2022) 1--22.  W. Mao and T. Basar. 2022. Provably efficient reinforcement learning in decentralized general-sum Markov games. Dynamic Games and Applications (2022) 1--22.","DOI":"10.1007\/s13235-021-00420-0"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1137\/070680199"},{"key":"e_1_3_2_1_32_1","volume-title":"1988 a. A theory of dynamic oligopoly, I: Overview and quantity competition with large fixed costs. Econometrica: Journal of the Econometric Society","author":"Maskin E.","year":"1988","unstructured":"E. Maskin and J. Tirole . 1988 a. A theory of dynamic oligopoly, I: Overview and quantity competition with large fixed costs. Econometrica: Journal of the Econometric Society ( 1988 ), 549--569. E. Maskin and J. Tirole. 1988 a. A theory of dynamic oligopoly, I: Overview and quantity competition with large fixed costs. Econometrica: Journal of the Econometric Society (1988), 549--569."},{"key":"e_1_3_2_1_33_1","volume-title":"1988 b. A theory of dynamic oligopoly, II: Price competition, kinked demand curves, and Edgeworth cycles. Econometrica: Journal of the Econometric Society","author":"Maskin E.","year":"1988","unstructured":"E. Maskin and J. Tirole . 1988 b. A theory of dynamic oligopoly, II: Price competition, kinked demand curves, and Edgeworth cycles. Econometrica: Journal of the Econometric Society ( 1988 ), 571--599. E. Maskin and J. Tirole. 1988 b. A theory of dynamic oligopoly, II: Price competition, kinked demand curves, and Edgeworth cycles. Econometrica: Journal of the Econometric Society (1988), 571--599."},{"key":"e_1_3_2_1_34_1","volume-title":"On the convergence of the learning process in a 2x2 non-zero-sum game. Economic Research Program","author":"Miyasawa K.","year":"1961","unstructured":"K. Miyasawa . 1961. On the convergence of the learning process in a 2x2 non-zero-sum game. Economic Research Program , Princeton University , Research Memorandum , Vol. 33 ( 1961 ). K. Miyasawa. 1961. On the convergence of the learning process in a 2x2 non-zero-sum game. Economic Research Program, Princeton University, Research Memorandum, Vol. 33 (1961)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1006\/jeth.1996.0014"},{"key":"e_1_3_2_1_36_1","volume-title":"Non-cooperative games. Annals of Mathematics","author":"Nash J.","year":"1951","unstructured":"J. Nash . 1951. Non-cooperative games. Annals of Mathematics ( 1951 ), 286--295. J. Nash. 1951. Non-cooperative games. Annals of Mathematics (1951), 286--295."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"A. Neyman and S. Sorin. 2003. Stochastic Games and Applications. Vol. 570. Springer Science & Business Media.  A. Neyman and S. Sorin. 2003. Stochastic Games and Applications. Vol. 570. Springer Science & Business Media.","DOI":"10.1007\/978-94-010-0189-2"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00935250"},{"key":"e_1_3_2_1_39_1","volume-title":"International Conference on Artificial Intelligence and Statistics. 919--928","author":"P\u00e9rolat J.","unstructured":"J. P\u00e9rolat , B. Piot , and O. Pietquin . 2018. Actor-critic fictitious play in simultaneous move multistage games . In International Conference on Artificial Intelligence and Statistics. 919--928 . J. P\u00e9rolat, B. Piot, and O. Pietquin. 2018. Actor-critic fictitious play in simultaneous move multistage games. In International Conference on Artificial Intelligence and Statistics. 919--928."},{"key":"e_1_3_2_1_40_1","volume-title":"Provably Efficient Fictitious Play Policy Optimization for Zero-Sum Markov Games with Structured Transitions. In International Conference on Machine Learning. PMLR, 8715--8725","author":"Qiu S.","unstructured":"S. Qiu , X. Wei , J. Ye , Z. Wang , and Z. Yang . 2021 . Provably Efficient Fictitious Play Policy Optimization for Zero-Sum Markov Games with Structured Transitions. In International Conference on Machine Learning. PMLR, 8715--8725 . S. Qiu, X. Wei, J. Ye, Z. Wang, and Z. Yang. 2021. Provably Efficient Fictitious Play Policy Optimization for Zero-Sum Markov Games with Structured Transitions. In International Conference on Machine Learning. PMLR, 8715--8725."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.2307\/1969530"},{"key":"e_1_3_2_1_42_1","series-title":"SIAM J. Control Optim. ( in print)","volume-title":"in print. Fictitious play in zero-sum stochastic games","author":"Sayin M. O.","unstructured":"M. O. Sayin , F. Parise , and A. Ozdaglar . in print. Fictitious play in zero-sum stochastic games . SIAM J. Control Optim. ( in print) . M. O. Sayin, F. Parise, and A. Ozdaglar. in print. Fictitious play in zero-sum stochastic games. SIAM J. Control Optim. ( in print)."},{"key":"e_1_3_2_1_43_1","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Sayin M. O.","year":"2021","unstructured":"M. O. Sayin , K. Zhang , D. Leslie , T. Basar , and A. Ozdaglar . 2021. Decentralized Q-learning in zero-sum Markov games . Advances in Neural Information Processing Systems , Vol. 34 ( 2021 ). M. O. Sayin, K. Zhang, D. Leslie, T. Basar, and A. Ozdaglar. 2021. Decentralized Q-learning in zero-sum Markov games. Advances in Neural Information Processing Systems, Vol. 34 (2021)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/s001990050345"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.39.10.1953"},{"key":"e_1_3_2_1_46_1","unstructured":"Z. Song S. Mei and Y. Bai. 2021. When Can We Learn General-Sum Markov Games with a Large Number of Players Sample-Efficiently? arXiv preprint arXiv:2110.04184 (2021).  Z. Song S. Mei and Y. Bai. 2021. When Can We Learn General-Sum Markov Games with a Large Number of Players Sample-Efficiently? arXiv preprint arXiv:2110.04184 (2021)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1137\/17M1139461"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"K. Zhang Z. Yang and T. Basar. 2020. Multi-Agent Reinforcement Learning: A Selective Overview of Theories and Algorithms. Studies in Systems Decision and Control Handbook on RL and Control (2020).  K. Zhang Z. Yang and T. Basar. 2020. Multi-Agent Reinforcement Learning: A Selective Overview of Theories and Algorithms. Studies in Systems Decision and Control Handbook on RL and Control (2020).","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"e_1_3_2_1_49_1","unstructured":"R. Zhang Z. Ren and N. Li. 2021. Gradient play in stochastic games: Stationary points convergence and sample complexity. arXiv preprint arXiv:2106.00198 (2021).  R. Zhang Z. Ren and N. Li. 2021. Gradient play in stochastic games: Stationary points convergence and sample complexity. arXiv preprint arXiv:2106.00198 (2021)."}],"event":{"name":"EC '22: The 23rd ACM Conference on Economics and Computation","location":"Boulder CO USA","acronym":"EC '22","sponsor":["SIGecom Special Interest Group on Economics and Computation"]},"container-title":["Proceedings of the 23rd ACM Conference on Economics and Computation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3490486.3538289","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3490486.3538289","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:31:04Z","timestamp":1750188664000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3490486.3538289"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,12]]},"references-count":49,"alternative-id":["10.1145\/3490486.3538289","10.1145\/3490486"],"URL":"https:\/\/doi.org\/10.1145\/3490486.3538289","relation":{},"subject":[],"published":{"date-parts":[[2022,7,12]]},"assertion":[{"value":"2022-07-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}