{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T23:42:54Z","timestamp":1768347774489,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,10,18]],"date-time":"2020-10-18T00:00:00Z","timestamp":1602979200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Science Foundation"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,10,19]]},"DOI":"10.1145\/3412815.3416888","type":"proceedings-article","created":{"date-parts":[[2020,10,15]],"date-time":"2020-10-15T23:37:54Z","timestamp":1602805074000},"page":"139-148","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["On Reinforcement Learning for Turn-based Zero-sum Markov Games"],"prefix":"10.1145","author":[{"given":"Devavrat","family":"Shah","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Varun","family":"Somani","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiaomin","family":"Xie","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhi","family":"Xu","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,10,18]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International joint conference on artificial intelligence","author":"Bowling Michael"},{"key":"e_1_3_2_1_2_1","first-page":"213","article-title":"R-max-a general polynomial time algorithm for near-optimal reinforcement learning","volume":"3","author":"Brafman Ronen I","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2432622.2432623"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/945365.964288"},{"key":"e_1_3_2_1_5_1","volume-title":"ICML","volume":"98","author":"Hu Junling","year":"1998"},{"key":"e_1_3_2_1_6_1","unstructured":"Zeyu Jia Lin F. Yang and Mengdi Wang. 2019. Feature-Based Q-Learning for Two-Player Stochastic Games. arxiv: 1906.00423 [cs.LG]  Zeyu Jia Lin F. Yang and Mengdi Wang. 2019. Feature-Based Q-Learning for Two-Player Stochastic Games. arxiv: 1906.00423 [cs.LG]"},{"key":"e_1_3_2_1_7_1","unstructured":"Emilie Kaufmann and Wouter M. Koolen. 2017. Monte-carlo tree search by best arm identification. In Advances in Neural Information Processing Systems. 4897--4906.  Emilie Kaufmann and Wouter M. Koolen. 2017. Monte-carlo tree search by best arm identification. In Advances in Neural Information Processing Systems. 4897--4906."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Michael Kearns Yishay Mansour and Andrew Y Ng. 2002. A sparse sampling algorithm for near-optimal planning in large Markov decision processes. Machine learning Vol. 49 2--3 (2002) 193--208.  Michael Kearns Yishay Mansour and Andrew Y Ng. 2002. A sparse sampling algorithm for near-optimal planning in large Markov decision processes. Machine learning Vol. 49 2--3 (2002) 193--208.","DOI":"10.1023\/A:1017932429737"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1137\/0319039"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the Eighteenth conference on Uncertainty in artificial intelligence. Morgan Kaufmann Publishers Inc., 283--292","author":"Lagoudakis Michail G","year":"2002"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/3091574.3091594"},{"key":"e_1_3_2_1_13_1","first-page":"322","article-title":"a. Friend-or-foe Q-learning in general-sum games","volume":"1","author":"Littman Michael L","year":"2001","journal-title":"ICML"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/S1389-0417(01)00015-8"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00927915"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00928474"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01737555"},{"key":"e_1_3_2_1_18_1","unstructured":"Stephen D. Patek. 1997. Stochastic Shortest Path Games: Theory and Algorithms . PhD dissertation. Massachusetts Institute of Technology.  Stephen D. Patek. 1997. Stochastic Shortest Path Games: Theory and Algorithms . PhD dissertation. Massachusetts Institute of Technology."},{"key":"e_1_3_2_1_19_1","volume-title":"ICML 2016--33rd International Conference on Machine Learning .","author":"P\u00e9rolat Julien","year":"2016"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the 32nd International Conference on Machine Learning. 1321--1329","author":"Perolat Julien","year":"2015"},{"key":"e_1_3_2_1_21_1","volume-title":"Non-Asymptotic Analysis of Monte Carlo Tree Search. ACM SIGMETRICS 2020","author":"Shah Devavrat","year":"2020"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.39.10.1953"},{"key":"e_1_3_2_1_23_1","unstructured":"Aaron Sidford Mengdi Wang Lin F. Yang and Yinyu Ye. 2019. Solving Discounted Stochastic Two-Player Games with Near-Optimal Time and Sample Complexity. arxiv: 1908.11071 [cs.LG]  Aaron Sidford Mengdi Wang Lin F. Yang and Yinyu Ye. 2019. Solving Discounted Stochastic Two-Player Games with Near-Optimal Time and Sample Complexity. arxiv: 1908.11071 [cs.LG]"},{"key":"e_1_3_2_1_24_1","volume-title":"Nature","volume":"529","author":"Silver David","year":"2016"},{"key":"e_1_3_2_1_25_1","unstructured":"David Silver Thomas Hubert Julian Schrittwieser Ioannis Antonoglou Matthew Lai Arthur Guez Marc Lanctot Laurent Sifre Dharshan Kumaran Thore Graepel et almbox. 2017a. Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm. arXiv preprint arXiv:1712.01815 (2017).  David Silver Thomas Hubert Julian Schrittwieser Ioannis Antonoglou Matthew Lai Arthur Guez Marc Lanctot Laurent Sifre Dharshan Kumaran Thore Graepel et almbox. 2017a. Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm. arXiv preprint arXiv:1712.01815 (2017)."},{"key":"e_1_3_2_1_26_1","volume-title":"Nature","volume":"550","author":"Silver David","year":"2017"},{"key":"e_1_3_2_1_27_1","unstructured":"Charles J. Stone. 1982. Optimal Global Rates of Convergence for Nonparametric Regression. The Annals of Statistics (1982) 1040--1053.  Charles J. Stone. 1982. Optimal Global Rates of Convergence for Nonparametric Regression. The Annals of Statistics (1982) 1040--1053."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of International Conference of Machine Learning","volume":"96","author":"Szepesv\u00e1ri Csaba"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Alexandre B. Tsybakov. 2009. Introduction to Nonparametric Estimation. Springer.  Alexandre B. Tsybakov. 2009. Introduction to Nonparametric Estimation. Springer.","DOI":"10.1007\/b13794"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Martin J. Wainwright. 2019. High-dimensional statistics: A non-asymptotic viewpoint. Vol. 48. Cambridge University Press.  Martin J. Wainwright. 2019. High-dimensional statistics: A non-asymptotic viewpoint. Vol. 48. Cambridge University Press.","DOI":"10.1017\/9781108627771"},{"key":"e_1_3_2_1_31_1","unstructured":"Chen-Yu Wei Yi-Te Hong and Chi-Jen Lu. 2017. Online reinforcement learning in stochastic games. In Advances in Neural Information Processing Systems. 4987--4997.  Chen-Yu Wei Yi-Te Hong and Chi-Jen Lu. 2017. Online reinforcement learning in stochastic games. In Advances in Neural Information Processing Systems. 4987--4997."},{"key":"e_1_3_2_1_32_1","unstructured":"Zhuora Yang Yuchen Xie and Zhaoran Wang. 2019. A Theoretical Analysis of Deep Q-Learning. arxiv: 1901.00137 [cs.LG]  Zhuora Yang Yuchen Xie and Zhaoran Wang. 2019. A Theoretical Analysis of Deep Q-Learning. arxiv: 1901.00137 [cs.LG]"}],"event":{"name":"FODS '20: ACM-IMS Foundations of Data Science Conference","location":"Virtual Event USA","acronym":"FODS '20"},"container-title":["Proceedings of the 2020 ACM-IMS on Foundations of Data Science Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3412815.3416888","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3412815.3416888","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:25:02Z","timestamp":1750195502000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3412815.3416888"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,18]]},"references-count":31,"alternative-id":["10.1145\/3412815.3416888","10.1145\/3412815"],"URL":"https:\/\/doi.org\/10.1145\/3412815.3416888","relation":{},"subject":[],"published":{"date-parts":[[2020,10,18]]},"assertion":[{"value":"2020-10-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}