{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T17:41:24Z","timestamp":1774719684171,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031255489","type":"print"},{"value":"9783031255496","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-25549-6_6","type":"book-chapter","created":{"date-parts":[[2023,3,21]],"date-time":"2023-03-21T09:07:44Z","timestamp":1679389664000},"page":"74-87","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Game-Theoretic Approach to\u00a0Multi-agent Trust Region Optimization"],"prefix":"10.1007","author":[{"given":"Ying","family":"Wen","sequence":"first","affiliation":[]},{"given":"Hui","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yaodong","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Minne","family":"Li","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Xu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,22]]},"reference":[{"key":"6_CR1","unstructured":"Balduzzi, D., Garnelo, M., Bachrach, Y., Czarnecki, W., et al.: Open-ended learning in symmetric zero-sum games. In: ICML, vol. 97, pp. 434\u2013443. PMLR (2019)"},{"key":"6_CR2","unstructured":"Balduzzi, D., Racani\u00e8re, S., Martens, J., Foerster, J.N., et al.: The mechanics of n-player differentiable games. In: ICML, vol. 80, pp. 363\u2013372. PMLR (2018)"},{"key":"6_CR3","unstructured":"Berner, C., Brockman, G., Chan, B., Cheung, V., et al.: Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680 (2019)"},{"key":"6_CR4","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1613\/jair.1332","volume":"22","author":"M Bowling","year":"2004","unstructured":"Bowling, M., Veloso, M.: Existence of multiagent equilibria with limited agents. J. Artif. Intell. Res. 22, 353\u2013384 (2004)","journal-title":"J. Artif. Intell. Res."},{"key":"6_CR5","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1007\/978-3-642-14435-6_7","volume-title":"Innovations in Multi-agent Systems and Applications - 1","author":"L Bu\u015foniu","year":"2010","unstructured":"Bu\u015foniu, L., Babu\u0161ka, R., De Schutter, B.: Multi-agent reinforcement learning: an overview. In: Srinivasan, D., Jain, L.C. (eds.) Innovations in Multi-agent Systems and Applications - 1, pp. 183\u2013221. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-14435-6_7"},{"issue":"1","key":"6_CR6","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1137\/070699652","volume":"39","author":"C Daskalakis","year":"2009","unstructured":"Daskalakis, C., Goldberg, P.W., Papadimitriou, C.H.: The complexity of computing a nash equilibrium. SIAM J. Comput. 39(1), 195\u2013259 (2009)","journal-title":"SIAM J. Comput."},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Fabrikant, A., Papadimitriou, C., Talwar, K.: The complexity of pure nash equilibria. In: STOC, pp. 604\u2013612 (2004)","DOI":"10.1145\/1007352.1007445"},{"key":"6_CR8","unstructured":"Foerster, J., Chen, R.Y., Al-Shedivat, M., Whiteson, S., et al.: Learning with opponent-learning awareness. In: AAMAS, pp. 122\u2013130 (2018)"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Foerster, J.N., Farquhar, G., Afouras, T., Nardelli, N., et al.: Counterfactual multi-agent policy gradients. In: AAAI, pp. 2974\u20132982. AAAI Press (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"6_CR10","unstructured":"Fudenberg, D., Drew, F., Levine, D.K., Levine, D.K.: The Theory of Learning in Games, vol. 2. MIT Press (1998)"},{"key":"6_CR11","unstructured":"Hernandez-Leal, P., Kaisers, M., Baarslag, T., de Cote, E.M.: A survey of learning in multiagent environments: dealing with non-stationarity. arXiv preprint arXiv:1707.09183 (2017)"},{"key":"6_CR12","unstructured":"Kakade, S.M., Langford, J.: Approximately optimal approximate reinforcement learning. In: ICML, pp. 267\u2013274. Morgan Kaufmann (2002)"},{"key":"6_CR13","unstructured":"Koul, A.: A collection of multi agent environments based on OpenAI gym (2019). https:\/\/github.com\/koulanurag\/ma-gym.git"},{"key":"6_CR14","unstructured":"Lanctot, M., Zambaldi, V.F., Gruslys, A., Lazaridou, A., et al.: A unified game-theoretic approach to multiagent reinforcement learning. In: NeurIPS, pp. 4190\u20134203 (2017)"},{"key":"6_CR15","unstructured":"Letcher, A., Foerster, J.N., Balduzzi, D., Rockt\u00e4schel, T., et al.: Stable opponent shaping in differentiable games. In: ICLR (2019)"},{"key":"6_CR16","unstructured":"Li, W., Wang, X., Jin, B., Sheng, J., et al.: Dealing with non-stationarity in multi-agent reinforcement learning via trust region decomposition. arXiv preprint arXiv:2102.10616 (2021)"},{"key":"6_CR17","unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N., et al.: Continuous control with deep reinforcement learning. In: ICLR (2016)"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multi-agent reinforcement learning. In: Machine Learning Proceedings, pp. 157\u2013163. Elsevier (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"6_CR19","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., et al.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: NeurIPS, pp. 6379\u20136390 (2017)"},{"issue":"1","key":"6_CR20","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1137\/18M1231298","volume":"2","author":"E Mazumdar","year":"2020","unstructured":"Mazumdar, E., Ratliff, L.J., Sastry, S.S.: On gradient-based learning in continuous games. SIAM J. Math. Data Sci. 2(1), 103\u2013131 (2020)","journal-title":"SIAM J. Math. Data Sci."},{"key":"6_CR21","unstructured":"Mertikopoulos, P., Lecouat, B., Zenati, H., Foo, C., et al.: Optimistic mirror descent in saddle-point problems: going the extra (gradient) mile. In: ICLR (2019)"},{"key":"6_CR22","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., et al.: Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"6_CR23","unstructured":"Muller, P., Omidshafiei, S., Rowland, M., Tuyls, K., et al.: A generalized training approach for multiagent learning. In: ICLR (2020)"},{"issue":"1","key":"6_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41598-019-45619-9","volume":"9","author":"S Omidshafiei","year":"2019","unstructured":"Omidshafiei, S., Papadimitriou, C., Piliouras, G., Tuyls, K., et al.: $$\\alpha $$-rank: multi-agent evaluation by evolution. Sci. Rep. 9(1), 1\u201329 (2019)","journal-title":"Sci. Rep."},{"key":"6_CR25","unstructured":"Peng, P., Wen, Y., Yang, Y., Yuan, Q., et al.: Multiagent bidirectionally-coordinated nets: emergence of human-level coordination in learning to play starcraft combat games. arXiv preprint arXiv:1703.10069 (2017)"},{"key":"6_CR26","unstructured":"Rashid, T., Samvelyan, M., de Witt, C.S., Farquhar, G., et al.: QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning. In: ICML, vol. 80, pp. 4292\u20134301 (2018)"},{"key":"6_CR27","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M.I., et al.: Trust region policy optimization. In: ICML. JMLR Workshop and Conference Proceedings, vol. 37, pp. 1889\u20131897. JMLR.org (2015)"},{"key":"6_CR28","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., et al.: Proximal policy optimization algorithms. CoRR abs\/1707.06347 (2017)"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Shoham, Y., Leyton-Brown, K.: Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations. Cambridge University Press (2008)","DOI":"10.1017\/CBO9780511811654"},{"key":"6_CR30","unstructured":"Son, K., Kim, D., Kang, W.J., Hostallero, D., et al.: QTRAN: learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: ICML, vol. 97, pp. 5887\u20135896. PMLR (2019)"},{"key":"6_CR31","unstructured":"Sunehag, P., Lever, G., Gruslys, A., Czarnecki, W.M., et al.: Value-decomposition networks for cooperative multi-agent learning based on team reward. In: AAMAS, pp. 2085\u20132087 (2018)"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Tan, M.: Multi-agent reinforcement learning: Independent vs. cooperative agents. In: AAAI, pp. 330\u2013337 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"6_CR33","unstructured":"Tang, J., Paster, K., Abbeel, P.: Equilibrium finding via asymmetric self-play reinforcement learning. In: Deep Reinforcement Learning Workshop NeurIPS 2018 (2018)"},{"key":"6_CR34","unstructured":"Terry, J.K., Black, B.: Multiplayer support for the arcade learning environment. arXiv preprint arXiv:2009.09341 (2020)"},{"issue":"1","key":"6_CR35","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1007\/s10458-019-09432-y","volume":"34","author":"K Tuyls","year":"2020","unstructured":"Tuyls, K., Perolat, J., Lanctot, M., Hughes, E., et al.: Bounds and dynamics for empirical game theoretic analysis. Auton. Agent. Multi-agent Syst. 34(1), 7 (2020)","journal-title":"Auton. Agent. Multi-agent Syst."},{"key":"6_CR36","unstructured":"Tuyls, K., Perolat, J., Lanctot, M., Leibo, J.Z., et al.: A generalised method for empirical game theoretic analysis. In: AAMAS, pp. 77\u201385 (2018)"},{"issue":"7782","key":"6_CR37","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., Babuschkin, I., Czarnecki, W.M., Mathieu, M., et al.: Grandmaster level in starcraft II using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"key":"6_CR38","unstructured":"Wellman, M.P.: Methods for empirical game-theoretic analysis. In: AAAI, pp. 1552\u20131556 (2006)"},{"key":"6_CR39","unstructured":"de Witt, C.S., Peng, B., Kamienny, P.A., Torr, P., et al.: Deep multi-agent reinforcement learning for decentralized continuous cooperative control (2020)"},{"key":"6_CR40","unstructured":"Yang, Y., Wang, J.: An overview of multi-agent reinforcement learning from game theoretical perspective. arXiv preprint arXiv:2011.00583 (2020)"},{"key":"6_CR41","unstructured":"Yang, Y., Wen, Y., Wang, J., Chen, L., et al.: Multi-agent determinantal q-learning. In: ICML 2020, pp. 10757\u201310766. PMLR (2020)"},{"key":"6_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, C., Lesser, V.R.: Multi-agent learning with policy prediction. In: AAAI. AAAI Press (2010)","DOI":"10.1609\/aaai.v24i1.7639"},{"key":"6_CR43","unstructured":"Zhou, M., Luo, J., Villela, J., Yang, Y., et al.: Smarts: scalable multi-agent reinforcement learning training school for autonomous driving. arXiv preprint arXiv:2010.09776 (2020)"}],"container-title":["Lecture Notes in Computer Science","Distributed Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-25549-6_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,21]],"date-time":"2023-03-21T09:09:36Z","timestamp":1679389776000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-25549-6_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031255489","9783031255496"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-25549-6_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Distributed Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 December 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 December 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dai22022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.adai.ai\/dai\/2022\/2022.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}