{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:21:22Z","timestamp":1750220482780,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,9,24]],"date-time":"2021-09-24T00:00:00Z","timestamp":1632441600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,9,24]]},"DOI":"10.1145\/3488933.3489029","type":"proceedings-article","created":{"date-parts":[[2022,2,25]],"date-time":"2022-02-25T11:36:59Z","timestamp":1645789019000},"page":"636-643","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Meta Actor-Critic Framework for Multi-Agent Reinforcement Learning"],"prefix":"10.1145","author":[{"given":"Jiateng","family":"Huang","sequence":"first","affiliation":[{"name":"Institute for Quantum Information &amp; State Key Laboratory of High Performance Computing, College of Computer, National University of Defense Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wanrong","family":"Huang","sequence":"additional","affiliation":[{"name":"Artifcial Intelligence Research Center, National Innovation Institute of Defense Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dan","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Computer &amp; Hefei Interdisciplinary Center, National University of Defense Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Long","family":"Lan","sequence":"additional","affiliation":[{"name":"Institute for Quantum Information &amp; State Key Laboratory of High Performance Computing, College of Computer, National University of Defense Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,2,25]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"arXiv","author":"Large Scale Deep Reinforcement AI","year":"1912","unstructured":"Open AI Dota 2 with Large Scale Deep Reinforcement Learning. 2019. arXiv : 1912 .06680 [cs.LG]. OpenAI Dota 2 with Large Scale Deep Reinforcement Learning. 2019. arXiv: 1912.06680 [cs.LG]."},{"key":"e_1_3_2_1_2_1","volume-title":"Standardizing Research in Multi-Agent Reinforcement Learning for Demand Response and Urban Energy Management","author":"Vazquez-Canteli Jose R","year":"2020","unstructured":"Jose R Vazquez-Canteli CityLearn : Standardizing Research in Multi-Agent Reinforcement Learning for Demand Response and Urban Energy Management . 2020 . arXiv: 2012.10504 [cs.LG] http:\/\/www.iesl.cs.umass.edu\/data\/data-umasscitationfield Jose R Vazquez-Canteli CityLearn: Standardizing Research in Multi-Agent Reinforcement Learning for Demand Response and Urban Energy Management. 2020. arXiv: 2012.10504 [cs.LG] http:\/\/www.iesl.cs.umass.edu\/data\/data-umasscitationfield"},{"key":"e_1_3_2_1_3_1","volume-title":"Advances in Neural Information Processing Systems.Ed. by I. Guyon","author":"Lowe Ryan","year":"2017","unstructured":"Ryan Lowe \u201c Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments \u201d. In: Advances in Neural Information Processing Systems.Ed. by I. Guyon Vol. 30 . Curran Associates, Inc. , 2017 . Ryan Lowe \u201cMulti-Agent Actor-Critic for Mixed Cooperative-Competitive Environments\u201d. In: Advances in Neural Information Processing Systems.Ed. by I. Guyon Vol. 30. Curran Associates, Inc., 2017."},{"key":"e_1_3_2_1_4_1","volume-title":"arXiv","author":"Reducing Overestimation Johannes Ackermann","year":"1910","unstructured":"Johannes Ackermann Reducing Overestimation Bias in Multi-Agent Domains Using Double Centralized Critics . 2019. arXiv : 1910 .01465 [cs.LG]. Johannes Ackermann Reducing Overestimation Bias in Multi-Agent Domains Using Double Centralized Critics. 2019. arXiv: 1910.01465 [cs.LG]."},{"key":"e_1_3_2_1_5_1","first-page":"4301","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018. Ed. by Jennifer G. Dy and Andreas Krause.","volume":"80","author":"Rashid Tabish","year":"2018","unstructured":"Tabish Rashid \u201c QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning \u201d. In: Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018. Ed. by Jennifer G. Dy and Andreas Krause. Vol. 80 . Proceedings of Machine Learning Research. PMLR , 2018 , pp. 4292\u2013 4301 . Tabish Rashid \u201cQMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning\u201d. In: Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018. Ed. by Jennifer G. Dy and Andreas Krause. Vol. 80. Proceedings of Machine Learning Research. PMLR, 2018, pp. 4292\u20134301."},{"key":"e_1_3_2_1_6_1","volume-title":"A Survey.2020. arXiv","author":"Neural Networks Timothy Hospedales","year":"2004","unstructured":"Timothy Hospedales Meta-Learning in Neural Networks : A Survey.2020. arXiv : 2004 . 05439 [cs.LG]. Timothy Hospedales Meta-Learning in Neural Networks: A Survey.2020. arXiv: 2004. 05439 [cs.LG]."},{"key":"e_1_3_2_1_7_1","first-page":"1135","volume-title":"Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, 6-11 August 2017.Ed. by Doina Precup and Yee Whye Teh.","volume":"70","author":"Finn Chelsea","year":"2017","unstructured":"Chelsea Finn , Pieter Abbeel , and Sergey Levine . \u201c Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks \u201d. In: Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, 6-11 August 2017.Ed. by Doina Precup and Yee Whye Teh. Vol. 70 . Proceedings of Machine Learning Research. PMLR , 2017 , pp. 1126\u2013 1135 . 9 Chelsea Finn, Pieter Abbeel, and Sergey Levine. \u201cModel-Agnostic Meta-Learning for Fast Adaptation of Deep Networks\u201d. In: Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, 6-11 August 2017.Ed. by Doina Precup and Yee Whye Teh. Vol. 70. Proceedings of Machine Learning Research. PMLR, 2017, pp. 1126\u20131135. 9"},{"key":"e_1_3_2_1_8_1","volume-title":"8th International Conference on Learning Representations, ICLR 2020","author":"Alet Ferran","year":"2020","unstructured":"Ferran Alet \u201c Meta-learning curiosity algorithms \u201d. In: 8th International Conference on Learning Representations, ICLR 2020 , Addis Ababa, Ethiopia , April 26-30, 2020 . OpenRe view.net, 2020. Ferran Alet \u201cMeta-learning curiosity algorithms\u201d. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenRe view.net, 2020."},{"key":"e_1_3_2_1_9_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural In formation Processing Systems 2020","author":"Zhou Wei","year":"2020","unstructured":"Wei Zhou \u201c Online Meta-Critic Learning for Off-Policy Actor-Critic Methods \u201d. In: Advances in Neural Information Processing Systems 33: Annual Conference on Neural In formation Processing Systems 2020 , NeurIPS 2020 , December 6-12, 2020, virtual.Ed. by Hugo Larochelle 2020. Wei Zhou \u201cOnline Meta-Critic Learning for Off-Policy Actor-Critic Methods\u201d. In: Advances in Neural Information Processing Systems 33: Annual Conference on Neural In formation Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual.Ed. by Hugo Larochelle 2020."},{"key":"e_1_3_2_1_10_1","first-page":"9317","volume-title":"Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019","author":"Veeriah Vivek","year":"2019","unstructured":"Vivek Veeriah \u201c Discovery of Useful Questions as Auxiliary Tasks \u201d. In: Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019 , NeurIPS 2019 , December 8-14, 2019, Vancouver, BC, Canada. Ed. by Hanna M. Wallach 2019, pp. 9306\u2013 9317 . Vivek Veeriah \u201cDiscovery of Useful Questions as Auxiliary Tasks\u201d. In: Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada. Ed. by Hanna M. Wallach 2019, pp. 9306\u20139317."},{"key":"e_1_3_2_1_11_1","volume-title":"8th International Confer ence on Learning Representations, ICLR 2020","author":"Kirsch Louis","year":"2020","unstructured":"Louis Kirsch , Sjoerd van Steenkiste , and J\u00fcrgen Schmidhuber . \u201c Improving Generalization in Meta Reinforcement Learning using Learned Objectives\u201d. In : 8th International Confer ence on Learning Representations, ICLR 2020 , Addis Ababa, Ethiopia , April 26-30, 2020 .OpenReview.net, 2020. Louis Kirsch, Sjoerd van Steenkiste, and J\u00fcrgen Schmidhuber. \u201cImproving Generalization in Meta Reinforcement Learning using Learned Objectives\u201d. In: 8th International Confer ence on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020.OpenReview.net, 2020."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.3389\/neuro.12.006.2007"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Igor Mordatch and Pieter Abbeel. \u201cEmergence of Grounded Compositional Language in Multi-Agent Populations\u201d. In: arXiv preprint arXiv:1703.04908 (2017).  Igor Mordatch and Pieter Abbeel. \u201cEmergence of Grounded Compositional Language in Multi-Agent Populations\u201d. In: arXiv preprint arXiv:1703.04908 (2017).","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"e_1_3_2_1_14_1","volume-title":"ma-gym: Collection of multi-agent environments based on OpenAI gym. https: \/\/github.com\/koulanurag\/ma-gym","author":"Koul Anurag","year":"2019","unstructured":"Anurag Koul . ma-gym: Collection of multi-agent environments based on OpenAI gym. https: \/\/github.com\/koulanurag\/ma-gym . 2019 . Anurag Koul. ma-gym: Collection of multi-agent environments based on OpenAI gym. https: \/\/github.com\/koulanurag\/ma-gym. 2019."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"e_1_3_2_1_16_1","volume-title":"4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings.Ed. by Yoshua Bengio and Yann LeCun.","author":"Lillicrap Timothy P.","year":"2016","unstructured":"Timothy P. Lillicrap \u201cContinuous control with deep reinforcement learning \u201d. In: 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings.Ed. by Yoshua Bengio and Yann LeCun. 2016 . Timothy P. Lillicrap \u201cContinuous control with deep reinforcement learning\u201d. In: 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings.Ed. by Yoshua Bengio and Yann LeCun. 2016."},{"key":"e_1_3_2_1_17_1","first-page":"1591","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018. Ed. by Jennifer G. Dy and Andreas Krause.","volume":"80","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto , Herke van Hoof , and David Meger . \u201c Addressing Function Approximation Error in Actor-Critic Methods \u201d. In: Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018. Ed. by Jennifer G. Dy and Andreas Krause. Vol. 80 . Proceedings of Machine Learning Research. PMLR , 2018 , pp. 1582\u2013 1591 . Scott Fujimoto, Herke van Hoof, and David Meger. \u201cAddressing Function Approximation Error in Actor-Critic Methods\u201d. In: Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018. Ed. by Jennifer G. Dy and Andreas Krause. Vol. 80. Proceedings of Machine Learning Research. PMLR, 2018, pp. 1582\u20131591."},{"key":"e_1_3_2_1_18_1","volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings.OpenReview.net","author":"Al-Shedivat Maruan","year":"2018","unstructured":"Maruan Al-Shedivat \u201c Continuous Adaptation via Meta-Learning in Nonstationary and Competitive Environments \u201d. In: 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings.OpenReview.net , 2018 . Maruan Al-Shedivat \u201cContinuous Adaptation via Meta-Learning in Nonstationary and Competitive Environments\u201d. In: 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings.OpenReview.net, 2018."},{"key":"e_1_3_2_1_19_1","volume-title":"Stadie Some Considerations on Learning to Explore via Meta-Reinforcement Learning","author":"Bradly","year":"2019","unstructured":"Bradly C. Stadie Some Considerations on Learning to Explore via Meta-Reinforcement Learning . 2019 . arXiv: 1803.01118 [cs.AI]. Bradly C. Stadie Some Considerations on Learning to Explore via Meta-Reinforcement Learning. 2019. arXiv: 1803.01118 [cs.AI]."},{"key":"e_1_3_2_1_20_1","first-page":"5701","volume-title":"Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019","author":"Garcia Francisco M.","year":"2019","unstructured":"Francisco M. Garcia and Philip S. Thomas . \u201cA Meta-MDP Approach to Exploration for Lifelong Reinforcement Learning \u201d. In: Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019 , NeurIPS 2019 , December 8-14, 2019, Vancouver, BC, Canada.Ed. by Hanna M. Wallach 2019, pp. 5692\u2013 5701 . Francisco M. Garcia and Philip S. Thomas. \u201cA Meta-MDP Approach to Exploration for Lifelong Reinforcement Learning\u201d. In: Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada.Ed. by Hanna M. Wallach 2019, pp. 5692\u20135701."},{"key":"e_1_3_2_1_21_1","unstructured":"John Schulman Proximal Policy Optimization Algorithms. 2017. arXiv: 1707.06347 [cs.LG].  John Schulman Proximal Policy Optimization Algorithms. 2017. arXiv: 1707.06347 [cs.LG]."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2019.01.011"},{"key":"e_1_3_2_1_23_1","unstructured":"Jianzhun Shao Credit Assignment with Meta-Policy Gradient for Multi-Agent Reinforce ment Learning.2021. arXiv: 2102.12957 [cs.LG].  Jianzhun Shao Credit Assignment with Meta-Policy Gradient for Multi-Agent Reinforce ment Learning.2021. arXiv: 2102.12957 [cs.LG]."},{"key":"e_1_3_2_1_24_1","volume-title":"Learning Roles to Decompose Multi-Agent Tasks.2020. arXiv","author":"Tonghan Wang","year":"2010","unstructured":"Tonghan Wang RODE : Learning Roles to Decompose Multi-Agent Tasks.2020. arXiv : 2010 .01523 [cs.LG]. Tonghan Wang RODE: Learning Roles to Decompose Multi-Agent Tasks.2020. arXiv: 2010.01523 [cs.LG]."},{"key":"e_1_3_2_1_25_1","volume-title":"Dif-MAML: Decentralized Multi-Agent Meta Learning.2020. arXiv","author":"Kayaalp Mert","year":"2010","unstructured":"Mert Kayaalp , Stefan Vlaski , and Ali H. Sayed . Dif-MAML: Decentralized Multi-Agent Meta Learning.2020. arXiv : 2010 .02870 [cs.LG]. Mert Kayaalp, Stefan Vlaski, and Ali H. Sayed. Dif-MAML: Decentralized Multi-Agent Meta Learning.2020. arXiv: 2010.02870 [cs.LG]."},{"key":"e_1_3_2_1_26_1","volume-title":"arXiv","author":"Distributed Ye Hu","year":"2012","unstructured":"Ye Hu Distributed Multi-agent Meta Learning for Trajectory Design in Wireless Drone Networks .2020. arXiv : 2012 .03158 [cs.LG]. Ye Hu Distributed Multi-agent Meta Learning for Trajectory Design in Wireless Drone Networks.2020. arXiv: 2012.03158 [cs.LG]."},{"key":"e_1_3_2_1_27_1","volume-title":"Training an Interactive Helper.2019. arXiv","author":"Woodward Mark","year":"1906","unstructured":"Mark Woodward , Chelsea Finn , and Karol Hausman . Training an Interactive Helper.2019. arXiv : 1906 .10165 [cs.AI]. Mark Woodward, Chelsea Finn, and Karol Hausman. Training an Interactive Helper.2019. arXiv: 1906.10165 [cs.AI]."}],"event":{"name":"AIPR 2021: 2021 4th International Conference on Artificial Intelligence and Pattern Recognition","acronym":"AIPR 2021","location":"Xiamen China"},"container-title":["2021 4th International Conference on Artificial Intelligence and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488933.3489029","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3488933.3489029","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:49:00Z","timestamp":1750193340000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488933.3489029"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,24]]},"references-count":27,"alternative-id":["10.1145\/3488933.3489029","10.1145\/3488933"],"URL":"https:\/\/doi.org\/10.1145\/3488933.3489029","relation":{},"subject":[],"published":{"date-parts":[[2021,9,24]]},"assertion":[{"value":"2022-02-25","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}