{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T10:29:17Z","timestamp":1758191357983,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","funder":[{"name":"National Science and Technology Major Project","award":["2022ZD0116404"],"award-info":[{"award-number":["2022ZD0116404"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,18]]},"DOI":"10.1145\/3719545.3719553","type":"proceedings-article","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T09:38:41Z","timestamp":1758015521000},"page":"46-57","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Variational Stochastic Games"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-2201-0733","authenticated-orcid":false,"given":"Zhiyu","family":"Zhao","sequence":"first","affiliation":[{"name":"The Institute of Automation of the Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of the Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4502-1760","authenticated-orcid":false,"given":"Haifeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"The Institute of Automation of the Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of the Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,9,16]]},"reference":[{"key":"e_1_3_3_1_2_2","volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings","author":"Abdolmaleki Abbas","year":"2018","unstructured":"Abbas Abdolmaleki, Jost\u00a0Tobias Springenberg, Yuval Tassa, R\u00e9mi Munos, Nicolas Heess, and Martin\u00a0A. Riedmiller. 2018. Maximum a Posteriori Policy Optimisation. In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. OpenReview.net. https:\/\/openreview.net\/forum?id=S1ANxQW0b"},{"key":"e_1_3_3_1_3_2","unstructured":"Alekh Agarwal Sham\u00a0M. Kakade Jason\u00a0D. Lee and Gaurav Mahajan. 2021. On the Theory of Policy Gradient Methods: Optimality Approximation and Distribution Shift. J. Mach. Learn. Res. 22 (2021) 98:1\u201398:76. http:\/\/jmlr.org\/papers\/v22\/19-736.html"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"David\u00a0M Blei Alp Kucukelbir and Jon\u00a0D McAuliffe. 2017. Variational inference: A review for statisticians. Journal of the American statistical Association 112 518 (2017) 859\u2013877.","DOI":"10.1080\/01621459.2017.1285773"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"S\u00e9bastien Bubeck et\u00a0al. 2015. Convex optimization: Algorithms and complexity. Foundations and Trends\u00ae in Machine Learning 8 3-4 (2015) 231\u2013357.","DOI":"10.1561\/2200000050"},{"key":"e_1_3_3_1_6_2","first-page":"1909","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Cui Kai","year":"2021","unstructured":"Kai Cui and Heinz Koeppl. 2021. Approximately solving mean field games via entropy-regularized deep reinforcement learning. In International Conference on Artificial Intelligence and Statistics. PMLR, 1909\u20131917."},{"key":"e_1_3_3_1_7_2","volume-title":"Proceedings of the Thirty-Second Conference on Uncertainty in Artificial Intelligence, UAI 2016, June 25-29, 2016, New York City, NY, USA","author":"Fox Roy","year":"2016","unstructured":"Roy Fox, Ari Pakman, and Naftali Tishby. 2016. Taming the Noise in Reinforcement Learning via Soft Updates. In Proceedings of the Thirty-Second Conference on Uncertainty in Artificial Intelligence, UAI 2016, June 25-29, 2016, New York City, NY, USA, Alexander Ihler and Dominik Janzing (Eds.). AUAI Press. http:\/\/auai.org\/uai2016\/proceedings\/papers\/219.pdf"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/37"},{"key":"e_1_3_3_1_9_2","series-title":"JMLR Workshop and Conference Proceedings","first-page":"1804","volume-title":"Proceedings of the 33nd International Conference on Machine Learning, ICML 2016, New York City, NY, USA, June 19-24, 2016","volume":"48","author":"He He","year":"2016","unstructured":"He He and Jordan\u00a0L. Boyd-Graber. 2016. Opponent Modeling in Deep Reinforcement Learning. In Proceedings of the 33nd International Conference on Machine Learning, ICML 2016, New York City, NY, USA, June 19-24, 2016(JMLR Workshop and Conference Proceedings, Vol.\u00a048), Maria-Florina Balcan and Kilian\u00a0Q. Weinberger (Eds.). JMLR.org, 1804\u20131813. http:\/\/proceedings.mlr.press\/v48\/he16.html"},{"key":"e_1_3_3_1_10_2","first-page":"1531","volume-title":"Advances in Neural Information Processing Systems 14 [Neural Information Processing Systems: Natural and Synthetic, NIPS 2001, December 3-8, 2001, Vancouver, British Columbia, Canada]","author":"Kakade Sham\u00a0M.","year":"2001","unstructured":"Sham\u00a0M. Kakade. 2001. A Natural Policy Gradient. In Advances in Neural Information Processing Systems 14 [Neural Information Processing Systems: Natural and Synthetic, NIPS 2001, December 3-8, 2001, Vancouver, British Columbia, Canada], Thomas\u00a0G. Dietterich, Suzanna Becker, and Zoubin Ghahramani (Eds.). MIT Press, 1531\u20131538. https:\/\/proceedings.neurips.cc\/paper\/2001\/hash\/4b86abe48d358ecf194c56c69108433e-Abstract.html"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Rudolph\u00a0Emil Kalman. 1960. A new approach to linear filtering and prediction problems. (1960).","DOI":"10.1115\/1.3662552"},{"key":"e_1_3_3_1_12_2","unstructured":"Sergey Levine. 2018. Reinforcement Learning and Control as Probabilistic Inference: Tutorial and Review. CoRR abs\/1805.00909 (2018). arXiv:https:\/\/arXiv.org\/abs\/1805.00909http:\/\/arxiv.org\/abs\/1805.00909"},{"key":"e_1_3_3_1_13_2","first-page":"207","volume-title":"Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States","author":"Levine Sergey","year":"2013","unstructured":"Sergey Levine and Vladlen Koltun. 2013. Variational Policy Search via Trajectory Optimization. In Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States, Christopher J.\u00a0C. Burges, L\u00e9on Bottou, Zoubin Ghahramani, and Kilian\u00a0Q. Weinberger (Eds.). 207\u2013215. https:\/\/proceedings.neurips.cc\/paper\/2013\/hash\/38af86134b65d0f10fe33d30dd76442e-Abstract.html"},{"key":"e_1_3_3_1_14_2","volume-title":"4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings","author":"Lillicrap Timothy\u00a0P.","year":"2016","unstructured":"Timothy\u00a0P. Lillicrap, Jonathan\u00a0J. Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2016. Continuous control with deep reinforcement learning. In 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1509.02971"},{"key":"e_1_3_3_1_15_2","unstructured":"Ryan Lowe Yi Wu Aviv Tamar Jean Harb Pieter Abbeel and Igor Mordatch. 2017. Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments. CoRR abs\/1706.02275 (2017). arXiv:https:\/\/arXiv.org\/abs\/1706.02275http:\/\/arxiv.org\/abs\/1706.02275"},{"key":"e_1_3_3_1_16_2","series-title":"Proceedings of Machine Learning Research","first-page":"7688","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18-24 July 2021, Virtual Event","volume":"139","author":"Mguni David\u00a0Henry","year":"2021","unstructured":"David\u00a0Henry Mguni, Yutong Wu, Yali Du, Yaodong Yang, Ziyi Wang, Minne Li, Ying Wen, Joel Jennings, and Jun Wang. 2021. Learning in Nonzero-Sum Stochastic Games with Potentials. In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18-24 July 2021, Virtual Event(Proceedings of Machine Learning Research, Vol.\u00a0139), Marina Meila and Tong Zhang (Eds.). PMLR, 7688\u20137699. http:\/\/proceedings.mlr.press\/v139\/mguni21a.html"},{"key":"e_1_3_3_1_17_2","first-page":"4257","volume-title":"International conference on machine learning","author":"Raileanu Roberta","year":"2018","unstructured":"Roberta Raileanu, Emily Denton, Arthur Szlam, and Rob Fergus. 2018. Modeling others using oneself in multi-agent reinforcement learning. In International conference on machine learning. PMLR, 4257\u20134266."},{"key":"e_1_3_3_1_18_2","unstructured":"Konrad Rawlik Marc Toussaint and Sethu Vijayakumar. 2010. Approximate Inference and Stochastic Optimal Control. CoRR abs\/1009.3958 (2010). arxiv:https:\/\/arXiv.org\/abs\/1009.3958http:\/\/arxiv.org\/abs\/1009.3958"},{"key":"e_1_3_3_1_19_2","first-page":"3052","volume-title":"IJCAI 2013, Proceedings of the 23rd International Joint Conference on Artificial Intelligence, Beijing, China, August 3-9, 2013","author":"Rawlik Konrad","year":"2013","unstructured":"Konrad Rawlik, Marc Toussaint, and Sethu Vijayakumar. 2013. On Stochastic Optimal Control and Reinforcement Learning by Approximate Inference (Extended Abstract). In IJCAI 2013, Proceedings of the 23rd International Joint Conference on Artificial Intelligence, Beijing, China, August 3-9, 2013, Francesca Rossi (Ed.). IJCAI\/AAAI, 3052\u20133056. http:\/\/www.aaai.org\/ocs\/index.php\/IJCAI\/IJCAI13\/paper\/view\/6658"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Lloyd\u00a0S Shapley. 1953. Stochastic games. Proceedings of the national academy of sciences 39 10 (1953) 1095\u20131100.","DOI":"10.1073\/pnas.39.10.1953"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511811654"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","unstructured":"Zheng Tian Ying Wen Zhichen Gong Faiz Punakkath Shihao Zou and Jun Wang. 2019. A Regularized Opponent Model with Maximum Entropy Objective. 10.48550\/arXiv.1905.08087 arXiv:https:\/\/arXiv.org\/abs\/1905.08087 [cs].","DOI":"10.48550\/arXiv.1905.08087"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/85"},{"key":"e_1_3_3_1_24_2","unstructured":"Marc Toussaint. 2009. Probabilistic inference as a model of planned behavior. Kunstliche Intelligenz 3 (01 2009)."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553508"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143963"},{"key":"e_1_3_3_1_27_2","volume-title":"2018 AAAI Spring Symposia, Stanford University, Palo Alto, California, USA, March 26-28, 2018","author":"Wei Ermo","year":"2018","unstructured":"Ermo Wei, Drew Wicke, David Freelan, and Sean Luke. 2018. Multiagent Soft Q-Learning. In 2018 AAAI Spring Symposia, Stanford University, Palo Alto, California, USA, March 26-28, 2018. AAAI Press. https:\/\/aaai.org\/ocs\/index.php\/SSS\/SSS18\/paper\/view\/17508"},{"key":"e_1_3_3_1_28_2","volume-title":"7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019","author":"Wen Ying","year":"2019","unstructured":"Ying Wen, Yaodong Yang, Rui Luo, Jun Wang, and Wei Pan. 2019. Probabilistic Recursive Reasoning for Multi-Agent Reinforcement Learning. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net. https:\/\/openreview.net\/forum?id=rkl6As0cF7"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/58"},{"key":"e_1_3_3_1_30_2","unstructured":"Kaiqing Zhang Zhuoran Yang and Tamer Basar. 2019. Multi-Agent Reinforcement Learning: A Selective Overview of Theories and Algorithms. CoRR abs\/1911.10635 (2019). arXiv:https:\/\/arXiv.org\/abs\/1911.10635http:\/\/arxiv.org\/abs\/1911.10635"}],"event":{"name":"DAI '24: 6th International Conference on Distributed Artificial Intelligences","acronym":"DAI '24","location":"Singapore Singapore"},"container-title":["Proceedings of the 2024 Sixth International Conference on Distributed Artificial Intelligences"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719545.3719553","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T13:11:51Z","timestamp":1758114711000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719545.3719553"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,18]]},"references-count":29,"alternative-id":["10.1145\/3719545.3719553","10.1145\/3719545"],"URL":"https:\/\/doi.org\/10.1145\/3719545.3719553","relation":{},"subject":[],"published":{"date-parts":[[2024,12,18]]},"assertion":[{"value":"2025-09-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}