{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T18:43:01Z","timestamp":1777488181713,"version":"3.51.4"},"publisher-location":"Berlin, Heidelberg","reference-count":61,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_14","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"441-470","source":"Crossref","is-referenced-by-count":132,"title":["Game Theory and Multi-agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Ann","family":"Now\u00e9","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Vrancx","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yann-Micha\u00ebl","family":"De Hauwere","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"issue":"1","key":"14_CR1","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/0304-4068(74)90037-8","volume":"1","author":"R. Aumann","year":"1974","unstructured":"Aumann, R.: Subjectivity and Correlation in Randomized Strategies. Journal of Mathematical Economics\u00a01(1), 67\u201396 (1974)","journal-title":"Journal of Mathematical Economics"},{"key":"14_CR2","unstructured":"Bowling, M.: Convergence and No-Regret in Multiagent Learning. In: Advances in Neural Information Processing Systems 17 (NIPS), pp. 209\u2013216 (2005)"},{"key":"14_CR3","unstructured":"Bowling, M., Veloso, M.: Convergence of Gradient Dynamics with a Variable Learning Rate. In: Proceedings of the Eighteenth International Conference on Machine Learning (ICML), pp. 27\u201334 (2001)"},{"key":"14_CR4","unstructured":"Bowling, M., Veloso, M.: Scalable Learning in Stochastic Games. In: AAAI Workshop on Game Theoretic and Decision Theoretic Agents (2002)"},{"issue":"2","key":"14_CR5","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L. Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., De Schutter, B.: A comprehensive survey of multiagent reinforcement learning. IEEE Transactions on Systems, Man, and Cybernetics, Part C: Applications and Reviews\u00a038(2), 156\u2013172 (2008)","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part C: Applications and Reviews"},{"key":"14_CR6","unstructured":"Chalkiadakis, G., Boutilier, C.: Sequential Decision Making in Repeated Coalition Formation under Uncertainty. In: Parkes, P.M., Parsons (eds.) Proceedings of 7th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2008), pp. 347\u2013354 (2008)"},{"key":"14_CR7","unstructured":"Claus, C., Boutilier, C.: The Dynamics of Reinforcement Learning in Cooperative Multiagent Systems. In: Proceedings of the National Conference on Artificial Intelligence, pp. 746\u2013752. John Wiley & Sons Ltd. (1998)"},{"key":"14_CR8","unstructured":"De Hauwere, Y.M., Vrancx, P., Now\u00e9, A.: Learning Multi-Agent State Space Representations. In: Proceedings of the 9th International Conference on Autonomous Agents and Multi-Agent Systems, Toronto, Canada, pp. 715\u2013722 (2010)"},{"key":"14_CR9","unstructured":"De Hauwere, Y.M., Vrancx, P., Now\u00e9, A.: Detecting and Solving Future Multi-Agent Interactions. In: Proceedings of the AAMAS Workshop on Adaptive and Learning Agents, Taipei, Taiwan, pp. 45\u201352 (2011)"},{"key":"14_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/b99492","volume-title":"Ant Colony Optimization","author":"M. Dorigo","year":"2004","unstructured":"Dorigo, M., St\u00fctzle, T.: Ant Colony Optimization. Bradford Company, MA (2004)"},{"key":"14_CR11","unstructured":"Fakir, M.: Resource Optimization Methods for Telecommunication Networks. PhD thesis, Department of Electronics and Informatics, Vrije Universiteit Brussel, Belgium (2004)"},{"key":"14_CR12","unstructured":"Foster, D., Young, H.: Regret Testing: A Simple Payoff-based Procedure for Learning Nash Equilibrium. University of Pennsylvania and Johns Hopkins University, Mimeo (2003)"},{"key":"14_CR13","first-page":"178","volume":"39","author":"D. Gillette","year":"1957","unstructured":"Gillette, D.: Stochastic Games with Zero Stop Probabilities. Ann. Math. Stud.\u00a039, 178\u2013187 (1957)","journal-title":"Ann. Math. Stud."},{"key":"14_CR14","unstructured":"Gintis, H.: Game Theory Evolving. Princeton University Press (2000)"},{"key":"14_CR15","unstructured":"Greenwald, A., Hall, K., Serrano, R.: Correlated Q-learning. In: Proceedings of the Twentieth International Conference on Machine Learning, pp. 242\u2013249 (2003)"},{"key":"14_CR16","unstructured":"Guestrin, C., Lagoudakis, M., Parr, R.: Coordinated Reinforcement Learning. In: Proceedings of the 19th International Conference on Machine Learning, pp. 227\u2013234 (2002a)"},{"key":"14_CR17","first-page":"253","volume-title":"18th National Conference on Artificial Intelligence","author":"C. Guestrin","year":"2002","unstructured":"Guestrin, C., Venkataraman, S., Koller, D.: Context-Specific Multiagent Coordination and Planning with Factored MDPs. In: 18th National Conference on Artificial Intelligence, pp. 253\u2013259. American Association for Artificial Intelligence, Menlo Park (2002b)"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Hart, S., Mas-Colell, A.: A Reinforcement Procedure Leading to Correlated Equilibrium. Economic Essays: A Festschrift for Werner Hildenbrand, 181\u2013200 (2001)","DOI":"10.1007\/978-3-662-04623-4_12"},{"key":"14_CR19","first-page":"1039","volume":"4","author":"J. Hu","year":"2003","unstructured":"Hu, J., Wellman, M.: Nash Q-learning for General-Sum Stochastic Games. The Journal of Machine Learning Research\u00a04, 1039\u20131069 (2003)","journal-title":"The Journal of Machine Learning Research"},{"key":"14_CR20","first-page":"326","volume-title":"Proceedings of the National Conference on Artificial Intelligence","author":"S. Kapetanakis","year":"2002","unstructured":"Kapetanakis, S., Kudenko, D.: Reinforcement Learning of Coordination in Cooperative Multi-Agent Systems. In: Proceedings of the National Conference on Artificial Intelligence, pp. 326\u2013331. AAAI Press, MIT Press, Menlo Park, Cambridge (2002)"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Kapetanakis, S., Kudenko, D., Strens, M.: Learning to Coordinate Using Commitment Sequences in Cooperative Multiagent-Systems. In: Proceedings of the Third Symposium on Adaptive Agents and Multi-agent Systems (AAMAS-2003), p. 2004 (2003)","DOI":"10.1007\/978-3-540-32274-0_7"},{"key":"14_CR22","volume-title":"Proceedings of the 21st International Conference on Machine Learning","author":"J. Kok","year":"2004","unstructured":"Kok, J., Vlassis, N.: Sparse Cooperative Q-learning. In: Proceedings of the 21st International Conference on Machine Learning. ACM, New York (2004a)"},{"key":"14_CR23","unstructured":"Kok, J., Vlassis, N.: Sparse Tabular Multiagent Q-learning. In: Proceedings of the 13th Benelux Conference on Machine Learning, Benelearn (2004b)"},{"key":"14_CR24","first-page":"1789","volume":"7","author":"J. Kok","year":"2006","unstructured":"Kok, J., Vlassis, N.: Collaborative Multiagent Reinforcement Learning by Payoff Propagation. Journal of Machine Learning Research\u00a07, 1789\u20131828 (2006)","journal-title":"Journal of Machine Learning Research"},{"key":"14_CR25","unstructured":"Kok, J., \u2019t Hoen, P., Bakker, B., Vlassis, N.: Utile Coordination: Learning Interdependencies among Cooperative Agents. In: Proceedings of the IEEE Symposium on Computational Intelligence and Games (CIG 2005), pp. 29\u201336 (2005)"},{"key":"14_CR26","doi-asserted-by":"crossref","unstructured":"Kononen, V.: Asymmetric Multiagent Reinforcement Learning. In: IEEE\/WIC International Conference on Intelligent Agent Technology (IAT 2003), pp. 336\u2013342 (2003)","DOI":"10.1109\/IAT.2003.1241094"},{"key":"14_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"733","DOI":"10.1007\/978-3-540-28651-6_109","volume-title":"Intelligent Data Engineering and Automated Learning \u2013 IDEAL 2004","author":"V. K\u00f6n\u00f6nen","year":"2004","unstructured":"K\u00f6n\u00f6nen, V.: Policy Gradient Method for Team Markov Games. In: Yang, Z.R., Yin, H., Everson, R.M. (eds.) IDEAL 2004. LNCS, vol.\u00a03177, pp. 733\u2013739. Springer, Heidelberg (2004)"},{"issue":"1","key":"14_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.2200\/S00108ED1V01Y200802AIM003","volume":"2","author":"K. Leyton-Brown","year":"2008","unstructured":"Leyton-Brown, K., Shoham, Y.: Essentials of Game Theory: A Concise Multidisciplinary Introduction. Synthesis Lectures on Artificial Intelligence and Machine Learning\u00a02(1), 1\u201388 (2008)","journal-title":"Synthesis Lectures on Artificial Intelligence and Machine Learning"},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"Littman, M.: Markov Games as a Framework for Multi-Agent Reinforcement Learning. In: Proceedings of the Eleventh International Conference on Machine Learning, pp. 157\u2013163. Morgan Kaufmann (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"14_CR30","unstructured":"Littman, M.: Friend-or-Foe Q-learning in General-Sum Games. In: Proceedings of the Eighteenth International Conference on Machine Learning, pp. 322\u2013328. Morgan Kaufmann (2001a)"},{"issue":"1","key":"14_CR31","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/S1389-0417(01)00015-8","volume":"2","author":"M. Littman","year":"2001","unstructured":"Littman, M.: Value-function Reinforcement Learning in Markov Games. Cognitive Systems Research\u00a02(1), 55\u201366 (2001b), http:\/\/www.sciencedirect.com\/science\/article\/B6W6C-430G1TK-4\/2\/822caf1574be32ae91adf15de90becc4 , doi:10.1016\/S1389-0417(01)00015-8","journal-title":"Cognitive Systems Research"},{"key":"14_CR32","unstructured":"Littman, M., Boyan, J.: A Distributed Reinforcement Learning Scheme for Network Routing. In: Proceedings of the 1993 International Workshop on Applications of Neural Networks to Telecommunications, pp. 45\u201351. Erlbaum (1993)"},{"key":"14_CR33","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"324","DOI":"10.1007\/3-540-44795-4_28","volume-title":"Machine Learning: ECML 2001","author":"C. Mariano","year":"2001","unstructured":"Mariano, C., Morales, E.: DQL: A New Updating Strategy for Reinforcement Learning Based on Q-Learning. In: Flach, P.A., De Raedt, L. (eds.) ECML 2001. LNCS (LNAI), vol.\u00a02167, pp. 324\u2013335. Springer, Heidelberg (2001)"},{"key":"14_CR34","unstructured":"Melo, F., Veloso, M.: Learning of Coordination: Exploiting Sparse Interactions in Multiagent Systems. In: Proceedings of the 8th International Conference on Autonomous Agents and Multi-Agent Systems, pp. 773\u2013780 (2009)"},{"key":"14_CR35","doi-asserted-by":"crossref","unstructured":"Nash, J.: Equilibrium Points in n-Person Games. Proceedings of the National Academy of Sciences of the United States of America, 48\u201349 (1950)","DOI":"10.1073\/pnas.36.1.48"},{"key":"14_CR36","first-page":"489","volume-title":"Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence, UAI 2000","author":"L. Peshkin","year":"2000","unstructured":"Peshkin, L., Kim, K., Meuleau, N., Kaelbling, L.: Learning to Cooperate via Policy Search. In: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence, UAI 2000, pp. 489\u2013496. Morgan Kaufmann Publishers Inc., San Francisco (2000), http:\/\/portal.acm.org\/citation.cfm?id=647234.719893"},{"key":"14_CR37","volume-title":"Artificial Intelligence: A Modern Approach","author":"S. Russell","year":"2003","unstructured":"Russell, S., Norvig, P.: Artificial Intelligence: A Modern Approach, 2nd edn. Prentice-Hall, Englewood Cliffs (2003)","edition":"2"},{"issue":"5","key":"14_CR38","doi-asserted-by":"publisher","first-page":"769","DOI":"10.1109\/21.293490","volume":"24","author":"P. Sastry","year":"1994","unstructured":"Sastry, P., Phansalkar, V., Thathachar, M.: Decentralized Learning of Nash Equilibria in Multi-Person Stochastic Games with Incomplete Information. IEEE Transactions on Systems, Man and Cybernetics\u00a024(5), 769\u2013777 (1994)","journal-title":"IEEE Transactions on Systems, Man and Cybernetics"},{"issue":"10","key":"14_CR39","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","volume":"39","author":"L. Shapley","year":"1953","unstructured":"Shapley, L.: Stochastic Games. Proceedings of the National Academy of Sciences\u00a039(10), 1095\u20131100 (1953)","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"14_CR40","doi-asserted-by":"crossref","unstructured":"Shoham, Y., Leyton-Brown, K.: Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations. Cambridge University Press (2009)","DOI":"10.1017\/CBO9780511811654"},{"key":"14_CR41","unstructured":"Singh, S., Kearns, M., Mansour, Y.: Nash Convergence of Gradient Dynamics in General-Sum Games. In: Proceedings of the Sixteenth Conference on Uncertainty in Artificial Intelligence, pp. 541\u2013548 (2000)"},{"key":"14_CR42","doi-asserted-by":"crossref","unstructured":"Smith, J.: Evolution and the Theory of Games. Cambridge Univ. Press (1982)","DOI":"10.1017\/CBO9780511806292"},{"issue":"6","key":"14_CR43","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.1214\/aoms\/1177693059","volume":"42","author":"M. Sobel","year":"1971","unstructured":"Sobel, M.: Noncooperative Stochastic Games. The Annals of Mathematical Statistics\u00a042(6), 1930\u20131935 (1971)","journal-title":"The Annals of Mathematical Statistics"},{"key":"14_CR44","unstructured":"Spaan, M., Melo, F.: Interaction-Driven Markov Games for Decentralized Multiagent Planning under Uncertainty. In: Proceedings of the 7th International Conference on Autonomous Agents and Multi-Agent Systems (AAMAS), pp. 525\u2013532. International Foundation for Autonomous Agents and Multiagent Systems (2008)"},{"key":"14_CR45","unstructured":"Steenhaut, K., Nowe, A., Fakir, M., Dirkx, E.: Towards a Hardware Implementation of Reinforcement Learning for Call Admission Control in Networks for Integrated Services. In: Proceedings of the International Workshop on Applications of Neural Networks to Telecommunications, vol.\u00a03, p. 63. Lawrence Erlbaum (1997)"},{"key":"14_CR46","unstructured":"Stevens, J.P.: Intermediate Statistics: A Modern Approach. Lawrence Erlbaum (1990)"},{"key":"14_CR47","unstructured":"Sutton, R., McAllester, D., Singh, S., Mansour, Y.: Policy Gradient Methods for Reinforcement Learning with Function Approximation. In: Advances in Neural Information Processing Systems, vol.\u00a012(22) (2000)"},{"issue":"3","key":"14_CR48","first-page":"185","volume":"16","author":"J. Tsitsiklis","year":"1994","unstructured":"Tsitsiklis, J.: Asynchronous stochastic approximation and Q-learning. Machine Learning\u00a016(3), 185\u2013202 (1994)","journal-title":"Machine Learning"},{"issue":"01","key":"14_CR49","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1017\/S026988890500041X","volume":"20","author":"K. Tuyls","year":"2005","unstructured":"Tuyls, K., Now\u00e9, A.: Evolutionary Game Theory and Multi-Agent Reinforcement Learning. The Knowledge Engineering Review\u00a020(01), 63\u201390 (2005)","journal-title":"The Knowledge Engineering Review"},{"key":"14_CR50","unstructured":"Verbeeck, K.: Coordinated Exploration in Multi-Agent Reinforcement Learning. PhD thesis, Computational Modeling Lab, Vrije Universiteit Brussel, Belgium (2004)"},{"key":"14_CR51","doi-asserted-by":"crossref","unstructured":"Verbeeck, K., Nowe, A., Tuyls, K.: Coordinated Exploration in Multi-Agent Reinforcement Learning: An Application to Loadbalancing. In: Proceedings of the 4th International Conference on Autonomous Agents and Multi-Agent Systems (2005)","DOI":"10.1145\/1082473.1082645"},{"key":"14_CR52","first-page":"307","volume-title":"Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS 2008)","author":"P. Vrancx","year":"2008","unstructured":"Vrancx, P., Tuyls, K., Westra, R.: Switching Dynamics of Multi-Agent Learning. In: Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS 2008), vol.\u00a01, pp. 307\u2013313. International Foundation for Autonomous Agents and Multiagent Systems, Richland (2008a), http:\/\/portal.acm.org\/citation.cfm?id=1402383.1402430"},{"issue":"4","key":"14_CR53","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1109\/TSMCB.2008.920998","volume":"38","author":"P. Vrancx","year":"2008","unstructured":"Vrancx, P., Verbeeck, K., Nowe, A.: Decentralized Learning in Markov Games. IEEE Transactions on Systems, Man, and Cybernetics, Part B\u00a038(4), 976\u2013981 (2008b)","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part B"},{"key":"14_CR54","doi-asserted-by":"crossref","unstructured":"Vrancx, P., De Hauwere, Y.M., Now\u00e9, A.: Transfer learning for Multi-Agent Coordination. In: Proceedings of the 3th International Conference on Agents and Artificial Intelligence, Rome, Italy, pp. 263\u2013272 (2011)","DOI":"10.5220\/0003185602630272"},{"key":"14_CR55","unstructured":"Weiss, G.: Multiagent Systems, A Modern Approach to Distributed Artificial Intelligence. The MIT Press (1999)"},{"issue":"6","key":"14_CR56","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1109\/TAC.1986.1104342","volume":"31","author":"R. Wheeler Jr.","year":"1986","unstructured":"Wheeler Jr., R., Narendra, K.: Decentralized Learning in Finite Markov Chains. IEEE Transactions on Automatic Control\u00a031(6), 519\u2013526 (1986)","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"3","key":"14_CR57","first-page":"229","volume":"8","author":"R. Williams","year":"1992","unstructured":"Williams, R.: Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine Learning\u00a08(3), 229\u2013256 (1992)","journal-title":"Machine Learning"},{"key":"14_CR58","unstructured":"Wooldridge, M.: An Introduction to Multi Agent Systems. John Wiley and Sons Ltd. (2002)"},{"key":"14_CR59","unstructured":"Wunder, M., Littman, M., Babes, M.: Classes of Multiagent Q-learning Dynamics with epsilon-greedy Exploration. In: Proceedings of the 27th International Conference on Machine Learning, Haifa, Israel, pp. 1167\u20131174 (2010)"},{"key":"14_CR60","unstructured":"Zinkevich, M.: Online Convex Programming and Generalized Infinitesimal Gradient Ascent. In: Machine Learning International Conference, vol.\u00a020(2), p. 928 (2003)"},{"key":"14_CR61","unstructured":"Zinkevich, M., Greenwald, A., Littman, M.: Cyclic equilibria in Markov games. In: Advances in Neural Information Processing Systems, vol.\u00a018, p. 1641 (2006)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T13:01:59Z","timestamp":1742648519000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":61,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_14","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}