{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T15:44:23Z","timestamp":1775576663406,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2011,9,6]],"date-time":"2011-09-06T00:00:00Z","timestamp":1315267200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2013,1]]},"DOI":"10.1007\/s10458-011-9181-6","type":"journal-article","created":{"date-parts":[[2011,9,6]],"date-time":"2011-09-06T16:23:58Z","timestamp":1315326238000},"page":"37-53","source":"Crossref","is-referenced-by-count":11,"title":["Continuous strategy replicator dynamics for multi-agent Q-learning"],"prefix":"10.1007","volume":"26","author":[{"given":"Aram","family":"Galstyan","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2011,9,6]]},"reference":[{"key":"9181_CR1","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1613\/jair.2628","volume":"33","author":"S. Abdallah","year":"2008","unstructured":"Abdallah S., Lesser V. (2008) A multiagent reinforcement learning algorithm with non-linear dynamics. Journal of Artificial Intelligence Research 33: 521\u2013549","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"2","key":"9181_CR2","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L. Busoniu","year":"2008","unstructured":"Busoniu L., Babuska R., De Schutter B. (2008) A comprehensive survey of multiagent reinforcement learning. Systems, Man, and Cybernetics, Part C: Applications and Reviews, IEEE Transactions on 38(2): 156\u2013172","journal-title":"Systems, Man, and Cybernetics, Part C: Applications and Reviews, IEEE Transactions on"},{"issue":"1","key":"9181_CR3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1006\/jeth.1997.2319","volume":"77","author":"T. Borgers","year":"1997","unstructured":"Borgers T., Sarin R. (1997) Learning through reinforcement and replicator dynamics. Journal of Economic Theory 77(1): 1\u201314","journal-title":"Journal of Economic Theory"},{"key":"9181_CR4","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M. Bowling","year":"2002","unstructured":"Bowling M., Veloso M. (2002) Multiagent learning using a variable learning rate. Artificial Intelligence 136: 215\u2013250","journal-title":"Artificial Intelligence"},{"key":"9181_CR5","unstructured":"Claus, C.,& Boutilier, C. (1998). The dynamics of reinforcement learning in cooperative multiagent systems. In Proceedings of the fifteenth national conference on artificial intelligence, (pp. 746\u2013752). Menlo Park, CA: AAAI Press."},{"issue":"2","key":"9181_CR6","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1016\/j.mathsocsci.2005.03.001","volume":"50","author":"R. Cressman","year":"2005","unstructured":"Cressman R. (2005) Stability of the replicator equation with continuous strategy space. Mathematical Social Sciences 50(2): 127\u2013147","journal-title":"Mathematical Social Sciences"},{"key":"9181_CR7","unstructured":"Hennes, D., Tuyls, K., & Rauterberg, M. (2009). State-coupled replicator dynamics. In Proceeding of AAMAS (Vol. 2), pp. 789\u2013796."},{"key":"9181_CR8","volume-title":"The theory of evolution and dynamical systems","author":"J. Hofbauer","year":"1988","unstructured":"Hofbauer J., Sigmund K. (1988) The theory of evolution and dynamical systems. Cambridge University Press, Cambridge"},{"key":"9181_CR9","unstructured":"Hu, J., & Wellman, M. P. (1998). Multiagent reinforcement learning: Theoretical framework and an algorithm. In Proceedings of the fifteenth international conference on machine learning (pp. 242\u2013250). Massachusetts: Morgan Kaufmann."},{"key":"9181_CR10","first-page":"1039","volume":"4","author":"J. Hu","year":"2003","unstructured":"Hu J., Wellman M. P. (2003) Nash Q-learning for general-sum stochastic games. Journal of Machine Learning Research 4: 1039\u20131069","journal-title":"Journal of Machine Learning Research"},{"key":"9181_CR11","unstructured":"Kapetanakis, S., & Kudenko, D. (2002). Reinforcement learning of coordination in cooperative multi-agent systems. In Eighteenth national conference on artificial intelligence (pp. 326\u2013331). Menlo Park, CA: American Association for Artificial Intelligence."},{"key":"9181_CR12","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L. P. Kaelbling","year":"1996","unstructured":"Kaelbling L. P., Littman M. L., Moore A. P. (1996) Reinforcement learning: A survey. Journal of Artificial Intelligence Research 4: 237\u2013285","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9181_CR13","doi-asserted-by":"crossref","first-page":"1723","DOI":"10.1098\/rspb.1999.0838","volume":"266","author":"T. Killingback","year":"1999","unstructured":"Killingback T., Doebeli M., Knowlton N. (1999) Variable investment, the continuous prisonner\u2019s dilemma, and the origin of cooperation. Proceedings of Royal Society London B Biological Science 266: 1723\u20131728","journal-title":"Proceedings of Royal Society London B Biological Science"},{"issue":"2","key":"9181_CR14","doi-asserted-by":"crossref","first-page":"258","DOI":"10.1016\/j.jtbi.2006.09.016","volume":"245","author":"S. Le","year":"2007","unstructured":"Le S., Boyd R. (2007) Evolutionary dynamics of the continuous iterated Prisoner\u2019s dilemma. Journal of Theoretical Biology 245(2): 258\u2013267","journal-title":"Journal of Theoretical Biology"},{"key":"9181_CR15","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/PL00004092","volume":"17","author":"J. Oechssler","year":"2001","unstructured":"Oechssler J., Riedel F. (2001) Evolutionary dynamics on infinite strategy spaces. Economic Theory 17: 141\u2013162","journal-title":"Economic Theory"},{"key":"9181_CR16","first-page":"423","volume":"9","author":"L. Panait","year":"2008","unstructured":"Panait L., Tuyls K., Luke S. (2008) Theoretical advantages of lenient learners: An evolutionary game theoretic perspective. Journal of Machine Learning Research 9: 423\u2013457","journal-title":"Journal of Machine Learning Research"},{"key":"9181_CR17","unstructured":"Peshkin, L., Kim, K. E., Meuleau, N., & Kaelbling, L. P. (2000). Learning to cooperate via policy search. In Proceedings of UAI (pp. 489\u2013496). San Francisco, CA: Morgan Kaufmann."},{"key":"9181_CR18","unstructured":"Ruijgrok, M., & Ruijgrok, T. W. Replicator dynamics with mutations for games with a continuous strategy space. http:\/\/arxiv.org\/abs\/nlin\/0505032 ."},{"issue":"1","key":"9181_CR19","doi-asserted-by":"crossref","first-page":"015206","DOI":"10.1103\/PhysRevE.67.015206","volume":"67","author":"Y. Sato","year":"2003","unstructured":"Sato Y., Crutchfield J. P. (2003) Coupled replicator equations for the dynamics of learning in multiagent systems. Physical Review E 67(1): 015206","journal-title":"Physical Review E"},{"issue":"7","key":"9181_CR20","doi-asserted-by":"crossref","first-page":"4748","DOI":"10.1073\/pnas.032086299","volume":"99","author":"Y. Sato","year":"2002","unstructured":"Sato Y., Akiyama E., Farmer J. D. (2002) Chaos in learning a simple two-person game. Proceedings of the National Academy of Sciences of the United States of America 99(7): 4748\u20134751","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"key":"9181_CR21","unstructured":"Shoham, Y., Grenager, T., & Powers, R. (2003). Multi-agent reinforcement learning: A critical survey. Web manuscript."},{"issue":"3","key":"9181_CR22","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P. Stone","year":"2005","unstructured":"Stone P., Sutton R. S., Kuhlmann G. (2005) Reinforcement learning for RoboCup-Soccer keepaway. Adaptive Behavior 13(3): 165\u2013188","journal-title":"Adaptive Behavior"},{"key":"9181_CR23","volume-title":"Reinforcement learning: An introduction. A Bradford book","author":"R. S. Sutton","year":"1998","unstructured":"Sutton R. S., Barto A. G. (1998) Reinforcement learning: An introduction. A Bradford book. MIT Press, Cambridge, MA"},{"key":"9181_CR24","doi-asserted-by":"crossref","first-page":"693","DOI":"10.1145\/860575.860687","volume":"2003","author":"K. Tuyls","year":"2003","unstructured":"Tuyls K., Verbeeck K., Lenaerts T. (2003) A selection-mutation model for q-learning in multi-agent systems. AAMAS 2003: 693\u2013700","journal-title":"AAMAS"},{"key":"9181_CR25","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1007\/s10458-005-3783-9","volume":"12","author":"K. Tuyls","year":"2006","unstructured":"Tuyls K., Hoen P. J. T., Vanschoenwinkel B. (2006) An evolutionary dynamical analysis of multi-agent learning in iterated games. Autonomous Agents and Multi-Agent Systems 12: 115\u2013153","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"9181_CR26","doi-asserted-by":"crossref","unstructured":"Tuyls K., Westra R. (2009) Replicator dynamics in discrete and continuous strategy spaces. In: Uhrmacher A. M., Weyns D. (eds) Multi-agent systems: Simulation and applications. CRC, Boca Raton, FL","DOI":"10.1201\/9781420070248.ch7"},{"key":"9181_CR27","unstructured":"Vrancx, P., Tuyls, K., & Westra, R. L. (2008). Switching dynamics of multi-agent learning. AAMAS (1), 2008, 307\u2013313"},{"issue":"3","key":"9181_CR28","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1006\/jtbi.1999.0996","volume":"200","author":"L. M. Wahl","year":"1999","unstructured":"Wahl L. M., Nowak M. A. (1999) The continuous Prisoner\u2019s Dilemma: I. Linear reactive strategies. Journal of Theoretical Biology 200(3): 307\u2013332","journal-title":"Journal of Theoretical Biology"},{"key":"9181_CR29","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1006\/jtbi.1999.0997","volume":"200","author":"L. M. Wahl","year":"1999","unstructured":"Wahl L. M., Nowak M. A. (1999) The continuous Prisoner\u2019s dilemma: II. Linear reactive strategies with noise. Journal of Theoretical Biology 200: 323\u2013338","journal-title":"Journal of Theoretical Biology"},{"key":"9181_CR30","unstructured":"Watkins, C. J. C. H., & Dayan, P. (1992). Technical note: Q-learning. PhD thesis."}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-011-9181-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-011-9181-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-011-9181-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,15]],"date-time":"2019-06-15T00:56:30Z","timestamp":1560560190000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-011-9181-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,9,6]]},"references-count":30,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2013,1]]}},"alternative-id":["9181"],"URL":"https:\/\/doi.org\/10.1007\/s10458-011-9181-6","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,9,6]]}}}