{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T03:46:48Z","timestamp":1760586408471},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2006,11,10]],"date-time":"2006-11-10T00:00:00Z","timestamp":1163116800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2006,11,10]],"date-time":"2006-11-10T00:00:00Z","timestamp":1163116800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2007,6]]},"DOI":"10.1007\/s10458-006-9007-0","type":"journal-article","created":{"date-parts":[[2006,11,9]],"date-time":"2006-11-09T17:13:41Z","timestamp":1163092421000},"page":"239-269","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Exploring selfish reinforcement learning in repeated games with stochastic rewards"],"prefix":"10.1007","volume":"14","author":[{"given":"Katja","family":"Verbeeck","sequence":"first","affiliation":[]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[]},{"given":"Johan","family":"Parent","sequence":"additional","affiliation":[]},{"given":"Karl","family":"Tuyls","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2006,11,10]]},"reference":[{"key":"9007_CR1","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/0304-4068(74)90037-8","volume":"1","author":"R. Aumann","year":"1974","unstructured":"Aumann R. (1974). Subjectivity and correlation in randomized strategies. Journal of Mathematical Economics, 1, 67\u201396","journal-title":"Journal of Mathematical Economics"},{"key":"9007_CR2","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1613\/jair.1154","volume":"19","author":"R. Brafman","year":"2003","unstructured":"Brafman R., Tennenholtz M. (2003). Learning to coordinate efficiently: A model-based approach. Journal on Artificial Intelligence Research (JAIR), 19, 11\u201323","journal-title":"Journal on Artificial Intelligence Research (JAIR)"},{"key":"9007_CR3","unstructured":"Carpenter, M., & Kudenko, D. (2004). Baselines for joint-action reinforcement learning of coordination in cooperative multi-agent systems. In Proceedings of the 4th symposium on adaptive agents and multi-agent systems, (AISB04) Society for the study of Artificial Intelligence and Simulation of Behaviour (pp. 10\u201319)."},{"key":"9007_CR4","unstructured":"Claus, C., & Boutilier, C. (1998). The dynamics of reinforcement learning in cooperative multiagent systems. In Proceedings of the 15th national conference on artificial intelligence (pp. 746\u2013752)."},{"key":"9007_CR5","doi-asserted-by":"crossref","unstructured":"Geist, A., & Beguelin, A. (1994). PVM: Parallel virtual machine. MIT Press.","DOI":"10.7551\/mitpress\/5712.001.0001"},{"key":"9007_CR6","volume-title":"Game theory evolving: A problem-centered introduction to modeling strategic behavior","author":"H. Gintis","year":"2000","unstructured":"Gintis H. (2000). Game theory evolving: A problem-centered introduction to modeling strategic behavior. Princeton, New Jersey, Princeton University Press"},{"key":"9007_CR7","unstructured":"Greenwald, A., & Hall, K. (2003). Correlated q-learning. In Proceedings of the twentieth international conference on machine learning (pp. 242\u2013249)."},{"key":"9007_CR8","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1162\/jmlr.2003.4.6.1039","volume":"4","author":"J. Hu","year":"2003","unstructured":"Hu J., Wellman M. (2003). Nash q-learning for general-sum stochastic games. Journal of Machine Learning Research, 4:1039\u20131069","journal-title":"Journal of Machine Learning Research"},{"key":"9007_CR9","unstructured":"Kapetanakis, S., & Kudenko, D. (2002). Reinforcement learning of coordination in cooperative multi-agent systems. In Proceedings of the 18th national conference on artificial intelligence (pp. 326\u2013331)."},{"key":"9007_CR10","doi-asserted-by":"crossref","unstructured":"Kapetanakis, S., Kudenko, D., & Strens, M. (2003). Learning to coordinate using commitment sequences in cooperative multi-agent systems. In Proceedings of the 3rd symposium on adaptive agents and multi-agent systems, (AISB03) Society for the study of Artificial Intelligence and Simulation of Behaviour.","DOI":"10.1007\/3-540-44826-8_2"},{"key":"9007_CR11","unstructured":"Lauer, M., & Riedmiller, M. (2000). An algorithm for distributed reinforcement learning in cooperative multi-agent systems. In Proceedings of the 17th international conference on machine learning (pp. 535\u2013542)."},{"key":"9007_CR12","doi-asserted-by":"crossref","unstructured":"Littman, M. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the 11th international conference on machine learning (pp. 322\u2013328).","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"9007_CR13","unstructured":"Littman, M. (2001). Friend-or-foe q-learning in general-sum games. In Proceedings of the 18th international conference on machine learning (pp. 157\u2013163)."},{"key":"9007_CR14","unstructured":"Littman, M., & Szepesv\u00e1ri, C. (1996). A generalized reinforcement-learning model: Convergence and applications. In Proceedings of the 13th international conference on machine learning (pp. 310\u2013318)."},{"key":"9007_CR15","unstructured":"Narendra, K., & Thathachar, M. (1989). Learning automata: An introduction. Prentice-Hall International, Inc."},{"key":"9007_CR16","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1073\/pnas.36.1.48","volume":"36","author":"J. Nash","year":"1950","unstructured":"Nash, J. (1950). Equilibrium points in n-person games. Proceedings of the national academy of siences 36, 48\u201349.","journal-title":"Proceedings of the national academy of siences"},{"key":"9007_CR17","doi-asserted-by":"crossref","unstructured":"Now\u00e9, A., Parent, J., & Verbeeck, K. (2001). Social agents playing a periodical policy. In Proceedings of the 12th European conference on machine learning pp. 382\u2013393. Freiburg, Germany: Springer-Verlag LNAI2168.","DOI":"10.1007\/3-540-44795-4_33"},{"key":"9007_CR18","volume-title":"A course in game theory","author":"J. Osborne","year":"1994","unstructured":"Osborne J., Rubinstein A. (1994). A course in game theory. Cambridge, MA, MIT Press"},{"key":"9007_CR19","volume-title":"Evolutionary games and equilibrium selection","author":"L. Samuelson","year":"1997","unstructured":"Samuelson L. (1997). Evolutionary games and equilibrium selection. Cambridge, MA, MIT Press"},{"issue":"5","key":"9007_CR20","doi-asserted-by":"publisher","first-page":"769","DOI":"10.1109\/21.293490","volume":"24","author":"P. Sastry","year":"1994","unstructured":"Sastry P., Phansalkar V., Thathachar M. (1994). Decentralized learning of nash equilibria in multi-person stochastic games with incomplete information. IEEE Transactions on Systems, Man, and Cybernetics, 24(5):769\u2013777","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"9007_CR21","volume-title":"Reinforcement learning: An introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton R., Barto A. (1998). Reinforcement learning: An introduction. Cambridge, MA, MIT Press"},{"issue":"6","key":"9007_CR22","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1109\/TSMCB.2002.1049606","volume":"32","author":"M. Thathachar","year":"2002","unstructured":"Thathachar M., Sastry P. (2002). Varieties of learning automata: An overview. IEEE Transactions on Systems, Man, and Cybernetics, Part B: Cybernetics, 32(6):711\u2013722","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part B: Cybernetics"},{"key":"9007_CR23","first-page":"185","volume":"16","author":"J. Tsitsiklis","year":"1994","unstructured":"Tsitsiklis J. (1994). Asynchronous stochastic approximation and q-learning. Machine Learning, 16, 185\u2013202","journal-title":"Machine Learning"},{"key":"9007_CR24","unstructured":"Tuyls, K. (2004). Multiagent reinforcement learning: A game theoretic approach. PhD Thesis, Computational Modeling Lab, Vrije Universiteit Brussel, Belgium."},{"key":"9007_CR25","unstructured":"Verbeeck, K. (2004). Coordinated exploration in multi-agent reinforcement learning. PhD Thesis, Computational Modeling Lab, Vrije Universiteit Brussel, Belgium."},{"key":"9007_CR26","doi-asserted-by":"crossref","unstructured":"Verbeeck, K., Now\u00e9, A., & Parent, J. (2002). Homo egualis reinforcement learning agents for load balancing. In Proceedings of the 1st NASA workshop on radical agent concepts, pp. 81\u201391. Springer-Verlag LNAI 2564.","DOI":"10.1007\/978-3-540-45173-0_6"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-006-9007-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-006-9007-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-006-9007-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-006-9007-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,17]],"date-time":"2022-05-17T22:06:37Z","timestamp":1652825197000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-006-9007-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006,11,10]]},"references-count":26,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2007,6]]}},"alternative-id":["9007"],"URL":"https:\/\/doi.org\/10.1007\/s10458-006-9007-0","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2006,11,10]]},"assertion":[{"value":"10 November 2006","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}