{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:47:13Z","timestamp":1775746033035,"version":"3.50.1"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2013,2,28]],"date-time":"2013-02-28T00:00:00Z","timestamp":1362009600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2014,3]]},"DOI":"10.1007\/s10458-013-9222-4","type":"journal-article","created":{"date-parts":[[2013,2,27]],"date-time":"2013-02-27T12:53:49Z","timestamp":1361969629000},"page":"182-213","source":"Crossref","is-referenced-by-count":24,"title":["Multiagent learning in the presence of memory-bounded agents"],"prefix":"10.1007","volume":"28","author":[{"given":"Doran","family":"Chakraborty","sequence":"first","affiliation":[]},{"given":"Peter","family":"Stone","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,2,28]]},"reference":[{"key":"9222_CR1","first-page":"1","volume":"10","author":"S Airiau","year":"2007","unstructured":"Airiau, S., Saha, S., & Sen, S. (2007). Evolutionary tournament-based comparison of learning and non-learning algorithms for iterated games. Journal of Artificial Societies and Social Simulation, 10, 1\u201312.","journal-title":"Journal of Artificial Societies and Social Simulation"},{"key":"9222_CR2","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1016\/0304-4068(74)90037-8","volume":"1","author":"R Aumann","year":"1974","unstructured":"Aumann, R. (1974). Subjectivity and correlation in randomized strategies. Journal of Mathematical Economics, 1, 67\u201396.","journal-title":"Journal of Mathematical Economics"},{"key":"9222_CR3","unstructured":"Banerjee, B., & Peng, J. (2004). 
Performance bounded reinforcement learning in strategic interactions. In L. Deborah, McGuinness, & G. Ferguson (Eds.),\u00a0 AAAI\u201904: Proceedings of the 19th National Conference on Artificial Intelligence (pp. 2\u20137). Menlo Park, CA: AAAI Press\/The MIT Press."},{"key":"9222_CR4","first-page":"825","volume-title":"Proceedings of the Seventeenth International Joint Conference on Artificial Intelligence","author":"B Banerjee","year":"2001","unstructured":"Banerjee, B., Sen, S., & Peng, J. (2001). Fast concurrent reinforcement learners. In N. Bernhard (Ed.), Proceedings of the Seventeenth International Joint Conference on Artificial Intelligence (pp. 825\u2013830). San Francisco, CA: Morgan Kaufmann."},{"key":"9222_CR5","unstructured":"Bouzy, B., & Metivier, M. (2010). Multi-agent learning experiments on repeated matrix games. In J. Furnkranz & T. Joachims (Eds.), Proceedings of the Twenty-Seventh International Conference on Machine Learning. Haifa: ICML."},{"key":"9222_CR6","unstructured":"Bowling, M. (2005). Convergence and no-regret in multiagent learning. In L. K. Saul, Y. Weiss, & L. Bottou (Eds.), NIPS\u201905: Advances in Neural Information Processing Systems (pp. 209\u2013216). Cambridge, MA: MIT Press."},{"key":"9222_CR7","unstructured":"Bowling, M., & Veloso, M. (2001a). Convergence of gradient dynamics with a variable learning rate. Proceedings of the 18th International Conference on Machine Learning (pp. 27\u201334). Morgan Kaufmann, San Francisco, CA."},{"key":"9222_CR8","first-page":"1021","volume-title":"International Joint Conference on Artificial Intelligence","author":"M Bowling","year":"2001","unstructured":"Bowling, M., & Veloso, M. (2001b). Rational and convergent learning in stochastic games. In B. Nebel (Ed.), International Joint Conference on Artificial Intelligence (pp. 1021\u20131026). 
San Francisco, CA: Morgan Kaufmann."},{"key":"9222_CR9","volume-title":"R-max\u2014a general polynomial time algorithm for near-optimal reinforcement learning","author":"RI Brafman","year":"2003","unstructured":"Brafman, R. I., & Tennenholtz, M. (2003). R-max\u2014a general polynomial time algorithm for near-optimal reinforcement learning. Menlo Park, CA: MIT Press."},{"key":"9222_CR10","first-page":"374","volume-title":"Activity analysis of production and allocation","author":"G Brown","year":"1951","unstructured":"Brown, G. (1951). Iterative solution to games by fictitious play. In T. C. Koopmans (Ed.), Activity analysis of production and allocation (pp. 374\u2013376). New York, NY: Wiley."},{"key":"9222_CR11","doi-asserted-by":"crossref","unstructured":"Chakraborty, D., & Stone, P. (2008). Online multiagent learning against memory bounded adversaries. European Conference on Machine Learning (pp. 211\u2013226). Antwerp, Belgium.","DOI":"10.1007\/978-3-540-87479-9_32"},{"key":"9222_CR12","volume-title":"Proceedings of the Twenty-Seventh International Conference on Machine Learning","author":"D Chakraborty","year":"2010","unstructured":"Chakraborty, D., & Stone, P. (2010). Convergence, targeted optimality and safety in multiagent learning. In J. Furnkranz & T. Joachims (Eds.), Proceedings of the Twenty-Seventh International Conference on Machine Learning. Haifa: ICML."},{"key":"9222_CR13","doi-asserted-by":"crossref","unstructured":"Chen, X. & Deng, X. (2006). Settling the complexity of two-player Nash equilibrium. Proceedings of the 47th Foundations of Computer Science (FOCS) (pp. 261\u2013272). Berkeley, CA.","DOI":"10.1109\/FOCS.2006.69"},{"key":"9222_CR14","first-page":"3","volume":"30","author":"Y Chevaleyre","year":"2006","unstructured":"Chevaleyre, Y., Dunne, P. E., Endriss, U., Lang, J., Lema\u00eatre, M., Maudet, N., et al. (2006). Issues in multiagent resource allocation. 
Informatica, 30, 3\u201331.","journal-title":"Informatica"},{"key":"9222_CR15","first-page":"746","volume-title":"Proceedings of the Fifteenth National Conference on Artificial Intelligence","author":"C Claus","year":"1998","unstructured":"Claus, C., & Boutilier, C. (1998). The dynamics of reinforcement learning in cooperative multiagent systems. In J. Mostow & C. Rich (Eds.), Proceedings of the Fifteenth National Conference on Artificial Intelligence (pp. 746\u2013752). Menlo Park: AAAI Press."},{"key":"9222_CR16","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1007\/s10994-006-0143-1","volume":"67","author":"V Conitzer","year":"2006","unstructured":"Conitzer, V., & Sandholm, T. (2006). AWESOME: A general multiagent learning algorithm that converges in self-play and learns a best response against stationary opponents. Machine Learning, 67, 23\u201343.","journal-title":"Machine Learning"},{"key":"9222_CR17","first-page":"704","volume":"41","author":"DP Foster","year":"1993","unstructured":"Foster, D. P., & Vohra, R. V. (1993). A randomization rule for selecting forecasts. Institute for Operations Research and the Management Sciences (INFORMS), 41, 704\u2013709.","journal-title":"Institute for Operations Research and the Management Sciences (INFORMS)"},{"key":"9222_CR18","doi-asserted-by":"crossref","first-page":"1065","DOI":"10.1016\/0165-1889(94)00819-4","volume":"19","author":"D Fudenberg","year":"1995","unstructured":"Fudenberg, D., & Levine, D. K. (1995). Consistency and cautious fictitious play. Journal of Economic Dynamics and Control, 19, 1065\u20131089.","journal-title":"Journal of Economic Dynamics and Control"},{"key":"9222_CR19","volume-title":"The theory of learning in games","author":"D Fudenberg","year":"1999","unstructured":"Fudenberg, D., & Levine, D. K. (1999). The theory of learning in games (1st ed.). Cambridge, MA: MIT Press.","edition":"1"},{"key":"9222_CR20","volume-title":"Approximation to Bayes risk in repeated plays. 
Contributions to the theory of games","author":"J Hannan","year":"1957","unstructured":"Hannan, J. (1957). Approximation to Bayes risk in repeated plays. Contributions to the theory of games. Princeton, NJ: Princeton University Press."},{"key":"9222_CR21","doi-asserted-by":"crossref","first-page":"1127","DOI":"10.1111\/1468-0262.00153","volume":"68","author":"S Hart","year":"2000","unstructured":"Hart, S., & Mas-Colell, A. (2000). A simple adaptive procedure leading to correlated equilibrium. Econometrica, 68, 1127\u20131150.","journal-title":"Econometrica"},{"key":"9222_CR22","unstructured":"Hu, J. & Wellman M.P. (1998). Multiagent reinforcement learning: Theoretical framework and an algorithm. Proceedings 15th International Conference on Machine Learning (pp. 242\u2013250). Morgan Kaufmann, San Francisco, CA."},{"key":"9222_CR23","unstructured":"Kaisers, M., & Tuyls, K. (2010). Frequency adjusted multi-agent Q-learning. Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems: Volume 1\u2013 Volume 1 (pp. 309\u2013316). Richland, SC."},{"key":"9222_CR24","unstructured":"Kearns, M., & Singh, S. (1998). Near-optimal reinforcement learning in polynomial time. Proceedings of the 15th International Conference on Machine Learning (pp. 260\u2013268). Morgan Kaufmann, San Francisco, CA."},{"key":"9222_CR25","first-page":"157","volume-title":"Proceedings of the Eleventh International Conference on Machine Learning","author":"ML Littman","year":"1994","unstructured":"Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In W. W. Cohen & H. Hirsh (Eds.), Proceedings of the Eleventh International Conference on Machine Learning (pp. 157\u2013163). San Francisco, CA: Morgan Kaufmann."},{"key":"9222_CR26","first-page":"55","volume-title":"A polynomial-time Nash equilibrium algorithm for repeated games","author":"ML Littman","year":"2005","unstructured":"Littman, M. L., & Stone, P. (2005). 
A polynomial-time Nash equilibrium algorithm for repeated games (pp. 55\u201366). Amsterdam: Elsevier."},{"key":"9222_CR27","first-page":"310","volume-title":"Proceedings of the 13th International Conference on Machine Learning","author":"ML Littman","year":"1996","unstructured":"Littman, M. L., & Szepesvari, C. (1996). A generalized reinforcement-learning model: Convergence and applications. In L. Saitta (Ed.), Proceedings of the 13th International Conference on Machine Learning (pp. 310\u2013318). San Francisco, CA: Morgan Kaufmann Publishers."},{"key":"9222_CR28","doi-asserted-by":"crossref","unstructured":"Mahadevan, S. (1996). Average reward reinforcement learning: Foundations, algorithms, and empirical results. Machine Learning, 22(1\u20133), 159\u2013195.","DOI":"10.1007\/BF00114727"},{"key":"9222_CR29","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1073\/pnas.36.1.48","volume":"36","author":"JF Nash Jr","year":"1950","unstructured":"Nash, J. F, Jr. (1950). Equilibrium points in n-person games. Proceedings of the National Academy of Sciences, 36, 48\u201349.","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"9222_CR30","volume-title":"A course in game theory","author":"MJ Osborne","year":"1994","unstructured":"Osborne, M. J., & Rubinstein, A. (1994). A course in game theory. Cambridge, MA: The MIT Press."},{"key":"9222_CR31","unstructured":"Pardoe, D., Chakraborty, D., & Stone, P. (2010). TacTex09: A champion bidding agent for ad auctions. In van der Hoek, Kaminka, Lesperance, Luck, & Sen (Eds.), Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS 2010). Dunbeath: International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9222_CR32","unstructured":"Powers, R., & Shoham, Y. (2005). Learning against opponents with bounded memory. Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI) (pp. 817\u2013822). 
Edinburgh, Scotland."},{"key":"9222_CR33","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1007\/s10994-006-9643-2","volume":"67","author":"R Powers","year":"2007","unstructured":"Powers, R., Shoham, Y., & Vu, T. (2007). A general criterion and an algorithmic framework for learning in multi-agent systems. Machine Learning, 67, 45\u201376.","journal-title":"Machine Learning"},{"key":"9222_CR34","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision processes: Discrete stochastic dynamic programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M. L. (1994). Markov Decision processes: Discrete stochastic dynamic programming. New York, NY: Wiley."},{"key":"9222_CR35","unstructured":"Sela, A., & Herreiner, D. K. (1997). Fictitious play in coordination games. Discussion paper serie B. University of Bonn, Germany."},{"key":"9222_CR36","first-page":"541","volume-title":"Proceedings of the Sixteenth Conference on Uncertainty in Artificial Intelligence","author":"S Singh","year":"2000","unstructured":"Singh, S., Kearns, M., & Mansour, Y. (2000). Nash convergence of gradient dynamics in general-sum games. In C. Boutilier & M. Goldszmidt (Eds.), Proceedings of the Sixteenth Conference on Uncertainty in Artificial Intelligence (pp. 541\u2013548). San Francisco, CA: Morgan Kaufmann Publishers."},{"issue":"2","key":"9222_CR37","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1007\/s10994-008-5091-5","volume":"74","author":"F Southey","year":"2008","unstructured":"Southey, F., Hoehn, B., & Holte, R. (2008). Effective short-term opponent exploitation in simplified poker. Machine Learning, 74(2), 159\u2013189.","journal-title":"Machine Learning"},{"key":"9222_CR38","unstructured":"Stone, P., Dresner, K., Fidelman, P., Kohl, N., Kuhlmann, G., Sridharan, M., et al. (2005). The UT Austin Villa 2005 RoboCup four-legged team: Technical report. 
The University of Texas, Austin."},{"key":"9222_CR39","first-page":"96","volume-title":"Pre-Proceedings of the Eighth International Workshop on Agent Theories, Architectures, and Languages (ATAL-2001)","author":"P Stone","year":"2001","unstructured":"Stone, P., & Littman, M. L. (2001). Implicit negotiation in repeated games. In J.-J. Meyer & M. Tambe (Eds.), Pre-Proceedings of the Eighth International Workshop on Agent Theories, Architectures, and Languages (ATAL-2001) (pp. 96\u2013105). Heidelberg: Springer."},{"issue":"3","key":"9222_CR40","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1023\/A:1008942012299","volume":"8","author":"P Stone","year":"2000","unstructured":"Stone, P., & Veloso, M. (2000). Multiagent systems: A survey from a machine learning perspective. Autonomous Robots, 8(3), 345\u2013383.","journal-title":"Autonomous Robots"},{"key":"9222_CR41","volume-title":"Reinforcement learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning. Cambridge, MA: MIT Press."},{"key":"9222_CR42","first-page":"209","volume-title":"ECAI 2010: 19th European Conference on Artificial Intelligence","author":"AM Sykulski","year":"2010","unstructured":"Sykulski, A. M., Chapman, A. C., de Cote, E. M., & Jennings, N. R. (2010). EA2: The winning strategy for the inaugural lemonade stand game tournament. In H. Coelho, R. Studer, & M. Wooldridge (Eds.), ECAI 2010: 19th European Conference on Artificial Intelligence (pp. 209\u2013214). Amsterdam: IOS Press."},{"issue":"7","key":"9222_CR43","doi-asserted-by":"crossref","first-page":"406","DOI":"10.1016\/j.artint.2007.01.004","volume":"171","author":"K Tuyls","year":"2007","unstructured":"Tuyls, K., & Parsons, S. (2007). What evolutionary game theory tells us about multiagent learning. 
Artificial Intelligence, 171(7), 406\u2013416.","journal-title":"Artificial Intelligence"},{"key":"9222_CR44","first-page":"377","volume-title":"Industrial and practical applications of DAI","author":"H Dyke Parunak Van","year":"1999","unstructured":"Van Dyke Parunak, H. (1999). Industrial and practical applications of DAI (pp. 377\u2013421). Cambridge, MA: The MIT Press."},{"key":"9222_CR45","first-page":"279","volume":"3","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C. J. C. H., & Dayan, P. D. (1992). Q-learning. Machine Learning, 3, 279\u2013292.","journal-title":"Machine Learning"},{"key":"9222_CR46","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1109\/4235.585893","volume":"1","author":"DH Wolpert","year":"1997","unstructured":"Wolpert, D. H., & Macready, W. G. (1997). No free lunch theorems for optimization. IEEE Transactions on Evolutionary Computation, 1, 67\u201382.","journal-title":"IEEE Transactions on Evolutionary Computation"},{"key":"9222_CR47","volume-title":"Introduction to multiagent systems","author":"MJ Wooldridge","year":"2001","unstructured":"Wooldridge, M. J. (2001). Introduction to multiagent systems. 
New York, NY: Wiley."}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-013-9222-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-013-9222-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-013-9222-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,10]],"date-time":"2019-07-10T04:48:54Z","timestamp":1562734134000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-013-9222-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,2,28]]},"references-count":47,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2014,3]]}},"alternative-id":["9222"],"URL":"https:\/\/doi.org\/10.1007\/s10458-013-9222-4","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,2,28]]}}}