{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,8,6]],"date-time":"2023-08-06T00:29:25Z","timestamp":1691281765029},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2007,6,8]],"date-time":"2007-06-08T00:00:00Z","timestamp":1181260800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2007,10,10]]},"DOI":"10.1007\/s10458-007-9013-x","type":"journal-article","created":{"date-parts":[[2007,6,7]],"date-time":"2007-06-07T16:19:23Z","timestamp":1181233163000},"page":"281-312","source":"Crossref","is-referenced-by-count":7,"title":["Generalized multiagent learning with performance bound"],"prefix":"10.1007","volume":"15","author":[{"given":"Bikramjit","family":"Banerjee","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jing","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2007,6,8]]},"reference":[{"key":"9013_CR1","doi-asserted-by":"crossref","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., & Schapire, R. E. (1995). Gambling in a rigged casino: The adversarial multi-arm bandit problem. In Proceedings of the thirtysixth annual symposium on foundations of computer science (pp. 322\u2013331). Milwaukee, WI: IEEE Computer Society Press.","DOI":"10.1109\/SFCS.1995.492488"},{"key":"9013_CR2","unstructured":"Banerjee, B., & Peng, J. (2004). Performance bounded reinforcement learning in strategic interactions. In Proceedings of the nineteenth national conference on artificial intelligence (AAAI-04) (pp. 2\u20137). San Jose, CA: AAAI Press."},{"key":"9013_CR3","unstructured":"Bowling, M. (2005). Convergence and no-regret in multiagent learning. In Proceedings of NIPS 2004\/5."},{"key":"9013_CR4","unstructured":"Bowling, M., & Veloso, M. (2001). Rational and convergent learning in stochastic games. In Proceedings of the seventeenth international joint conference on artificial intelligence (pp. 1021\u20131026). Seattle, WA."},{"key":"9013_CR5","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M. Bowling","year":"2002","unstructured":"Bowling M., Veloso M. (2002). Multiagent learning using a variable learning rate. Artificial Intelligence 136: 215\u2013250","journal-title":"Artificial Intelligence"},{"key":"9013_CR6","first-page":"213","volume":"3","author":"R.I. Brafman","year":"2002","unstructured":"Brafman R.I., Tennenholtz M. (2002). R-max - A general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research 3: 213\u2013231","journal-title":"Journal of Machine Learning Research"},{"key":"9013_CR7","unstructured":"Claus, C., & Boutilier, C. (1998). The dynamics of reinforcement learning in cooperative multiagent systems. In Proceedings of the fifteenth national conference on artificial intelligence (pp. 746\u2013752). Menlo Park, CA: AAAI Press\/MIT Press."},{"key":"9013_CR8","unstructured":"Conitzer, V., & Sandholm, T. (2003). AWESOME: A general multiagent learning algorithm that converges in self-play and learns a best response against stationary opponents. In Proceedings of the twentieth international conference on machine learning."},{"key":"9013_CR9","doi-asserted-by":"crossref","unstructured":"Cover, T. M., & Thomas, J. A. (1991). Elements of Information Theory. Wiley.","DOI":"10.1002\/0471200611"},{"key":"9013_CR10","unstructured":"Flaxman, A., Kalai, A., & McMahan, H. B. (2005). Online convex optimization in the bandit setting: Gradient descent without a gradient. In Proceedings of the sixteenth annual ACM-SIAM symposium on discrete algorithms (SODA), (To appear)"},{"key":"9013_CR11","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1006\/game.1999.0738","volume":"29","author":"Y. Freund","year":"1999","unstructured":"Freund Y., Schapire R.E. (1999). Adaptive game playing using multiplicative weights. Games and Economic Behavior 29: 79\u2013103","journal-title":"Games and Economic Behavior"},{"key":"9013_CR12","doi-asserted-by":"crossref","first-page":"1065","DOI":"10.1016\/0165-1889(94)00819-4","volume":"19","author":"D. Fudenberg","year":"1995","unstructured":"Fudenberg D., Levine D.K. (1995). Consistency and cautious fictitious play. Journal of Economic Dynamics and Control 19: 1065\u20131089","journal-title":"Journal of Economic Dynamics and Control"},{"key":"9013_CR13","volume-title":"The theory of learning in games","author":"D. Fudenberg","year":"1998","unstructured":"Fudenberg D., Levine K. (1998). The theory of learning in games. Cambridge, MA, MIT Press"},{"key":"9013_CR14","unstructured":"Greenwald, A., & Hall, K. (2002). Correlated q-learning. In Proceedings of the AAAI symposium on collaborative learning agents."},{"issue":"3","key":"9013_CR15","doi-asserted-by":"crossref","first-page":"1830","DOI":"10.1257\/000282803322655581","volume":"93","author":"S. Hart","year":"2003","unstructured":"Hart S., Mas-Colell A. (2003) Uncoupled dynamics do not lead to nash equilibrium. American Economic Review 93(3): 1830\u20131836","journal-title":"American Economic Review"},{"key":"9013_CR16","first-page":"1039","volume":"4","author":"J. Hu","year":"2003","unstructured":"Hu J., Wellman M.P. (2003). Nash Q-learning for general-sum stochastic games. Journal of Machine Learning Research 4: 1039\u20131069","journal-title":"Journal of Machine Learning Research"},{"key":"9013_CR17","unstructured":"Jafari, A., Greenwald, A., Gondek, D., & Ercal, G. (2001). On no-regret learning, fictitious play, and nash equilibrium. In Proceedings of the eighteenth international conference on machine learning, pp. 226\u2013223."},{"key":"9013_CR18","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1006\/inco.1994.1009","volume":"108","author":"N. Littlestone","year":"1994","unstructured":"Littlestone N., Warmuth M. (1994). The weighted majority algorithm. Information and Computation 108: 212\u2013261","journal-title":"Information and Computation"},{"key":"9013_CR19","doi-asserted-by":"crossref","unstructured":"Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning, (pp. 157\u2013163). San Mateo, CA: Morgan Kaufmann.","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"9013_CR20","unstructured":"Littman, M. L. (2001). Friend-or-foe Q-learning in general-sum games. In Proceedings of the eighteenth international conference on machine learnig, Williams College, MA, USA."},{"key":"9013_CR21","unstructured":"Littman, M. L., & Szepesvari, C. (1996). A generalized reinforcement learning model: Convergence and applications. In Proceedings of the 13th international conference on machine learning, pp. 310\u2013318."},{"key":"9013_CR22","doi-asserted-by":"crossref","first-page":"286","DOI":"10.2307\/1969529","volume":"54","author":"J.F. Nash","year":"1951","unstructured":"Nash J.F. (1951). Non-cooperative games. Annals of Mathematics 54: 286\u2013295","journal-title":"Annals of Mathematics"},{"key":"9013_CR23","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1038\/364056a0","volume":"364","author":"M. Nowak","year":"1993","unstructured":"Nowak M., Sigmund K. (1993). A strategy of win-stay, lose-shift that outperforms tit-for-tat in the prisoner\u2019s dilemma game. Nature 364: 56\u201358","journal-title":"Nature"},{"key":"9013_CR24","volume-title":"Game Theory","author":"G. Owen","year":"1995","unstructured":"Owen G. (1995). Game Theory. UK, Academic Press"},{"key":"9013_CR25","unstructured":"Posch, M., & Brannath, W. (1997). Win-stay, lose-shift. A general learning rule for repeated normal form games. In Proceedings of the third international conference on computing in economics and finance, Stanford, CA, June 30\u2013July 2, 1997."},{"key":"9013_CR26","unstructured":"Powers, R., & Shoham, Y. (2005). New criteria and a new algorithm for learning in multi-agent systems. In Proceedings of NIPS 2004\/5."},{"key":"9013_CR27","doi-asserted-by":"crossref","unstructured":"Sandholm T., Crites R. (1996). On multiagent Q-learning in a semi-competitive domain. In G. Wei\u00df & S. Sen, (Eds.) Adaptation and learning in multi-agent systems. pp. 191\u2013205, Springer-Verlag.","DOI":"10.1007\/3-540-60923-7_28"},{"key":"9013_CR28","unstructured":"Sen, S., Sekaran, M., & Hale, J. (1994). Learning to coordinate without sharing information. In National conference on artificial intelligence, p. 426\u2013431, Menlo Park, CA: AAAI Press\/MIT Press. (Also published in READINGS in AGENTS, Michael Huhns, N, and Munindar Singh (Editors), p. 509\u2013514, Morgan Kaufmann Publishers Inc., San Francisco, CA, 1998.)."},{"key":"9013_CR29","doi-asserted-by":"crossref","unstructured":"Shapley, L. S. (1974). A note on the lemke howson algorithm. Mathematical programming study 1: Pivoting and extensions, pp. 175\u2013189.","DOI":"10.1007\/BFb0121248"},{"key":"9013_CR30","unstructured":"Singh, S., Kearns, M., & Mansour, Y. (2000). Nash convergence of gradient dynamics in general-sum games. In Proceedings of the sixteenth conference on uncertainty in artificial intelligence, pp. 541\u2013548."},{"key":"9013_CR31","doi-asserted-by":"crossref","unstructured":"Sutton, R., & Barto, A. G. (1998). Reinforcement learning: An introduction. MIT Press.","DOI":"10.1109\/TNN.1998.712192"},{"key":"9013_CR32","doi-asserted-by":"crossref","unstructured":"Tan, M. (1993). Multi-agent reinforcement learning: Independent vs. cooperative agents. In Proceedings of the tenth international conference on machine learning, pp. 330\u2013337.","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"9013_CR33","unstructured":"Tesauro, G. (2004). Extending Q-learning to general adaptive multi-agent systems. In S. Thrun, L. Saul, & B. Sch\u00f6lkopf, (Eds), Advances in neural information processing systems Vol. 16. Cambridge, MA: MIT Press."},{"key":"9013_CR34","unstructured":"Wang, X., & Sandholm, T. (2002). Reinforcement learning to play an optimal nash equilibrium in team markov games. In Advances in neural information processing systems 15, NIPS."},{"key":"9013_CR35","unstructured":"Weinberg, M., & Rosenschein, J. S. (2004). Best-Response multiagent learning in non-stationary environments. In Proceedings of the third international joint conference on autonomous agents and multiagent systems (AAMAS), (vol. 2, pp. 506\u2013513, New York, NY: ACM."},{"key":"9013_CR36","unstructured":"Zinkevich, M. (2003). Online convex programming and generalized infinitesimal gradient ascent. In Proceedings of the twentieth international conference on machine learning, Washington DC."}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-007-9013-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-007-9013-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-007-9013-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,29]],"date-time":"2019-05-29T17:28:23Z","timestamp":1559150903000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-007-9013-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,6,8]]},"references-count":36,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2007,10,10]]}},"alternative-id":["9013"],"URL":"https:\/\/doi.org\/10.1007\/s10458-007-9013-x","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2007,6,8]]}}}