{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T14:18:42Z","timestamp":1770214722761,"version":"3.49.0"},"reference-count":35,"publisher":"Elsevier BV","issue":"2","license":[{"start":{"date-parts":[[2002,4,1]],"date-time":"2002-04-01T00:00:00Z","timestamp":1017619200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2013,8,22]],"date-time":"2013-08-22T00:00:00Z","timestamp":1377129600000},"content-version":"vor","delay-in-days":4161,"URL":"https:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[2002,4]]},"DOI":"10.1016\/s0004-3702(02)00121-2","type":"journal-article","created":{"date-parts":[[2002,10,14]],"date-time":"2002-10-14T13:01:41Z","timestamp":1034600501000},"page":"215-250","source":"Crossref","is-referenced-by-count":478,"title":["Multiagent learning using a variable learning rate"],"prefix":"10.1016","volume":"136","author":[{"given":"Michael","family":"Bowling","sequence":"first","affiliation":[]},{"given":"Manuela","family":"Veloso","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S0004-3702(02)00121-2_BIB001","article-title":"Gradient descent for general reinforcement learning","volume":"11","author":"Baird","year":"1999"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB002","series-title":"Proc. 17th International Conference on Machine Learning, Stanford, CA","first-page":"41","article-title":"Reinforcement learning in pomdp's via direct gradient ascent","author":"Baxter","year":"2000"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB003","series-title":"Dynamic Programming","author":"Bellman","year":"1957"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB004","series-title":"Proc. 10th Annual Conference on Computational Learning Theory, Nashville, TN","article-title":"On-line learning and the metrical task system problem","author":"Blum","year":"1997"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB005","series-title":"Proc. 17th International Conference on Machine Learning, Stanford, CA","first-page":"89","article-title":"Convergence problems of general-sum multiagent reinforcement learning","author":"Bowling","year":"2000"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB006","series-title":"An analysis of stochastic game theory for multiagent reinforcement learning, Technical Report CMU-CS-00-165","author":"Bowling","year":"2000"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB007","series-title":"Proc. IJCAI-01, Seattle, WA","first-page":"1021","article-title":"Rational and convergent learning in stochastic games","author":"Bowling","year":"2001"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB008","series-title":"Proc. 18th International Conference on Machine Learning, Williamstown, MA","first-page":"27","article-title":"Variable learning rate and the convergence of gradient dynamics","author":"Bowling","year":"2001"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB009","series-title":"Proc. AAAI-98, Madison, WI","article-title":"The dynamics of reinforcement learning in cooperative multiagent systems","author":"Claus","year":"1998"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB010","series-title":"Competitive Markov Decision Processes","author":"Filar","year":"1997"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB011","first-page":"89","article-title":"Equilibrium in a stochastic n-person game","volume":"28","author":"Fink","year":"1964","journal-title":"J. Sci. Hiroshima University, Series A-I"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB012","series-title":"The Theory of Learning in Games","author":"Fudenberg","year":"1999"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB013","series-title":"Dynamic Programming and Markov Processes","author":"Howard","year":"1960"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB014","series-title":"Learning in dynamic noncooperative multiagent systems, Ph.D. Thesis","author":"Hu","year":"1999"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB015","series-title":"Proc. 15th International Conference on Machine Learning, Madison, WI","first-page":"242","article-title":"Multiagent reinforcement learning: Theoretical framework and an algorithm","author":"Hu","year":"1998"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB016","article-title":"Reinforcement learning algorithm for partially observable Markov decision problems","volume":"6","author":"Jaakkola","year":"1994"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB017","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement learning: A survey","volume":"4","author":"Kaelbling","year":"1996","journal-title":"J. Artificial Intelligence Res."},{"key":"10.1016\/S0004-3702(02)00121-2_BIB018","series-title":"Proc. 11th International Conference on Machine Learning, New Brunswick, NJ","first-page":"157","article-title":"Markov games as a framework for multi-agent reinforcement learning","author":"Littman","year":"1994"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB019","doi-asserted-by":"crossref","first-page":"348","DOI":"10.1016\/0022-247X(64)90021-6","article-title":"Two-person nonzero-sum games and quadratic programming","volume":"9","author":"Mangasarian","year":"1964","journal-title":"J. Math. Anal. Appl."},{"key":"10.1016\/S0004-3702(02)00121-2_BIB020","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1073\/pnas.36.1.48","article-title":"Equilibrium points in n-person games","volume":"36","author":"Nash","year":"1950","journal-title":"Proc. Nat. Acad. Sci."},{"key":"10.1016\/S0004-3702(02)00121-2_BIB021","series-title":"A Course in Game Theory","author":"Osborne","year":"1994"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB022","series-title":"Game Theory","author":"Owen","year":"1995"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB023","series-title":"Differential Equations: Foundations and Applications","author":"Reinhard","year":"1987"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB024","doi-asserted-by":"crossref","first-page":"296","DOI":"10.2307\/1969530","article-title":"An iterative method of solving a game","volume":"54","author":"Robinson","year":"1951","journal-title":"Ann. Math."},{"key":"10.1016\/S0004-3702(02)00121-2_BIB025","series-title":"Proc. AAAI-94, Seattle, WA","first-page":"426","article-title":"Learning to coordinate without sharing information","author":"Sen","year":"1994"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB026","doi-asserted-by":"crossref","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","article-title":"Stochastic games","volume":"39","author":"Shapley","year":"1953","journal-title":"Proc. Nat. Acad. Sci."},{"key":"10.1016\/S0004-3702(02)00121-2_BIB027","series-title":"Proc. 16th Conference on Uncertainty in Artificial Intelligence, Stanford, CA","first-page":"541","article-title":"Nash convergence of gradient dynamics in general-sum games","author":"Singh","year":"2000"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB028","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1023\/A:1007678930559","article-title":"Convergence results for single-step on-policy reinforcement-learning algorithms","volume":"38","author":"Singh","year":"2000","journal-title":"Machine Learning"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB029","series-title":"Reinforcement Learning","author":"Sutton","year":"1998"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB030","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"Sutton","year":"2000"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB031","series-title":"Proc. 10th International Conference on Machine Learning, Amherst, MA","first-page":"330","article-title":"Multi-agent reinforcement learning: Independent vs. cooperative agents","author":"Tan","year":"1993"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB032","unstructured":"W. Uther, M. Veloso, Adversarial reinforcement learning, Technical Report, Carnegie Mellon University, Pittsburgh, PA, 1997. Unpublished"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB033","series-title":"Stochastic Games with Finite State and Action Spaces","author":"Vrieze","year":"1987"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB034","series-title":"Learning from delayed rewards, Ph.D. Thesis","author":"Watkins","year":"1989"},{"key":"10.1016\/S0004-3702(02)00121-2_BIB035","series-title":"Evolutionary Game Theory","author":"Weibull","year":"1995"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370202001212?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370202001212?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,1,8]],"date-time":"2020-01-08T03:07:25Z","timestamp":1578452845000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0004370202001212"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002,4]]},"references-count":35,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2002,4]]}},"alternative-id":["S0004370202001212"],"URL":"https:\/\/doi.org\/10.1016\/s0004-3702(02)00121-2","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[2002,4]]}}}