{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T08:55:15Z","timestamp":1774256115059,"version":"3.50.1"},"reference-count":33,"publisher":"Elsevier BV","issue":"1","license":[{"start":{"date-parts":[[2001,4,1]],"date-time":"2001-04-01T00:00:00Z","timestamp":986083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Cognitive Systems Research"],"published-print":{"date-parts":[[2001,4]]},"DOI":"10.1016\/s1389-0417(01)00015-8","type":"journal-article","created":{"date-parts":[[2002,7,25]],"date-time":"2002-07-25T13:00:41Z","timestamp":1027602041000},"page":"55-66","source":"Crossref","is-referenced-by-count":281,"title":["Value-function reinforcement learning in Markov games"],"prefix":"10.1016","volume":"2","author":[{"given":"Michael L.","family":"Littman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/S1389-0417(01)00015-8_BIB1","series-title":"Learning and computational neuroscience: foundations of adaptive networks","article-title":"Learning and sequential decision making","author":"Barto","year":"1991"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB2","series-title":"Dynamic Programming","author":"Bellman","year":"1957"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB3","series-title":"Dynamic programming: deterministic and stochastic models","author":"Bertsekas","year":"1987"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB4","series-title":"Proceedings of the Sixth Conference on Theoretical Aspects of Rationality and Knowledge (TARK-96)","article-title":"Planning, learning and coordination in multiagent decision processes","author":"Boutilier","year":"1996"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB5","series-title":"Proceedings of the Seventeenth International Conference on Machine Learning","article-title":"Convergence problems of general-sum multiagent reinforcement learning","author":"Bowling","year":"2000"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB6","series-title":"Technical Report CMU-CS-00-165","article-title":"An analysis of stochastic game theory for multiagent reinforcement learning","author":"Bowling","year":"2000"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB7","series-title":"Proceedings of the Fifteenth National Conference on Artificial Intelligence","article-title":"The dynamics of reinforcement learning in cooperative multiagent systems","author":"Claus","year":"1998"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB8","series-title":"Competitive Markov decision processes","author":"Filar","year":"1997"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB9","series-title":"Dynamic programming and Markov processes","author":"Howard","year":"1960"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB10","series-title":"Learning in Dynamic Noncooperative Multiagent Systems","author":"Hu","year":"1999"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB11","series-title":"Proceedings of the Fifteenth International Conference on Machine Learning","article-title":"Multiagent reinforcement learning: theoretical framework and an algorithm","author":"Hu","year":"1998"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB12","series-title":"Proceedings of the Seventeenth International Conference on Machine Learning","article-title":"Experimental results on Q-learning for general-sum stochastic games","author":"Hu","year":"2000"},{"issue":"6","key":"10.1016\/S1389-0417(01)00015-8_BIB13","doi-asserted-by":"crossref","first-page":"1185","DOI":"10.1162\/neco.1994.6.6.1185","article-title":"On the convergence of stochastic iterative dynamic programming algorithms","volume":"6","author":"Jaakkola","year":"1994","journal-title":"Neural Comput."},{"key":"10.1016\/S1389-0417(01)00015-8_BIB14","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement learning: a survey","volume":"4","author":"Kaelbling","year":"1996","journal-title":"J. Artificial Intell. Res."},{"key":"10.1016\/S1389-0417(01)00015-8_BIB15","series-title":"Proceedings of the Eleventh International Conference on Machine Learning","article-title":"Markov games as a framework for multi-agent reinforcement learning","author":"Littman","year":"1994"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB16","series-title":"Proceedings of the Thirteenth International Conference on Machine Learning","article-title":"A generalized reinforcement-learning model: convergence and applications","author":"Littman","year":"1996"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB17","series-title":"Proceedings of the Fourth International Conference on Multiagent Systems","article-title":"Evaluating concurrent reinforcement learners","author":"Mundhe","year":"2000"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB18","author":"Owen","year":"1982"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB19","series-title":"Markov decision processes \u2014 discrete stochastic dynamic programming","author":"Puterman","year":"1994"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB20","series-title":"Proceedings of the Twelfth National Conference on Artificial Intelligence","article-title":"Learning to coordinate without sharing information","author":"Sen","year":"1994"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB21","doi-asserted-by":"crossref","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","article-title":"Stochastic games","volume":"39","author":"Shapley","year":"1953","journal-title":"Proc. Natl. Acad. Sci. USA"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB22","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1023\/A:1007678930559","article-title":"Convergence results for single-step on-policy reinforcement-learning algorithms","volume":"39","author":"Singh","year":"2000","journal-title":"Machine Learn."},{"key":"10.1016\/S1389-0417(01)00015-8_BIB23","series-title":"Proceedings of PRIMA\u20192000, Lecture Notes in Artificial Intelligence","article-title":"Rationality assumptions and optimality of co-learning","author":"Sun","year":"2000"},{"issue":"1","key":"10.1016\/S1389-0417(01)00015-8_BIB24","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to predict by the method of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Machine Learn."},{"key":"10.1016\/S1389-0417(01)00015-8_BIB25","series-title":"Reinforcement learning: an introduction","author":"Sutton","year":"1998"},{"issue":"8","key":"10.1016\/S1389-0417(01)00015-8_BIB26","doi-asserted-by":"crossref","first-page":"2017","DOI":"10.1162\/089976699300016070","article-title":"A unified analysis of value-function-based reinforcement-learning algorithms","volume":"11","author":"Szepesv\u00e1ri","year":"1999","journal-title":"Neural Comput."},{"issue":"3","key":"10.1016\/S1389-0417(01)00015-8_BIB27","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1007\/BF00993306","article-title":"Asynchronous stochastic approximation and Q-learning","volume":"16","author":"Tsitsiklis","year":"1994","journal-title":"Machine Learn."},{"key":"10.1016\/S1389-0417(01)00015-8_BIB28","series-title":"Adversarial reinforcement learning","author":"Uther","year":"1997"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB29","series-title":"Stochastic dynamic programming","volume":"Vol. 139","author":"van der Wal","year":"1981"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB30","series-title":"Theory of games and economic behavior","author":"von Neumann","year":"1947"},{"issue":"2","key":"10.1016\/S1389-0417(01)00015-8_BIB31","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1007\/BF01769064","article-title":"Fictitious play applied to sequences of games and discounted stochastic games","volume":"11","author":"Vrieze","year":"1982","journal-title":"Int. J. Game Theory"},{"key":"10.1016\/S1389-0417(01)00015-8_BIB32","series-title":"Learning from delayed rewards","author":"Watkins","year":"1989"},{"issue":"3","key":"10.1016\/S1389-0417(01)00015-8_BIB33","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Machine Learn."}],"container-title":["Cognitive Systems Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1389041701000158?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1389041701000158?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,1,12]],"date-time":"2020-01-12T08:45:48Z","timestamp":1578818748000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1389041701000158"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001,4]]},"references-count":33,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2001,4]]}},"alternative-id":["S1389041701000158"],"URL":"https:\/\/doi.org\/10.1016\/s1389-0417(01)00015-8","relation":{},"ISSN":["1389-0417"],"issn-type":[{"value":"1389-0417","type":"print"}],"subject":[],"published":{"date-parts":[[2001,4]]}}}