{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T06:29:54Z","timestamp":1725517794064},"publisher-location":"Berlin, Heidelberg","reference-count":14,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540855644"},{"type":"electronic","value":"9783540855651"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-85565-1_48","type":"book-chapter","created":{"date-parts":[[2008,8,31]],"date-time":"2008-08-31T19:19:06Z","timestamp":1220210346000},"page":"379-390","source":"Crossref","is-referenced-by-count":0,"title":["A Learning Automata Approach to Multi-agent Policy Gradient Learning"],"prefix":"10.1007","author":[{"given":"Maarten","family":"Peeters","sequence":"first","affiliation":[]},{"given":"Ville","family":"K\u00f6n\u00f6nen","sequence":"additional","affiliation":[]},{"given":"Katja","family":"Verbeeck","sequence":"additional","affiliation":[]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"48_CR1","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"48_CR2","unstructured":"Sutton, R.S., McAllester, D., Singh, S.P., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems (NIPS 1999), Denver, CO, pp. 1057\u20131063 (2000)"},{"issue":"2","key":"48_CR3","doi-asserted-by":"publisher","first-page":"454","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M. Bowling","year":"2002","unstructured":"Bowling, M., Veloso, M.M.: Multiagent learning using a variable learning rate. Artificial Intelligence\u00a0136(2), 454\u2013460 (2002)","journal-title":"Artificial Intelligence"},{"key":"48_CR4","unstructured":"Peshkin, L., Kim, K.E., Meuleau, N., Kaelbling, L.P.: Learning to cooperate via policy-search. In: Proceedings of the Sixteenth Conference on Uncertainty in Artifical Intelligence (UAI 2000), Stanford, CA, pp. 489\u2013496 (2000)"},{"issue":"1","key":"48_CR5","first-page":"17","volume":"3","author":"V. K\u00f6n\u00f6nen","year":"2005","unstructured":"K\u00f6n\u00f6nen, V.: Gradient based method for symmetric and asymmetric multiagent reinforcement learning. Web Intelligence and Agent Systems: An International Journal (WIAS)\u00a03(1), 17\u201330 (2005)","journal-title":"Web Intelligence and Agent Systems: An International Journal (WIAS)"},{"key":"48_CR6","unstructured":"K\u00f6n\u00f6nen, V.: Multiagent Reinforcement Learning in Markov Games: Asymmetric and Symmetric Approaches. PhD thesis, Helsinki University of Technology, Helsinki, Finland (2004)"},{"key":"48_CR7","volume-title":"Automata Theory and Modeling of Biological Systems","author":"M.L. Tsetlin","year":"1973","unstructured":"Tsetlin, M.L.: Automata Theory and Modeling of Biological Systems. Academic Press, New York (1973)"},{"key":"48_CR8","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4419-9052-5","volume-title":"Networks of Learning Automata: Techniques for Online Stochastic Optimization","author":"M.A.L. Thathachar","year":"2004","unstructured":"Thathachar, M.A.L., Sastry, P.S.: Networks of Learning Automata: Techniques for Online Stochastic Optimization. Kluwer Academic Publishers, Dordrecht (2004)"},{"key":"48_CR9","volume-title":"Learning Automata: An Introduction","author":"K.S. Narendra","year":"1989","unstructured":"Narendra, K.S., Thathachar, M.A.L.: Learning Automata: An Introduction. Prentice Hall, Englewood Cliffs (1989)"},{"key":"48_CR10","doi-asserted-by":"crossref","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08(3\u20134) (1992)","DOI":"10.1007\/BF00992696"},{"key":"48_CR11","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"48_CR12","volume-title":"Competitive Markov Decision Processes","author":"J.A. Filar","year":"1997","unstructured":"Filar, J.A., Vrieze, K.: Competitive Markov Decision Processes. Springer, Heidelberg (1997)"},{"key":"48_CR13","unstructured":"Claus, C., Boutilier, C.: The dynamics of reinforcement learning in cooperative multiagent systems. In: Proceedings of the Fifteenth National Conference of Artificial Intelligence (AAAI 1998), Madison, WI, pp. 746\u2013752 (1998)"},{"key":"48_CR14","first-page":"75","volume":"43","author":"A.H. Bowker","year":"1984","unstructured":"Bowker, A.H.: Bowker\u2019s test for symmetry. Journal of the American Statistical Association\u00a043, 75\u201383 (1984)","journal-title":"Journal of the American Statistical Association"}],"container-title":["Lecture Notes in Computer Science","Knowledge-Based Intelligent Information and Engineering Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-85565-1_48.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,23]],"date-time":"2020-11-23T21:32:44Z","timestamp":1606167164000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-85565-1_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540855644","9783540855651"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-85565-1_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[]}}