{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:50:01Z","timestamp":1743123001859,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":13,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642044274"},{"type":"electronic","value":"9783642044281"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-04428-1_39","type":"book-chapter","created":{"date-parts":[[2009,10,9]],"date-time":"2009-10-09T12:05:58Z","timestamp":1255089958000},"page":"446-457","source":"Crossref","is-referenced-by-count":0,"title":["Anytime Self-play Learning to Satisfy Functional Optimality Criteria"],"prefix":"10.1007","author":[{"given":"Andriy","family":"Burkov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brahim","family":"Chaib-draa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"issue":"2","key":"39_CR1","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M. Bowling","year":"2002","unstructured":"Bowling, M., Veloso, M.: Multiagent learning using a variable learning rate. Artificial Intelligence\u00a0136(2), 215\u2013250 (2002)","journal-title":"Artificial Intelligence"},{"key":"39_CR2","unstructured":"Claus, C., Boutilier, C.: The dynamics of reinforcement learning in cooperative multiagent systems. In: Proceedings of AAAI 1998 (1998)"},{"key":"39_CR3","first-page":"1039","volume":"4","author":"J. Hu","year":"2003","unstructured":"Hu, J., Wellman, M.: Nash Q-learning for general-sum stochastic games. Journal of ML Research\u00a04, 1039\u20131069 (2003)","journal-title":"Journal of ML Research"},{"key":"39_CR4","unstructured":"Banerjee, B., Peng, J.: Performance bounded reinforcement learning in strategic interactions. In: Proceedings of AAAI 2004 (2004)"},{"key":"39_CR5","unstructured":"Greenwald, A.: Correlated-Q learning. In: AAAI Spring Symposium (2003)"},{"key":"39_CR6","doi-asserted-by":"crossref","unstructured":"Crandall, J., Goodrich, M.: Learning to compete, compromise, and cooperate in repeated general-sum games. In: Proceedings ICML 2005 (2005)","DOI":"10.1145\/1102351.1102372"},{"issue":"1","key":"39_CR7","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/j.dss.2004.08.007","volume":"39","author":"M. Littman","year":"2005","unstructured":"Littman, M., Stone, P.: A polynomial-time Nash equilibrium algorithm for repeated games. Decision Support Systems\u00a039(1), 55\u201366 (2005)","journal-title":"Decision Support Systems"},{"issue":"2","key":"39_CR8","doi-asserted-by":"publisher","first-page":"155","DOI":"10.2307\/1907266","volume":"18","author":"J. Nash","year":"1950","unstructured":"Nash, J.: The Bargaining Problem. Econometrica\u00a018(2), 155\u2013162 (1950)","journal-title":"Econometrica"},{"key":"39_CR9","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"39_CR10","volume-title":"Advances in Neural Information Processing Systems 17: Proceedings of The 2004 Conference","author":"D. de Farias","year":"2005","unstructured":"de Farias, D., Megiddo, N., Cambridge, M., San Jose, C.: Exploration-Exploitation Tradeoffs for Experts Algorithms in Reactive Environments. In: Advances in Neural Information Processing Systems 17: Proceedings of The 2004 Conference. MIT Press, Cambridge (2005)"},{"issue":"3","key":"39_CR11","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1023\/A:1007678930559","volume":"38","author":"S. Singh","year":"2000","unstructured":"Singh, S., Jaakkola, T., Littman, M., Szepesv\u00e1ri, C.: Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms. Machine Learning\u00a038(3), 287\u2013308 (2000)","journal-title":"Machine Learning"},{"key":"39_CR12","doi-asserted-by":"crossref","unstructured":"Chalkiadakis, G., Boutilier, C.: Coordination in multiagent reinforcement learning: A bayesian approach. In: Proceedings of the Second International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS 2003), Melbourne, Australia (2003)","DOI":"10.1145\/860575.860689"},{"issue":"6","key":"39_CR13","first-page":"562","volume":"81","author":"S. Brams","year":"1993","unstructured":"Brams, S.: Theory of Moves. American Scientist\u00a081(6), 562\u2013570 (1993)","journal-title":"American Scientist"}],"container-title":["Lecture Notes in Computer Science","Algorithmic Decision Theory"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-04428-1_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,10]],"date-time":"2020-10-10T10:07:17Z","timestamp":1602324437000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-04428-1_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642044274","9783642044281"],"references-count":13,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-04428-1_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}