{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T20:44:41Z","timestamp":1761597881305,"version":"3.37.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319716817"},{"type":"electronic","value":"9783319716824"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-71682-4_15","type":"book-chapter","created":{"date-parts":[[2017,11,24]],"date-time":"2017-11-24T16:24:12Z","timestamp":1511540652000},"page":"239-257","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Towards a Fast Detection of Opponents in Repeated Stochastic Games"],"prefix":"10.1007","author":[{"given":"Pablo","family":"Hernandez-Leal","sequence":"first","affiliation":[]},{"given":"Michael","family":"Kaisers","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,11,25]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1016\/j.artint.2016.02.004","volume":"235","author":"SV Albrecht","year":"2016","unstructured":"Albrecht, S.V., Crandall, J.W., Ramamoorthy, S.: Belief and truth in hypothesised behaviours. Artif. Intell. 235, 63\u201394 (2016)","journal-title":"Artif. Intell."},{"issue":"2\/3","key":"15_CR2","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47(2\/3), 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"key":"15_CR3","unstructured":"Banerjee, B., Stone, P.: General game learning using knowledge transfer. In: International Joint Conference on Artificial Intelligence, pp. 672\u2013677 (2007)"},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Barrett, S., Stone, P.: Cooperating with unknown teammates in complex domains: a robot soccer case study of ad hoc teamwork. In: Proceedings of the 29th Conference on Artificial Intelligence, pp. 2010\u20132016. Austin, Texas, USA (2014)","DOI":"10.1609\/aaai.v29i1.9428"},{"issue":"1","key":"15_CR5","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1016\/j.geb.2011.06.009","volume":"74","author":"J Bednar","year":"2012","unstructured":"Bednar, J., Chen, Y., Liu, T.X., Page, S.: Behavioral spillovers and cognitive load in multiple games: an experimental study. Games Econ. Behav. 74(1), 12\u201331 (2012)","journal-title":"Games Econ. Behav."},{"issue":"5","key":"15_CR6","first-page":"679","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: A Markovian decision process. J. Math. Mech. 6(5), 679\u2013684 (1957)","journal-title":"J. Math. Mech."},{"key":"15_CR7","doi-asserted-by":"crossref","first-page":"659","DOI":"10.1613\/jair.4818","volume":"53","author":"D Bloembergen","year":"2015","unstructured":"Bloembergen, D., Tuyls, K., Hennes, D., Kaisers, M.: Evolutionary dynamics of multi-agent learning: a survey. J. Artif. Intell. Res. 53, 659\u2013697 (2015)","journal-title":"J. Artif. Intell. Res."},{"key":"15_CR8","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/978-3-642-29946-9_25","volume-title":"Recent Advances in Reinforcement Learning","author":"G Boutsioukis","year":"2012","unstructured":"Boutsioukis, G., Partalas, I., Vlahavas, I.: Transfer learning in multi-agent reinforcement learning domains. In: Sanner, S., Hutter, M. (eds.) EWRL 2011. LNCS (LNAI), vol. 7188, pp. 249\u2013260. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-29946-9_25"},{"issue":"2","key":"15_CR9","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M Bowling","year":"2002","unstructured":"Bowling, M., Veloso, M.: Multiagent learning using a variable learning rate. Artif. Intell. 136(2), 215\u2013250 (2002)","journal-title":"Artif. Intell."},{"key":"15_CR10","first-page":"213","volume":"3","author":"RI Brafman","year":"2003","unstructured":"Brafman, R.I., Tennenholtz, M.: R-MAX a general polynomial time algorithm for near-optimal reinforcement learning. J. Mach. Learn. Res. 3, 213\u2013231 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR11","unstructured":"Brunskill, E., Li, L.: PAC-inspired option discovery in lifelong reinforcement learning. In: Proceedings of the 22nd Conference on Artificial Intelligence, pp. 1599\u20131610 (2014)"},{"issue":"2","key":"15_CR12","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., De Schutter, B.: A comprehensive survey of multiagent reinforcement learning. IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.) 38(2), 156\u2013172 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.)"},{"issue":"2","key":"15_CR13","doi-asserted-by":"crossref","first-page":"182","DOI":"10.1007\/s10458-013-9222-4","volume":"28","author":"D Chakraborty","year":"2013","unstructured":"Chakraborty, D., Stone, P.: Multiagent learning in the presence of memory-bounded agents. Auton. Agents Multi-Agent Syst. 28(2), 182\u2013213 (2013)","journal-title":"Auton. Agents Multi-Agent Syst."},{"issue":"1\u20132","key":"15_CR14","first-page":"23","volume":"67","author":"V Conitzer","year":"2006","unstructured":"Conitzer, V., Sandholm, T.: AWESOME: a general multiagent learning algorithm that converges in self-play and learns a best response against stationary opponents. Mach. Learn. 67(1\u20132), 23\u201343 (2006)","journal-title":"Mach. Learn."},{"key":"15_CR15","unstructured":"Crandall, J.W.: Just add pepper: extending learning algorithms for repeated matrix games to repeated markov games. In: Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems, pp. 399\u2013406. Valencia, Spain (2012)"},{"issue":"1","key":"15_CR16","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1613\/jair.4202","volume":"49","author":"JW Crandall","year":"2014","unstructured":"Crandall, J.W.: Towards minimizing disappointment in repeated games. J. Artif. Intell. Res. 49(1), 111\u2013142 (2014)","journal-title":"J. Artif. Intell. Res."},{"key":"15_CR17","unstructured":"Crandall, J.W.: Robust learning for repeated stochastic games via meta-gaming. In: Proceedings of the Twenty-Fourth International Joint Conference on Artificial Intelligence, pp. 3416\u20133422. Buenos Aires, Argentina (2015)"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Da Silva, B.C., Basso, E.W., Bazzan, A.L., Engel, P.M.: Dealing with non-stationary environments using context detection. In: Proceedings of the 23rd International Conference on Machine Learnig, pp. 217\u2013224. Pittsburgh, Pennsylvania (2006)","DOI":"10.1145\/1143844.1143872"},{"key":"15_CR19","unstructured":"De Hauwere, Y.M., Vrancx, P., Nowe, A.: Learning multi-agent state space representations. In: Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems, pp. 715\u2013722. Toronto, Canada (2010)"},{"key":"15_CR20","unstructured":"Elidrisi, M., Johnson, N., Gini, M., Crandall, J.W.: Fast adaptive learning in repeated stochastic games by game abstraction. In: Proceedings of the 13th International Conference on Autonomous Agents and Multiagent Systems, pp. 1141\u20131148. Paris, France (2014)"},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Fern\u00e1ndez, F., Veloso, M.: Probabilistic policy reuse in a reinforcement learning agent. In: Proceedings of the 5th International Conference on Autonomous Agents and Multiagent Systems, pp. 720\u2013727. ACM, Hakodata, Hokkaido, Japan (2006)","DOI":"10.1145\/1160633.1160762"},{"key":"15_CR22","volume-title":"Game Theory","author":"D Fudenberg","year":"1991","unstructured":"Fudenberg, D., Tirole, J.: Game Theory. The MIT Press, Cambridge (1991)"},{"issue":"2","key":"15_CR23","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1080\/09540091.2014.885294","volume":"26","author":"P Hernandez-Leal","year":"2014","unstructured":"Hernandez-Leal, P., Munoz de Cote, E., Sucar, L.E.: A framework for learning and planning against switching strategies in repeated games. Connect. Sci. 26(2), 103\u2013122 (2014)","journal-title":"Connect. Sci."},{"key":"15_CR24","doi-asserted-by":"crossref","unstructured":"Hernandez-Leal, P., Kaisers, M.: Learning against sequential opponents in repeated stochastic games. In: The 3rd Multi-disciplinary Conference on Reinforcement Learning and Decision Making, Ann Arbor (2017)","DOI":"10.1007\/978-3-319-71682-4_15"},{"key":"15_CR25","doi-asserted-by":"crossref","unstructured":"Hernandez-Leal, P., Taylor, M.E., Rosman, B., Sucar, L.E., Munoz de Cote, E.: Identifying and tracking switching, non-stationary opponents: a bayesian approach. In: Multiagent Interaction without Prior Coordination Workshop at AAAI, Phoenix, AZ, USA (2016)","DOI":"10.1007\/s10458-016-9352-6"},{"issue":"4","key":"15_CR26","doi-asserted-by":"crossref","first-page":"767","DOI":"10.1007\/s10458-016-9352-6","volume":"31","author":"P Hernandez-Leal","year":"2017","unstructured":"Hernandez-Leal, P., Zhan, Y., Taylor, M.E., Sucar, L.E., Munoz de Cote, E.: Efficiently detecting switches against non-stationary opponents. Auton. Agents Multi-Agent Syst. 31(4), 767\u2013789 (2017)","journal-title":"Auton. Agents Multi-Agent Syst."},{"key":"15_CR27","unstructured":"Langford, J., Zhang, T.: The epoch-greedy algorithm for multi-armed bandits with side information. In: Advances in Neural Information Processing Systems, pp. 817\u2013824 (2008)"},{"key":"15_CR28","unstructured":"Lazaric, A., Ghavamzadeh, M.: Bayesian multi-task reinforcement learning. In: Proceedings of the 27th International Conference on Machine Learning, Haifa, Israel (2010)"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Lazaric, A., Restelli, M., Bonarini, A.: Transfer of samples in batch reinforcement learning. In: International Conference on Machine Learning, pp. 544\u2013551. ACM, Helsinki, Finland (2008)","DOI":"10.1145\/1390156.1390225"},{"issue":"1","key":"15_CR30","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/s10994-016-5547-y","volume":"104","author":"B Rosman","year":"2016","unstructured":"Rosman, B., Hawasly, M., Ramamoorthy, S.: Bayesian policy reuse. Mach. Learn. 104(1), 99\u2013127 (2016)","journal-title":"Mach. Learn."},{"key":"15_CR31","first-page":"2125","volume":"8","author":"ME Taylor","year":"2007","unstructured":"Taylor, M.E., Stone, P., Liu, Y.: Transfer learning via inter-task mappings for temporal difference learning. J. Mach. Learn. Res. 8, 2125\u20132167 (2007)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR32","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10, 1633\u20131685 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR33","unstructured":"Watkins, J.: Learning from delayed rewards. Ph.D. thesis, King\u2019s College, Cambridge, UK (1989)"}],"container-title":["Lecture Notes in Computer Science","Autonomous Agents and Multiagent Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-71682-4_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T15:13:41Z","timestamp":1659971621000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-71682-4_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319716817","9783319716824"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-71682-4_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}