{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T10:02:31Z","timestamp":1760608951354},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2016,10,13]],"date-time":"2016-10-13T00:00:00Z","timestamp":1476316800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1007\/s10458-016-9347-3","type":"journal-article","created":{"date-parts":[[2016,10,13]],"date-time":"2016-10-13T14:54:38Z","timestamp":1476370478000},"page":"971-1002","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["An exploration strategy for non-stationary opponents"],"prefix":"10.1007","volume":"31","author":[{"given":"Pablo","family":"Hernandez-Leal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yusen","family":"Zhan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthew E.","family":"Taylor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"L. Enrique","family":"Sucar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Enrique","family":"Munoz de Cote","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,10,13]]},"reference":[{"issue":"2\/3","key":"9347_CR1","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., & Fischer, P. (2002). Finite-time analysis of the multiarmed Bandit problem. Machine Learning, 47(2\/3), 235\u2013256.","journal-title":"Machine Learning"},{"issue":"27","key":"9347_CR2","doi-asserted-by":"crossref","first-page":"1390","DOI":"10.1126\/science.7466396","volume":"211","author":"R Axelrod","year":"1981","unstructured":"Axelrod, R., & Hamilton, W. D. (1981). The evolution of cooperation. Science, 211(27), 1390\u20131396.","journal-title":"Science"},{"key":"9347_CR3","unstructured":"Babes, M., Munoz de Cote, E., & Littman, M. L. (2008). Social reward shaping in the prisoner\u2019s dilemma. In Proceedings of the 7th International Conference on Autonomous Agents and Multiagent Systems, (pp. 1389\u20131392). Estoril: International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9347_CR4","doi-asserted-by":"crossref","unstructured":"Banerjee, B., & Peng, J. (2005). Efficient learning of multi-step best response. In Proceedings of the 4th International Conference on Autonomous Agents and Multiagent Systems, (pp. 60\u201366). Utretch: ACM.","DOI":"10.1145\/1082473.1082483"},{"key":"9347_CR5","unstructured":"Bard, N., Johanson, M., Burch, N., & Bowling, M. (2013). Online implicit agent modelling. In Proceedings of the 12th International Conference on Autonomous Agents and Multiagent Systems, (pp. 255\u2013262). Saint Paul, MN: International Foundation for Autonomous Agents and Multiagent Systems."},{"issue":"2","key":"9347_CR6","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M Bowling","year":"2002","unstructured":"Bowling, M., & Veloso, M. (2002). Multiagent learning using a variable learning rate. Artificial Intelligence, 136(2), 215\u2013250.","journal-title":"Artificial Intelligence"},{"key":"9347_CR7","first-page":"213","volume":"3","author":"RI Brafman","year":"2003","unstructured":"Brafman, R. I., & Tennenholtz, M. (2003). R-MAX a general polynomial time algorithm for near-optimal reinforcement learning. The Journal of Machine Learning Research, 3, 213\u2013231.","journal-title":"The Journal of Machine Learning Research"},{"issue":"2","key":"9347_CR8","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., & De Schutter, B. (2008). A comprehensive survey of multiagent reinforcement learning. IEEE Transactions on Systems, Man and Cybernetics, Part C Applications and Reviews, 38(2), 156\u2013172.","journal-title":"IEEE Transactions on Systems, Man and Cybernetics, Part C Applications and Reviews"},{"issue":"2","key":"9347_CR9","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1023\/A:1010007108196","volume":"2","author":"D Carmel","year":"1999","unstructured":"Carmel, D., & Markovitch, S. (1999). Exploration strategies for model-based learning in multi-agent systems. Autonomous Agents and Multi-Agent Systems, 2(2), 141\u2013172.","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"issue":"11","key":"9347_CR10","doi-asserted-by":"crossref","first-page":"2585","DOI":"10.1016\/S0031-3203(03)00136-5","volume":"36","author":"GC Cawley","year":"2003","unstructured":"Cawley, G. C., & Talbot, N. L. C. (2003). Efficient leave-one-out cross-validation of kernel fisher discriminant classifiers. Pattern Recognition, 36(11), 2585\u20132592.","journal-title":"Pattern Recognition"},{"key":"9347_CR11","unstructured":"Chakraborty, D., Agmon, N., & Stone, P. (2013). Targeted opponent modeling of memory-bounded agents. In Proceedings of the Adaptive Learning Agents Workshop (ALA). Saint Paul, MN, USA."},{"key":"9347_CR12","doi-asserted-by":"crossref","unstructured":"Chakraborty, D., & Stone, P. (2008). Online multiagent learning against memory bounded adversaries. In Machine Learning and Knowledge Discovery in Databases (pp. 211\u2013226). Berlin: Springer.","DOI":"10.1007\/978-3-540-87479-9_32"},{"issue":"2","key":"9347_CR13","doi-asserted-by":"crossref","first-page":"182","DOI":"10.1007\/s10458-013-9222-4","volume":"28","author":"D Chakraborty","year":"2013","unstructured":"Chakraborty, D., & Stone, P. (2013). Multiagent learning in the presence of memory-bounded agents. Autonomous Agents and Multi-Agent Systems, 28(2), 182\u2013213.","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"9347_CR14","unstructured":"Choi, S. P. M., Yeung, D. Y., & Zhang, N. L. (1999). An environment model for nonstationary reinforcement learning. In Advances in Neural Information Processing Systems, (pp. 987\u2013993). Denver, CO, USA."},{"key":"9347_CR15","doi-asserted-by":"crossref","unstructured":"Da\u00a0Silva, B. C., Basso, E. W., Bazzan, A. L., & Engel, P. M. (2006). Dealing with non-stationary environments using context detection. In Proceedings of the 23rd International Conference on Machine Learnig, (pp. 217\u2013224). Pittsburgh, PA, USA.","DOI":"10.1145\/1143844.1143872"},{"key":"9347_CR16","doi-asserted-by":"crossref","unstructured":"Dietterich, T. G. (2000). Ensemble methods in machine learning. In Multiple Classifier Systems, (pp. 1\u201315). Berlin: Springer","DOI":"10.1007\/3-540-45014-9_1"},{"key":"9347_CR17","unstructured":"Doshi, P., & Gmytrasiewicz, P. J. (2006). On the difficulty of achieving equilibrium in interactive POMDPs. In Twenty-first National Conference on Artificial Intelligence, (pp. 1131\u20131136). Boston, MA, USA."},{"key":"9347_CR18","unstructured":"Elidrisi, M., Johnson, N., & Gini, M. (2012). Fast learning against adaptive adversarial opponents. In Proceedings of the Adaptive Learning Agents Workshop (ALA), Valencia, Spain."},{"key":"9347_CR19","unstructured":"Elidrisi, M., Johnson, N., Gini, M., & Crandall, J. W. (2014). Fast adaptive learning in repeated stochastic games by game abstraction. In Proceedings of the 13th International Conference on Autonomous Agents and Multiagent Systems, (pp. 1141\u20131148). Paris, France."},{"key":"9347_CR20","unstructured":"Fulda, N., & Ventura, D. (2007). Predicting and preventing coordination problems in cooperative Q-learning systems. In Proceedings of the Twentieth International Joint Conference on Artificial Intelligence, (pp. 780\u2013785). Hyderabad, India."},{"key":"9347_CR21","doi-asserted-by":"crossref","unstructured":"Garivier, A., & Moulines, E. (2011). On upper-confidence bound policies for switching bandit problems. In Algorithmic Learning Theory, (pp. 174\u2013188). Berlin: Springer.","DOI":"10.1007\/978-3-642-24412-4_16"},{"key":"9347_CR22","unstructured":"Geibel, P. (2001). Reinforcement learning with bounded risk. In Proceedings of the Eighteenth International Conference on Machine Learning, (pp. 162\u2013169). Williamstown, MA: Morgan Kaufmann Publishers Inc."},{"issue":"2","key":"9347_CR23","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1111\/j.2517-6161.1979.tb01068.x","volume":"41","author":"JC Gittins","year":"1979","unstructured":"Gittins, J. C. (1979). Bandit processes and dynamic allocation indices. Journal of the Royal Statistical Society, 41(2), 148\u2013177.","journal-title":"Journal of the Royal Statistical Society"},{"key":"9347_CR24","unstructured":"Hans, A., Schneega\u00df, D., Sch\u00e4fer, A. M., & Udluft, S. (2008). Safe exploration for reinforcement learning. In European Symposium on Artificial Neural Networks, (pp. 143\u2013148). Bruges, Belgium."},{"key":"9347_CR25","unstructured":"Hernandez-Leal, P., Munoz de Cote, E., & Sucar, L. E. (2013). Modeling non-stationary opponents. In Proceedings of the 12th International Conference on Autonomous Agents and Multiagent Systems, (pp. 1135\u20131136). International Foundation for Autonomous Agents and Multiagent Systems, Saint Paul, MN, USA."},{"issue":"2","key":"9347_CR26","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1080\/09540091.2014.885294","volume":"26","author":"P Hernandez-Leal","year":"2014","unstructured":"Hernandez-Leal, P., Munoz de Cote, E., & Sucar, L. E. (2014). A framework for learning and planning against switching strategies in repeated games. Connection Science, 26(2), 103\u2013122.","journal-title":"Connection Science"},{"key":"9347_CR27","unstructured":"Hernandez-Leal, P., Munoz de Cote, E., & Sucar, L. E. (2014). Exploration strategies to detect strategy switches. In Proceedings of the Adaptive Learning Agents Workshop (ALA). Paris, France."},{"key":"9347_CR28","unstructured":"Hernandez-Leal, P., Taylor, M. E., Rosman, B., Sucar, L. E., & Munoz de Cote, E. (2016). Identifying and tracking switching, non-stationary opponents: a Bayesian approach. In In Multiagent Interaction without Prior Coordination Workshop at AAAI. Phoenix, AZ, USA."},{"key":"9347_CR29","unstructured":"HolmesParker, C., Taylor, M. E., Agogino, A., & Tumer, K. (2014). CLEANing the reward: counterfactual actions to remove exploratory action noise in multiagent learning. In Proceedings of the 13th International Conference on Autonomous Agents and Multiagent Systems, (pp. 1353\u20131354). International Foundation for Autonomous Agents and Multiagent Systems, Paris, France."},{"key":"9347_CR30","unstructured":"Kakade, S. M. (2003). On the sample complexity of reinforcement learning. Ph.D. thesis, Gatsby Computational Neuroscience Unit, University College London."},{"key":"9347_CR31","doi-asserted-by":"crossref","unstructured":"Lazaric, A., Munoz de Cote, E., & Gatti, N. (2007). Reinforcement learning in extensive form games with incomplete information: The bargaining case study. In Proceedings of the 6th International Conference on Autonomous Agents and Multiagent Systems. Honolulu, HI: ACM.","DOI":"10.1145\/1329125.1329180"},{"key":"9347_CR32","doi-asserted-by":"crossref","unstructured":"Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the 11th International Conference on Machine Learning, (pp. 157\u2013163). New Brunswick, NJ.","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"9347_CR33","unstructured":"Littman, M. L., & Stone, P. (2001). Implicit Negotiation in Repeated Games. In ATAL \u201901: Revised Papers from the 8th International Workshop on Intelligent Agents VIII."},{"key":"9347_CR34","unstructured":"Lopes, M., Lang, T., Toussaint, M., & Oudeyer, P. Y. (2012). Exploration in model-based reinforcement learning by empirically estimating learning progress. In Advances in Neural Information Processing Systems, (pp. 206\u2013214). Lake Tahoe, NV."},{"key":"9347_CR35","unstructured":"MacAlpine, P., Urieli, D., Barrett, S., Kalyanakrishnan, S., Barrera, F., Lopez-Mobilia, A., \u015etiurc\u0103, N., Vu, V., & Stone, P. (2012). UT Austin Villa 2011: a champion agent in the RoboCup 3D Soccer simulation competition. In Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems, (pp. 129\u2013136). International Foundation for Autonomous Agents and Multiagent Systems, Valencia, Spain."},{"key":"9347_CR36","unstructured":"Marinescu, A., Dusparic, I., Taylor, A., Cahill, V., & Clarke, S. (2015). P-MARL: Prediction-based multi-agent reinforcement learning for non-stationary environments. In Proceedings of the 14th International Conference on Autonomous Agents and Multiagent Systems. International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9347_CR37","doi-asserted-by":"crossref","unstructured":"Mohan, Y., & Ponnambalam, S. G. (2011). Exploration strategies for learning in multi-agent foraging. In Swarm, Evolutionary, and Memetic Computing 2011, (pp. 17\u201326). Springer.","DOI":"10.1007\/978-3-642-27242-4_3"},{"key":"9347_CR38","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1287\/mnsc.28.1.1","volume":"28","author":"GE Monahan","year":"1982","unstructured":"Monahan, G. E. (1982). A survey of partially observable Markov decision processes: Theory, models, and algorithms. Management Science, 28, 1\u201316.","journal-title":"Management Science"},{"key":"9347_CR39","unstructured":"Mota, P., Melo, F., & Coheur, L. (2015). Modeling students self-studies behaviors. In Proceedings of the 14th International Conference on Autonomous Agents and Multiagent Systems, (pp. 1521\u20131528). Istanbul, Turkey"},{"key":"9347_CR40","unstructured":"Munoz de Cote, E., Chapman, A. C., Sykulski, A. M., & Jennings, N. R. (2010). Automated planning in repeated adversarial games. In Uncertainty in Artificial Intelligence, (pp. 376\u2013383). Catalina Island, CA."},{"key":"9347_CR41","unstructured":"Munoz de Cote, E., & Jennings, N. R. (2010). Planning against fictitious players in repeated normal form games. In Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems, (pp. 1073\u20131080). International Foundation for Autonomous Agents and Multiagent Systems, Toronto, Canada."},{"key":"9347_CR42","unstructured":"Ng, A. Y., Harada, D., & Russell, S. J. (1999). Policy invariance under reward transformations: Theory and application to reward shaping. In Proceedings of the Sixteenth International Conference on Machine Learning, (pp. 278\u2013287). Bled, Slovenia."},{"key":"9347_CR43","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov decision processes: Discrete stochastic dynamic programming","author":"M Puterman","year":"1994","unstructured":"Puterman, M. (1994). Markov decision processes: Discrete stochastic dynamic programming. New York: Wiley."},{"key":"9347_CR44","doi-asserted-by":"crossref","unstructured":"Rejeb, L., Guessoum, Z., & M\u2019Hallah, R. (2005). An adaptive approach for the exploration\u2013exploitation dilemma for learning agents. In Proceedings of the 4th international Central and Eastern European conference on Multi-Agent Systems and Applications, (pp. 316\u2013325). Springer, Budapest, Hungary.","DOI":"10.1007\/11559221_32"},{"key":"9347_CR45","volume-title":"Bargaining theory","author":"I Stahl","year":"1972","unstructured":"Stahl, I. (1972). Bargaining theory. Stockolm: Stockolm School of Economics."},{"issue":"3","key":"9347_CR46","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1023\/A:1008942012299","volume":"8","author":"P Stone","year":"2000","unstructured":"Stone, P., & Veloso, M. (2000). Multiagent systems: A survey from a machine learning perspective. Autonomous Robots, 8(3), 345\u2013383.","journal-title":"Autonomous Robots"},{"key":"9347_CR47","doi-asserted-by":"crossref","unstructured":"Suematsu, N., & Hayashi, A. (2002). A multiagent reinforcement learning algorithm using extended optimal response. In Proceedings of the 1st International Conference on Autonomous Agents and Multiagent Systems, (pp. 370\u2013377). ACM Request Permissions, Bologna, Italy.","DOI":"10.1145\/544741.544831"},{"key":"9347_CR48","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor, M. E., & Stone, P. (2009). Transfer learning for reinforcement learning domains: A survey. The Journal of Machine Learning Research, 10, 1633\u20131685.","journal-title":"The Journal of Machine Learning Research"},{"issue":"1","key":"9347_CR49","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2719648","volume":"10","author":"P Vrancx","year":"2015","unstructured":"Vrancx, P., Gurzi, P., Rodriguez, A., Steenhaut, K., & Nowe, A. (2015). A reinforcement learning approach for interdomain routing with link prices. ACM Transactions on Autonomous and Adaptive Systems, 10(1), 1\u201326.","journal-title":"ACM Transactions on Autonomous and Adaptive Systems"},{"key":"9347_CR50","first-page":"279","volume":"8","author":"C Watkins","year":"1992","unstructured":"Watkins, C., & Dayan, P. (1992). Q-learning. Machine Learning, 8, 279\u2013292.","journal-title":"Machine Learning"},{"key":"9347_CR51","unstructured":"Weinberg, M., & Rosenschein, J. S. (2004). Best-response multiagent learning in non-stationary environments. In Proceedings of the 3rd International Conference on Autonomous Agents and Multiagent Systems, (pp. 506\u2013513). New York: IEEE Computer Society."},{"issue":"1","key":"9347_CR52","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1145\/1978721.1978730","volume":"10","author":"MA Zinkevich","year":"2011","unstructured":"Zinkevich, M. A., Bowling, M., & Wunder, M. (2011). The lemonade stand game competition: Solving unsolvable games. SIGecom Exchanges, 10(1), 35\u201338.","journal-title":"SIGecom Exchanges"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-016-9347-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-016-9347-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-016-9347-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T22:37:32Z","timestamp":1718836652000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-016-9347-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10,13]]},"references-count":52,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2017,9]]}},"alternative-id":["9347"],"URL":"https:\/\/doi.org\/10.1007\/s10458-016-9347-3","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,10,13]]}}}