{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T13:06:31Z","timestamp":1775912791876,"version":"3.50.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,10,26]],"date-time":"2020-10-26T00:00:00Z","timestamp":1603670400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,10,26]],"date-time":"2020-10-26T00:00:00Z","timestamp":1603670400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2021,4]]},"DOI":"10.1007\/s10458-020-09480-9","type":"journal-article","created":{"date-parts":[[2020,10,26]],"date-time":"2020-10-26T18:02:54Z","timestamp":1603735374000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Efficient policy detecting and reusing for non-stationarity in Markov games"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2741-058X","authenticated-orcid":false,"given":"Yan","family":"Zheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zongzhang","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaopeng","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianpei","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanran","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changjie","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,10,26]]},"reference":[{"key":"9480_CR1","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.artint.2018.01.002","volume":"258","author":"SV Albrecht","year":"2018","unstructured":"Albrecht, S. V., & Stone, P. (2018). Autonomous agents modelling other agents: A comprehensive survey and open problems. Artificial Intelligence, 258, 66\u201395.","journal-title":"Artificial Intelligence"},{"key":"9480_CR2","doi-asserted-by":"crossref","unstructured":"Banerjee, T., Liu, M., & How, J. P. (2017). Quickest change detection approach to optimal control in Markov decision processes with model changes. In 2017 American control conference (ACC) (pp. 399\u2013405).","DOI":"10.23919\/ACC.2017.7962986"},{"key":"9480_CR3","first-page":"213","volume":"3","author":"RI Brafman","year":"2003","unstructured":"Brafman, R. I., & Tennenholtz, M. (2003). R-max\u2014A general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research, 3, 213\u2013231.","journal-title":"Journal of Machine Learning Research"},{"key":"9480_CR4","doi-asserted-by":"crossref","unstructured":"Chalkiadakis, G., & Boutilier, C. (2003). Coordination in multiagent reinforcement learning: A Bayesian approach. In Proceedings of the 2nd international conference on autonomous agents and multiagent systems (AAMAS) (pp. 709\u2013716).","DOI":"10.1145\/860575.860689"},{"key":"9480_CR5","unstructured":"Crandall, J. W. (2012). Just add pepper: Extending learning algorithms for repeated matrix games to repeated Markov games. In Proceedings of the 11th international conference on autonomous agents and multiagent systems (AAMAS) (pp. 399\u2013406)."},{"key":"9480_CR6","doi-asserted-by":"crossref","unstructured":"da\u00a0Silva, B. C., Basso, E. W., Bazzan, A. L. C., & Engel, P. M. (2006). Dealing with non-stationary environments using context detection. In Proceedings of the 23rd international conference on machine learning (ICML) (pp. 217\u2013224).","DOI":"10.1145\/1143844.1143872"},{"key":"9480_CR7","doi-asserted-by":"crossref","unstructured":"de\u00a0Weerd, H., Verbrugge, R., & Verheij, B. (2013). Higher-order theory of mind in negotiations under incomplete information. In Proceedings of the 16th international conference on principles and practice of multi-agent systems (PRIMA) (pp. 101\u2013116).","DOI":"10.1007\/978-3-642-44927-7_8"},{"key":"9480_CR8","unstructured":"Foerster, J. N., Chen, R. Y., Al-Shedivat, M., Whiteson, S., Abbeel, P., & Mordatch, I. (2018). Learning with opponent-learning awareness. In Proceedings of the 17th international conference on autonomous agents and multiagent systems (AAMAS) (pp. 122\u2013130)."},{"key":"9480_CR9","doi-asserted-by":"crossref","unstructured":"Gupta, J. K., Egorov, M., & Kochenderfer, M. J. (2017). Cooperative multi-agent control using deep reinforcement learning. In Adaptive learning agents workshop.","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"9480_CR10","unstructured":"Hadoux, E., Beynier, A., & Weng, P. (2014). Sequential decision-making under non-stationary environments via sequential change-point detection. In Learning over multiple contexts (LMCE)."},{"key":"9480_CR11","unstructured":"He, H., & Boyd-Graber, J. L. (2016). Opponent modeling in deep reinforcement learning. In Proceedings of the 33rd international conference on machine learning (ICML) (pp. 1804\u20131813)."},{"issue":"2","key":"9480_CR12","first-page":"103","volume":"26","author":"P Hernandez-Leal","year":"2014","unstructured":"Hernandez-Leal, P., de Cote, E. M., & Sucar, L. E. (2014). A framework for learning and planning against switching strategies in repeated games. Adaptive and Learning Agents, 26(2), 103\u2013122.","journal-title":"Adaptive and Learning Agents"},{"key":"9480_CR13","doi-asserted-by":"crossref","unstructured":"Hernandez-Leal, P., & Kaisers, M. (2017). Learning against sequential opponents in repeated stochastic games. In The 3rd multi-disciplinary conference on reinforcement learning and decision making.","DOI":"10.1007\/978-3-319-71682-4_15"},{"key":"9480_CR14","doi-asserted-by":"crossref","unstructured":"Hernandez-Leal, P., & Kaisers, M. (2017). Towards a fast detection of opponents in repeated stochastic games. In Proceedings of the 16th international conference on autonomous agents and multiagent systems (AAMAS) (pp. 239\u2013257).","DOI":"10.1007\/978-3-319-71682-4_15"},{"key":"9480_CR15","unstructured":"Hernandez-Leal, P., Kaisers, M., Baarslag, T., & de\u00a0Cote, E. M. (2017). A survey of learning in multiagent environments: Dealing with non-stationarity. CoRR. arXiv:1707.09183."},{"key":"9480_CR16","unstructured":"Hernandez-Leal, P., Rosman, B., Taylor, M. E., Sucar, L. E., & de\u00a0Cote, E. M. (2016). A Bayesian approach for learning and tracking switching, non-stationary opponents (extended abstract). In Proceedings of the 15th international conference on autonomous agents and multiagent systems (AAMAS) (pp. 1315\u20131316)."},{"issue":"4","key":"9480_CR17","doi-asserted-by":"publisher","first-page":"767","DOI":"10.1007\/s10458-016-9352-6","volume":"31","author":"P Hernandez-Leal","year":"2017","unstructured":"Hernandez-Leal, P., Zhan, Y., Taylor, M. E., Sucar, L. E., & de Cote, E. M. (2017). Efficiently detecting switches against non-stationary opponents. Autonomous Agents and Multi-Agent Systems, 31(4), 767\u2013789.","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"9480_CR18","unstructured":"Hinton, G. E., Vinyals, O., & Dean, J. (2015). Distilling the knowledge in a neural network. CoRR. arXiv:1503.02531."},{"key":"9480_CR19","first-page":"4565","volume":"29","author":"J Ho","year":"2016","unstructured":"Ho, J., & Ermon, S. (2016). Generative adversarial imitation learning. Advances in Neural Information Processing Systems, 29, 4565\u20134573.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"9480_CR20","unstructured":"Hong, Z., Su, S., Shann, T., Chang, Y., & Lee, C. (2018). A deep policy inference q-network for multi-agent systems. In Proceedings of the 17th international conference on autonomous agents and multiagent systems (AAMAS) (pp. 1388\u20131396)."},{"key":"9480_CR21","unstructured":"Hu, J., & Wellman, M. P. (1998). Multiagent reinforcement learning: Theoretical framework and an algorithm. In Proceedings of the 15th international conference on machine learning (ICML) (pp. 242\u2013250)."},{"key":"9480_CR22","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2016). Continuous control with deep reinforcement learning. In International conference on learning representations (ICLR)."},{"key":"9480_CR23","doi-asserted-by":"crossref","unstructured":"Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the 11th international conference on machine learning (ICML) (pp. 157\u2013163).","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"9480_CR24","first-page":"206","volume":"25","author":"M Lopes","year":"2012","unstructured":"Lopes, M., Lang, T., Toussaint, M., & Yves Oudeyer, P. (2012). Exploration in model-based reinforcement learning by empirically estimating learning progress. Advances in Neural Information Processing Systems, 25, 206\u2013214.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"9480_CR25","first-page":"6382","volume":"30","author":"R Lowe","year":"2017","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., & Mordatch, I. (2017). Multi-agent actor-critic for mixed cooperative-competitive environments. Advances in Neural Information Processing Systems, 30, 6382\u20136393.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"9480_CR26","unstructured":"Mnih, V., Badia, A. P., Mirza, M., Graves, A., Lillicrap, T. P., Harley, T., Silver, D., & Kavukcuoglu, K. (2016). Asynchronous methods for deep reinforcement learning. In Proceedings of the 33rd international conference on machine learning (ICML) (pp. 1928\u20131937)."},{"issue":"7540","key":"9480_CR27","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Anda, M. G., Bellemare, J. V., et al. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529\u2013533.","journal-title":"Nature"},{"key":"9480_CR28","unstructured":"Palmer, G., Tuyls, K., Bloembergen, D., & Savani, R. (2018). Lenient multi-agent deep reinforcement learning. In Proceedings of the 17th international conference on autonomous agents and multiagent systems (AAMAS) (pp. 443\u2013451)."},{"issue":"1","key":"9480_CR29","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10994-016-5547-y","volume":"104","author":"B Rosman","year":"2016","unstructured":"Rosman, B., Hawasly, M., & Ramamoorthy, S. (2016). Bayesian policy reuse. Machine Learning, 104(1), 99\u2013127.","journal-title":"Machine Learning"},{"key":"9480_CR30","unstructured":"Rusu, A. A., Colmenarejo, S. G., G\u00fcl\u00e7ehre, \u00c7., Desjardins, G., Kirkpatrick, J., Pascanu, R., Mnih, V., Kavukcuoglu, K., & Hadsell, R. (2015). Policy distillation. CoRR. arXiv:1511.06295."},{"key":"9480_CR31","unstructured":"Schaul, T., Quan, J., Antonoglou, I., & Silver, D. (2016). Prioritized experience replay. In International conference on learning representations (ICLR)."},{"issue":"4","key":"9480_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu, A., Matiisen, T., Kodelja, D., Kuzovkin, I., Korjus, K., Aru, J., et al. (2017). Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE, 12(4), 1\u201315. https:\/\/doi.org\/10.1371\/journal.pone.0172395.","journal-title":"PLoS ONE"},{"key":"9480_CR33","unstructured":"Vinyals, O., Ewalds, T., Bartunov, S., Georgiev, P., Vezhnevets, A. S., Yeo, M., Makhzani, A., K\u00fcttler, H., Agapiou, J., Schrittwieser, J., et\u00a0al. (2017). StarCraft II: A new challenge for reinforcement learning. arXiv:1708.04782."},{"key":"9480_CR34","doi-asserted-by":"crossref","unstructured":"van Hasselt, H., Guez, A., & Silver, D. (2016). Deep reinforcement learning with double Q-learning. In Proceedings of the 30th AAAI conference on artificial intelligence (AAAI) (pp. 2094\u20132100).","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"9480_CR35","doi-asserted-by":"crossref","unstructured":"von\u00a0der Osten, F. B., Kirley, M., & Miller, T. (2017). The minds of many: Opponent modeling in a stochastic game. In Proceedings of the 27th international joint conference on artificial intelligence, (IJCAI) (pp. 3845\u20133851).","DOI":"10.24963\/ijcai.2017\/537"},{"key":"9480_CR36","first-page":"5320","volume":"30","author":"Z Wang","year":"2017","unstructured":"Wang, Z., Merel, J. S., Reed, S. E., de Freitas, N., Wayne, G., & Heess, N. (2017). Robust imitation of diverse behaviors. Advances in Neural Information Processing Systems, 30, 5320\u20135329.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"9480_CR37","unstructured":"Wang, Z., Schaul, T., Hessel, M., van Hasselt, H., Lanctot, M., & de\u00a0Freitas, N. (2016). Dueling network architectures for deep reinforcement learning. In Proceedings of the 33rd international conference on machine learning (ICML) (pp. 1995\u20132003)."},{"issue":"1","key":"9480_CR38","first-page":"69","volume":"23","author":"G Widmer","year":"1996","unstructured":"Widmer, G., & Kubat, M. (1996). Learning in the presence of concept drift and hidden contexts. Machine Learning, 23(1), 69\u2013101.","journal-title":"Machine Learning"},{"key":"9480_CR39","unstructured":"Yang, T., Hao, J., Meng, Z., Zheng, Y., Zhang, C., & Zheng, Z. (2019). Bayes-tomop: A fast detection and best response algorithm towards sophisticated opponents. In Proceedings of the 18th international conference on autonomous agents and multiagent systems (AAMAS) (pp. 2282\u20132284). International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9480_CR40","doi-asserted-by":"crossref","unstructured":"Zhao, X., Zhang, L., Ding, Z., Yin, D., Zhao, Y., & Tang, J. (2018). Deep reinforcement learning for list-wise recommendations. CoRR. arXiv:1801.00209.","DOI":"10.1145\/3240323.3240374"},{"key":"9480_CR41","first-page":"954","volume":"31","author":"Y Zheng","year":"2018","unstructured":"Zheng, Y., Meng, Z., Hao, J., Zhang, Z., Yang, T., & Fan, C. (2018). A deep Bayesian policy reuse approach against non-stationary agents. Advances in Neural Information Processing Systems, 31, 954\u2013964.","journal-title":"Advances in Neural Information Processing Systems"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-020-09480-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-020-09480-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-020-09480-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,24]],"date-time":"2022-11-24T20:39:55Z","timestamp":1669322395000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-020-09480-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,26]]},"references-count":41,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,4]]}},"alternative-id":["9480"],"URL":"https:\/\/doi.org\/10.1007\/s10458-020-09480-9","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,10,26]]},"assertion":[{"value":"26 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"2"}}