{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T22:44:18Z","timestamp":1773269058669,"version":"3.50.1"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T00:00:00Z","timestamp":1773187200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T00:00:00Z","timestamp":1773187200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"name":"Montanuniversit\u00e4t Leoben"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s10458-026-09739-7","type":"journal-article","created":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T06:59:15Z","timestamp":1773212355000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Quantification of transfer in reinforcement learning via regret bounds for learning agents"],"prefix":"10.1007","volume":"40","author":[{"given":"Adrienne","family":"Tuynman","sequence":"first","affiliation":[]},{"given":"Ronald","family":"Ortner","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,3,11]]},"reference":[{"key":"9739_CR1","doi-asserted-by":"crossref","unstructured":"Banerjee, B., Lyle, J., Kraemer, L., & Yellamraju, R. (2012). Sample bounded distributed reinforcement learning for decentralized POMDPs. In Proceedings 26th AAAI Conference on Artificial Intelligence (pp. 1256\u20131262). AAAI Press.","DOI":"10.1609\/aaai.v26i1.8260"},{"key":"9739_CR2","doi-asserted-by":"crossref","unstructured":"Boone, V., & Zhang, Z. (2024). Achieving tractable minimax optimal regret in average reward MDPs. In Advances in Neural Information Processing Systems (vol. 38).","DOI":"10.52202\/079017-0840"},{"key":"9739_CR3","unstructured":"Bourel, H., Maillard, O. A., & Talebi, M. S. (2020). Tightening exploration in upper confidence reinforcement learning. In Proceedings 37th International Conference on Machine Learning (pp. 1056\u20131066)."},{"key":"9739_CR4","doi-asserted-by":"crossref","unstructured":"Bu\u015foniu, L., Babu\u0161ka, R., & De Schutter, B. (2010). Multi-agent reinforcement learning: An overview. In J. Kacprzyk, D. Srinivasan, & L. C. Jain (Eds.), Innovations in Multi-Agent Systems and Applications - 1, volume 310 of Studies in Computational Intelligence (pp. 183\u2013221). Springer.","DOI":"10.1007\/978-3-642-14435-6_7"},{"key":"9739_CR5","doi-asserted-by":"crossref","unstructured":"Da Silva, F., Warnell, G., Costa, A., & Stone, P. (2020). Agents teaching agents: A survey on inter-agent transfer learning. Autonomous Agent Multi-Agent Systems, 34(9).","DOI":"10.1007\/s10458-019-09430-0"},{"key":"9739_CR6","doi-asserted-by":"publisher","first-page":"645","DOI":"10.1613\/jair.1.11396","volume":"64","author":"FL da Silva","year":"2019","unstructured":"Da Silva, F. L., & Costa, A. H. R. (2019). A survey on transfer learning for multiagent reinforcement learning systems. Journal of Artificial Intelligence Research, 64, 645\u2013703.","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"1","key":"9739_CR7","doi-asserted-by":"publisher","first-page":"3215","DOI":"10.1007\/s10462-020-09938-y","volume":"54","author":"W Du","year":"2021","unstructured":"Du, W., & Ding, S. (2021). A survey on multi-agent deep reinforcement learning: From the perspective of challenges and applications. Artificial Intelligence Review, 54(1), 3215\u20133238.","journal-title":"Artificial Intelligence Review"},{"key":"9739_CR8","unstructured":"Espeholt, L., Soyer, H., Munos, R., Simonyan, K., Mnih, V., Ward, T., Doron, Y., Firoiu, V., Harley, T., Dunning, I., Legg, S., & Kavukcuoglu, K. (2018). IMPALA: Scalable distributed Deep-RL with importance weighted actor- learner architectures. In Proceedings 35th International Conference on Machine Learning (pp. 1407\u20131416)."},{"key":"9739_CR9","doi-asserted-by":"crossref","unstructured":"Filippi, S., Capp\u00e9, O., & Garivier, A. (2010). Optimism in reinforcement learning and Kullback-Leibler divergence. In 48th Annual Allerton Conference on Communication, Control, and Computing (Allerton) (pp. 115\u2013122).","DOI":"10.1109\/ALLERTON.2010.5706896"},{"key":"9739_CR10","unstructured":"Fruit, R., Pirotta, M., & Lazaric, A. (2020). Improved analysis of UCRL2 with empirical Bernstein inequality. CoRR. https:\/\/arxiv.org\/abs\/2007.05456"},{"issue":"2","key":"9739_CR11","doi-asserted-by":"publisher","first-page":"895","DOI":"10.1007\/s10462-021-09996-w","volume":"55","author":"S Gronauer","year":"2022","unstructured":"Gronauer, S., & Diepold, K. (2022). Multi-agent deep reinforcement learning: A survey. Artificial Intelligence Review, 55(2), 895\u2013943.","journal-title":"Artificial Intelligence Review"},{"key":"9739_CR12","first-page":"1563","volume":"11","author":"T Jaksch","year":"2010","unstructured":"Jaksch, T., Ortner, R., & Auer, P. (2010). Near-optimal regret bounds for reinforcement learning. Journal of Machine Learning Research, 11, 1563\u20131600.","journal-title":"Journal of Machine Learning Research"},{"key":"9739_CR13","unstructured":"Khodadadian, S., Sharma, P., Joshi, G., & Maguluri, S. T. (2022). Federated reinforcement learning: Linear speedup under Markovian sampling. In Proceedings 39th International Conference on Machine Learning (pp. 10997\u201311057)."},{"key":"9739_CR14","doi-asserted-by":"crossref","unstructured":"Lazaric, A. (2012). Transfer in reinforcement learning: A framework and a survey. In M. A. Wiering, & M. van Otterlo (Eds.), Reinforcement learning, volume 12 of Adaptation, Learning, and Optimization (pp. 143\u2013173). Springer.","DOI":"10.1007\/978-3-642-27645-3_5"},{"key":"9739_CR15","doi-asserted-by":"publisher","first-page":"13677","DOI":"10.1007\/s10489-022-04105-y","volume":"53","author":"A Oroojlooy","year":"2023","unstructured":"Oroojlooy, A., & Hajinezhad, D. (2023). A review of cooperative multi-agent deep reinforcement learning. Applied Intelligence, 53, 13677\u201313722.","journal-title":"Applied Intelligence"},{"issue":"10","key":"9739_CR16","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"SJ Pan","year":"2010","unstructured":"Pan, S. J., & Yang, Q. (2010). A survey on transfer learning. IEEE Transactions on Knowledge and Data Engineering, 22(10), 1345\u20131359.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"9739_CR17","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M. L. (1994). Markov Decision Processes: Discrete Stochastic Dynamic Programming. New York, NY, USA: John Wiley & Sons Inc."},{"issue":"1","key":"9739_CR18","first-page":"18","volume":"1","author":"J Qi","year":"2021","unstructured":"Qi, J., Zhou, Q., Lei, L., & Zheng, K. (2021). Federated reinforcement learning: Techniques, applications, and open challenges. Intelligent Robot, 1(1), 18\u201357.","journal-title":"Intelligent Robot"},{"issue":"3","key":"9739_CR19","doi-asserted-by":"publisher","first-page":"1699","DOI":"10.1109\/TCYB.2021.3108237","volume":"53","author":"H Shi","year":"2023","unstructured":"Shi, H., Li, J., Mao, J., & Hwang, K.-S. (2023). Lateral transfer learning for multiagent reinforcement learning. IEEE Transactions on Cybernetics, 53(3), 1699\u20131711.","journal-title":"IEEE Transactions on Cybernetics"},{"issue":"3","key":"9739_CR20","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1023\/A:1008942012299","volume":"8","author":"P Stone","year":"2000","unstructured":"Stone, P., & Veloso, M. M. (2000). Multiagent systems: A survey from a machine learning perspective. Autonomous Robots, 8(3), 345\u2013383.","journal-title":"Autonomous Robots"},{"issue":"8","key":"9739_CR21","doi-asserted-by":"publisher","first-page":"1309","DOI":"10.1016\/j.jcss.2007.08.009","volume":"74","author":"AL Strehl","year":"2008","unstructured":"Strehl, A. L., & Littman, M. L. (2008). An analysis of model-based interval estimation for Markov decision processes. Journal of Computer and System Sciences, 74(8), 1309\u20131331.","journal-title":"Journal of Computer and System Sciences"},{"key":"9739_CR22","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor, M. E., & Stone, P. (2009). Transfer learning for reinforcement learning domains: A survey. Journal of Machine Learning Research, 10, 1633\u20131685.","journal-title":"Journal of Machine Learning Research"},{"key":"9739_CR23","doi-asserted-by":"crossref","unstructured":"Verstraeten, T., Libin, P. J. K., & Now\u00e9, A. (2020). Fleet control using coregionalized Gaussian process policy iteration. In 24th European Conference on Artificial Intelligence (ECAI 2020), volume 325 of Frontiers in Artificial Intelligence and Applications (pp. 1571\u20131578).","DOI":"10.3233\/FAIA200266"},{"key":"9739_CR24","unstructured":"Weissman, T., Ordentlich, E., Seroussi, G., Verdu, S., & Weinberger, M. L. (2003). Inequalities for the L1 deviation of the empirical distribution. Technical Report HPL-2003-97, HP Laboratories Palo Alto."},{"key":"9739_CR25","unstructured":"Yang, T., Wang, W., Tang, H., Hao, J., Meng, Z., Mao, H., Li, D., Liu, W., Chen, Y., Hu, Y., Fan, C., & Zhang, C. (2021). An efficient transfer learning framework for multiagent reinforcement learning. In Advances in neural information processing systems (vol. 34, pp. 17037\u201317048)."},{"key":"9739_CR26","doi-asserted-by":"crossref","unstructured":"Yu, C., Velu, A., Vinitsky, E., Gao, J., Wang, Y., Bayen, A., & Wu, Y. (2022). The surprising effectiveness of PPO in cooperative multi- agent games. In Advances in neural information processing systems (vol. 35, pp. 24611\u201324624).","DOI":"10.52202\/068431-1787"},{"key":"9739_CR27","doi-asserted-by":"publisher","unstructured":"Zhou, T., Zhang, F., Shao, K., Li, K., Huang, W., Luo, J., Wang, W., Yang, Y., Mao, H., Wang, B., Li, D., Liu, W., & Hao J. (2021). Cooperative multi- agent transfer learning with level- adaptive credit assignment. https:\/\/doi.org\/10.48550\/arXiv.2106.00517","DOI":"10.48550\/arXiv.2106.00517"},{"issue":"11","key":"9739_CR28","doi-asserted-by":"publisher","first-page":"13344","DOI":"10.1109\/TPAMI.2023.3292075","volume":"45","author":"Z Zhu","year":"2023","unstructured":"Zhu, Z., Lin, K., Jain, A. K., & Zhou, J. (2023). Transfer learning in deep reinforcement learning: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(11), 13344\u201313362.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09739-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-026-09739-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09739-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T06:59:20Z","timestamp":1773212360000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-026-09739-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,11]]},"references-count":28,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["9739"],"URL":"https:\/\/doi.org\/10.1007\/s10458-026-09739-7","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,11]]},"assertion":[{"value":"7 November 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"16"}}