{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:52:25Z","timestamp":1775065945714,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":73,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_5","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"143-173","source":"Crossref","is-referenced-by-count":116,"title":["Transfer in Reinforcement Learning: A Framework and a Survey"],"prefix":"10.1007","author":[{"given":"Alessandro","family":"Lazaric","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"5_CR1","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","volume":"71","author":"A. Antos","year":"2008","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path. Machine Learning Journal\u00a071, 89\u2013129 (2008)","journal-title":"Machine Learning Journal"},{"issue":"3","key":"5_CR2","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/s10994-007-5040-8","volume":"73","author":"A. Argyriou","year":"2008","unstructured":"Argyriou, A., Evgeniou, T., Pontil, M.: Convex multi-task feature learning. Machine Learning Journal\u00a073(3), 243\u2013272 (2008)","journal-title":"Machine Learning Journal"},{"key":"5_CR3","unstructured":"Asadi, M., Huber, M.: Effective control knowledge transfer through learning skill and representation hierarchies. In: Proceedings of the 20th International Joint Conference on Artificial Intelligence (IJCAI-2007), pp. 2054\u20132059 (2007)"},{"key":"5_CR4","unstructured":"Banerjee, B., Stone, P.: General game learning using knowledge transfer. In: Proceedings of the 20th International Joint Conference on Artificial Intelligence (IJCAI-2007), pp. 672\u2013677 (2007)"},{"key":"5_CR5","first-page":"35","volume-title":"Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI-2009)","author":"P.L. Bartlett","year":"2009","unstructured":"Bartlett, P.L., Tewari, A.: Regal: a regularization based algorithm for reinforcement learning in weakly communicating mdps. In: Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI-2009), pp. 35\u201342. AUAI Press, Arlington (2009)"},{"key":"5_CR6","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1613\/jair.731","volume":"12","author":"J. Baxter","year":"2000","unstructured":"Baxter, J.: A model of inductive bias learning. Journal of Artificial Intelligence Research\u00a012, 149\u2013198 (2000)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"3","key":"5_CR7","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/s10994-007-5043-5","volume":"73","author":"S. Ben-David","year":"2008","unstructured":"Ben-David, S., Schuller-Borbely, R.: A notion of task relatedness yiealding provable multiple-task learning guarantees. Machine Learning Journal\u00a073(3), 273\u2013287 (2008)","journal-title":"Machine Learning Journal"},{"key":"5_CR8","unstructured":"Bernstein, D.S.: Reusing old policies to accelerate learning on new mdps. Tech. rep., University of Massachusetts, Amherst, MA, USA (1999)"},{"key":"5_CR9","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1007\/11840541_30","volume-title":"From Animals to Animats 9","author":"A. Bonarini","year":"2006","unstructured":"Bonarini, A., Lazaric, A., Restelli, M.: Incremental Skill Acquisition for Self-motivated Learning Animats. In: Nolfi, S., Baldassarre, G., Calabretta, R., Hallam, J.C.T., Marocco, D., Meyer, J.-A., Miglino, O., Parisi, D. (eds.) SAB 2006. LNCS (LNAI), vol.\u00a04095, pp. 357\u2013368. Springer, Heidelberg (2006)"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"Cesa-Bianchi, N., Lugosi, G.: Prediction, Learning, and Games. Cambridge University Press (2006)","DOI":"10.1017\/CBO9780511546921"},{"key":"5_CR11","first-page":"1757","volume":"9","author":"K. Crammer","year":"2008","unstructured":"Crammer, K., Kearns, M., Wortman, J.: Learning from multiple sources. Journal of Machine Learning Research\u00a09, 1757\u20131774 (2008)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR12","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1613\/jair.904","volume":"16","author":"C. Drummond","year":"2002","unstructured":"Drummond, C.: Accelerating reinforcement learning by composing solutions of automatically identified subtasks. Journal of Artificial Intelligence Research\u00a016, 59\u2013104 (2002)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Engel, Y., Mannor, S., Meir, R.: Reinforcement learning with Gaussian processes. In: Proceedings of the 22nd International Conference on Machine Learning (ICML-2005), pp. 201\u2013208 (2005)","DOI":"10.1145\/1102351.1102377"},{"key":"5_CR14","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06, 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR15","unstructured":"Farahmand, A.M., Ghavamzadeh, M., Szepesv\u00e1ri, C., Mannor, S.: Regularized policy iteration. In: Proceedings of the Twenty-Second Annual Conference on Advances in Neural Information Processing Systems (NIPS-2008), pp. 441\u2013448 (2008)"},{"key":"5_CR16","unstructured":"Fawcett, T., Callan, J., Matheus, C., Michalski, R., Pazzani, M., Rendell, L., Sutton, R. (eds.): Constructive Induction Workshop at the Eleventh International Conference on Machine Learning (1994)"},{"key":"5_CR17","unstructured":"Ferguson, K., Mahadevan, S.: Proto-transfer learning in markov decision processes using spectral methods. In: Workshop on Structural Knowledge Transfer for Machine Learning at the Twenty-Third International Conference on Machine Learning (2006)"},{"key":"5_CR18","unstructured":"Ferns, N., Panangaden, P., Precup, D.: Metrics for finite markov decision processes. In: Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence (UAI-2004), pp. 162\u2013169 (2004)"},{"key":"5_CR19","unstructured":"Ferrante, E., Lazaric, A., Restelli, M.: Transfer of task representation in reinforcement learning using policy-based proto-value functions. In: Proceedings of the Seventh International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS-2008), pp. 1329\u20131332 (2008)"},{"issue":"2-3","key":"5_CR20","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1023\/A:1017944732463","volume":"49","author":"D.J. Foster","year":"2002","unstructured":"Foster, D.J., Dayan, P.: Structure in the space of value functions. Machine Learning Journal\u00a049(2-3), 325\u2013346 (2002)","journal-title":"Machine Learning Journal"},{"issue":"2","key":"5_CR21","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1037\/0022-0663.95.2.393","volume":"95","author":"D. Gentner","year":"2003","unstructured":"Gentner, D., Loewenstein, J., Thompson, L.: Learning and transfer: A general role for analogical encoding. Journal of Educational Psychology\u00a095(2), 393\u2013408 (2003)","journal-title":"Journal of Educational Psychology"},{"key":"5_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0010-0285(83)90002-6","volume":"15","author":"M.L. Gick","year":"1983","unstructured":"Gick, M.L., Holyoak, K.J.: Schema induction and analogical transfer. Cognitive Psychology\u00a015, 1\u201338 (1983)","journal-title":"Cognitive Psychology"},{"key":"5_CR23","unstructured":"Hauskrecht, M.: Planning with macro-actions: Effect of initial value function estimate on convergence rate of value iteration. Tech. rep., Department of Computer Science, University of Pittsburgh (1998)"},{"key":"5_CR24","unstructured":"Hengst, B.: Discovering hierarchy in reinforcement learning. PhD thesis, University of New South Wales (2003)"},{"key":"5_CR25","first-page":"1563","volume":"11","author":"T. Jaksch","year":"2010","unstructured":"Jaksch, T., Ortner, R., Auer, P.: Near-optimal regret bounds for reinforcement learning. Journal of Machine Learning Research\u00a011, 1563\u20131600 (2010)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR26","unstructured":"Kalmar, Z., Szepesvari, C.: An evaluation criterion for macro-learning and some results. Tech. Rep. TR-99-01, Mindmaker Ltd. (1999)"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"Konidaris, G., Barto, A.: Autonomous shaping: knowledge transfer in reinforcement learning. In: Proceedings of the Twenty-Third International Conference on Machine Learning (ICML-2006), pp. 489\u2013496 (2006)","DOI":"10.1145\/1143844.1143906"},{"key":"5_CR28","unstructured":"Konidaris, G., Barto, A.G.: Building portable options: Skill transfer in reinforcement learning. In: Proceedings of the 20th International Joint Conference on Artificial Intelligence (IJCAI-2007), pp. 895\u2013900 (2007)"},{"key":"5_CR29","unstructured":"Langley, P.: Transfer of knowledge in cognitive systems. In: Talk, Workshop on Structural Knowledge Transfer for Machine Learning at the Twenty-Third International Conference on Machine Learning (2006)"},{"key":"5_CR30","unstructured":"Lazaric, A.: Knowledge transfer in reinforcement learning. PhD thesis, Poltecnico di Milano (2008)"},{"key":"5_CR31","unstructured":"Lazaric, A., Ghavamzadeh, M.: Bayesian multi-task reinforcement learning. In: Proceedings of the Twenty-Seventh International Conference on Machine Learning, ICML-2010 (2010) (submitted)"},{"key":"5_CR32","doi-asserted-by":"crossref","unstructured":"Lazaric, A., Restelli, M., Bonarini, A.: Transfer of samples in batch reinforcement learning. In: Proceedings of the Twenty-Fifth Annual International Conference on Machine Learning (ICML-2008), pp. 544\u2013551 (2008)","DOI":"10.1145\/1390156.1390225"},{"key":"5_CR33","unstructured":"Lazaric, A., Ghavamzadeh, M., Munos, R.: Finite-sample analysis of lstd. In: Proceedings of the Twenty-Seventh International Conference on Machine Learning, ICML-2010 (2010)"},{"key":"5_CR34","first-page":"1131","volume":"10","author":"H. Li","year":"2009","unstructured":"Li, H., Liao, X., Carin, L.: Multi-task reinforcement learning in partially observable stochastic environments. Journal of Machine Learning Research\u00a010, 1131\u20131186 (2009)","journal-title":"Journal of Machine Learning Research"},{"issue":"3-4","key":"5_CR35","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1023\/B:AIRE.0000036264.95672.64","volume":"21","author":"M.G. Madden","year":"2004","unstructured":"Madden, M.G., Howley, T.: Transfer of experience between reinforcement learning environments with progressive difficulty. Artificial Intelligence Review\u00a021(3-4), 375\u2013398 (2004)","journal-title":"Artificial Intelligence Review"},{"key":"5_CR36","first-page":"2169","volume":"38","author":"S. Mahadevan","year":"2007","unstructured":"Mahadevan, S., Maggioni, M.: Proto-value functions: A laplacian framework for learning representation and control in markov decision processes. Journal of Machine Learning Research\u00a038, 2169\u20132231 (2007)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR37","unstructured":"Maillard, O.A., Lazaric, A., Ghavamzadeh, M., Munos, R.: Finite-sample analysis of bellman residual minimization. In: Proceedings of the Second Asian Conference on Machine Learning, ACML-2010 (2010)"},{"key":"5_CR38","unstructured":"McGovern, A., Barto, A.G.: Automatic discovery of subgoals in reinforcement learning using diverse density. In: Proceedings of the Eighteenth International Conference on Machine Learning, ICML 2001 (2001)"},{"issue":"3","key":"5_CR39","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1007\/s10994-008-5061-y","volume":"73","author":"N. Mehta","year":"2008","unstructured":"Mehta, N., Natarajan, S., Tadepalli, P., Fern, A.: Transfer in variable-reward hierarchical reinforcement learning. Machine Learning Journal\u00a073(3), 289\u2013312 (2008)","journal-title":"Machine Learning Journal"},{"key":"5_CR40","doi-asserted-by":"crossref","unstructured":"Menache, I., Mannor, S., Shimkin, N.: Q-cut - dynamic discovery of sub-goals in reinforcement learning. In: Proceedings of the Thirteen European Conference on Machine Learning, pp. 295\u2013306 (2002)","DOI":"10.1007\/3-540-36755-1_25"},{"key":"5_CR41","first-page":"815","volume":"9","author":"R. Munos","year":"2008","unstructured":"Munos, R., Szepesv\u00e1ri, C.: Finite time bounds for fitted value iteration. Journal of Machine Learning Research\u00a09, 815\u2013857 (2008)","journal-title":"Journal of Machine Learning Research"},{"issue":"22","key":"5_CR42","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"S.J. Pan","year":"2010","unstructured":"Pan, S.J., Yang, Q.: A survey on transfer learning. IEEE Transactions on Knowledge and Data Engineering\u00a022(22), 1345\u20131359 (2010)","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"5_CR43","unstructured":"Perkins, D.N., Salomon, G., Press, P.: Transfer of learning. In: International Encyclopedia of Education. Pergamon Press (1992)"},{"key":"5_CR44","unstructured":"Perkins, T.J., Precup, D.: Using options for knowledge transfer in reinforcement learning. Tech. rep., University of Massachusetts, Amherst, MA, USA (1999)"},{"key":"5_CR45","unstructured":"Phillips, C.: Knowledge transfer in markov decision processes. McGill School of Computer Science (2006), http:\/\/www.cs.mcgill.ca\/~martin\/usrs\/phillips.pdf"},{"key":"5_CR46","unstructured":"Ravindran, B., Barto, A.G.: Relativized options: Choosing the right transformation. In: Proceedings of the Twentieth International Conference on Machine Learning (ICML 2003), pp. 608\u2013615 (2003)"},{"key":"5_CR47","unstructured":"Sherstov, A.A., Stone, P.: Improving action selection in MDP\u2019s via knowledge transfer. In: Proceedings of the Twentieth National Conference on Artificial Intelligence, AAAI-2005 (2005)"},{"key":"5_CR48","unstructured":"Silver, D.: Selective transfer of neural network task knowledge. PhD thesis, University of Western Ontario (2000)"},{"key":"5_CR49","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/978-3-540-73053-8_31","volume-title":"Bio-inspired Modeling of Cognitive Tasks","author":"D.L. Silver","year":"2007","unstructured":"Silver, D.L., Poirier, R.: Requirements for Machine Lifelong Learning. In: Mira, J., \u00c1lvarez, J.R. (eds.) IWINAC 2007, Part I. LNCS, vol.\u00a04527, pp. 313\u2013319. Springer, Heidelberg (2007)"},{"key":"5_CR50","doi-asserted-by":"crossref","unstructured":"Simsek, O., Wolfe, A.P., Barto, A.G.: Identifying useful subgoals in reinforcement learning by local graph partitioning. In: Proceedings of the Twenty-Second International Conference of Machine Learning, ICML 2005 (2005)","DOI":"10.1145\/1102351.1102454"},{"key":"5_CR51","doi-asserted-by":"crossref","unstructured":"Singh, S., Barto, A., Chentanez, N.: Intrinsically motivated reinforcement learning. In: Proceedings of the Eighteenth Annual Conference on Neural Information Processing Systems, NIPS-2004 (2004)","DOI":"10.21236\/ADA440280"},{"key":"5_CR52","unstructured":"Soni, V., Singh, S.P.: Using homomorphisms to transfer options across continuous reinforcement learning domains. In: Proceedings of the Twenty-first National Conference on Artificial Intelligence, AAAI-2006 (2006)"},{"issue":"3","key":"5_CR53","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P. Stone","year":"2005","unstructured":"Stone, P., Sutton, R.S., Kuhlmann, G.: Reinforcement learning for RoboCup-soccer keepaway. Adaptive Behavior\u00a013(3), 165\u2013188 (2005)","journal-title":"Adaptive Behavior"},{"key":"5_CR54","unstructured":"Sunmola, F.T., Wyatt, J.L.: Model transfer for markov decision tasks via parameter matching. In: Proceedings of the 25th Workshop of the UK Planning and Scheduling Special Interest Group, PlanSIG 2006 (2006)"},{"key":"5_CR55","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"5_CR56","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between mdps and semi-mdps: a framework for temporal abstraction in reinforcement learning. Artificial Intelligence\u00a0112, 181\u2013211 (1999)","journal-title":"Artificial Intelligence"},{"key":"5_CR57","unstructured":"Talvitie, E., Singh, S.: An experts algorithm for transfer learning. In: Proceedings of the 20th International Joint Conference on Artificial Intelligence (IJCAI-2007), pp. 1065\u20131070 (2007)"},{"key":"5_CR58","doi-asserted-by":"crossref","unstructured":"Tanaka, F., Yamamura, M.: Multitask reinforcement learning on the distribution of mdps. In: IEEE International Symposium on Computational Intelligence in Robotics and Automation, vol.\u00a03, pp. 1108\u20131113 (2003)","DOI":"10.1109\/CIRA.2003.1222152"},{"key":"5_CR59","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Stone, P.: Behavior transfer for value-function-based reinforcement learning. In: Proceedings of the Fourth International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS-2005), pp. 53\u201359 (2005)","DOI":"10.1145\/1082473.1082482"},{"key":"5_CR60","unstructured":"Taylor, M.E., Stone, P.: Representation transfer for reinforcement learning. In: AAAI 2007 Fall Symposium on Computational Approaches to Representation Change during Learning and Development (2007)"},{"issue":"1","key":"5_CR61","first-page":"1633","volume":"10","author":"M.E. Taylor","year":"2009","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: A survey. Journal of Machine Learning Research\u00a010(1), 1633\u20131685 (2009)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR62","unstructured":"Taylor, M.E., Stone, P., Liu, Y.: Value functions for RL-based behavior transfer: A comparative study. In: Proceedings of the Twentieth National Conference on Artificial Intelligence, AAAI-2005 (2005)"},{"key":"5_CR63","first-page":"2125","volume":"8","author":"M.E. Taylor","year":"2007","unstructured":"Taylor, M.E., Stone, P., Liu, Y.: Transfer learning via inter-task mappings for temporal difference learning. Journal of Machine Learning Research\u00a08, 2125\u20132167 (2007a)","journal-title":"Journal of Machine Learning Research"},{"key":"5_CR64","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Whiteson, S., Stone, P.: Transfer via inter-task mappings in policy search reinforcement learning. In: Proceedings of the Sixth International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS-2007 (2007b)","DOI":"10.1145\/1329125.1329170"},{"key":"5_CR65","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Jong, N.K., Stone, P.: Transferring instances for model-based reinforcement learning. In: Proceedings of the European Conference on Machine Learning (ECML-2008), pp. 488\u2013505 (2008a)","DOI":"10.1007\/978-3-540-87481-2_32"},{"key":"5_CR66","unstructured":"Taylor, M.E., Kuhlmann, G., Stone, P.: Autonomous transfer for reinforcement learning. In: Proceedings of the Seventh International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS-2008), pp. 283\u2013290 (2008b)"},{"key":"5_CR67","doi-asserted-by":"crossref","unstructured":"Thorndike, E.L., Woodworth, R.S.: The influence of improvement in one mental function upon the efficiency of other functions. Psychological Review\u00a08 (1901)","DOI":"10.1037\/h0074898"},{"key":"5_CR68","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1007\/11564096_40","volume-title":"Machine Learning: ECML 2005","author":"L. Torrey","year":"2005","unstructured":"Torrey, L., Walker, T., Shavlik, J., Maclin, R.: Using Advice to Transfer Knowledge Acquired in one Reinforcement Learning Task to Another. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 412\u2013424. Springer, Heidelberg (2005)"},{"key":"5_CR69","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1007\/11871842_41","volume-title":"Machine Learning: ECML 2006","author":"L. Torrey","year":"2006","unstructured":"Torrey, L., Shavlik, J., Walker, T., Maclin, R.: Skill Acquisition Via Transfer Learning and Advice Taking. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol.\u00a04212, pp. 425\u2013436. Springer, Heidelberg (2006)"},{"key":"5_CR70","first-page":"163","volume":"2","author":"P. Utgoff","year":"1986","unstructured":"Utgoff, P.: Shift of bias for inductive concept learning. Machine Learning\u00a02, 163\u2013190 (1986)","journal-title":"Machine Learning"},{"key":"5_CR71","unstructured":"Walsh, T.J., Li, L., Littman, M.L.: Transferring state abstractions between mdps. In: ICML Workshop on Structural Knowledge Transfer for Machine Learning (2006)"},{"key":"5_CR72","first-page":"279","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., Dayan, P.: Q-learning. Machine Learning\u00a08, 279\u2013292 (1992)","journal-title":"Machine Learning"},{"key":"5_CR73","doi-asserted-by":"crossref","unstructured":"Wilson, A., Fern, A., Ray, S., Tadepalli, P.: Multi-task reinforcement learning: a hierarchical bayesian approach. In: Proceedings of the Twenty-Forth International Conference on Machine learning (ICML-2007), pp. 1015\u20131022 (2007)","DOI":"10.1145\/1273496.1273624"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T13:02:42Z","timestamp":1742648562000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":73,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_5","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}