{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,4,5]],"date-time":"2024-04-05T13:34:39Z","timestamp":1712324079878},"reference-count":89,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2013,9,5]],"date-time":"2013-09-05T00:00:00Z","timestamp":1378339200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2014,7]]},"DOI":"10.1007\/s10458-013-9235-z","type":"journal-article","created":{"date-parts":[[2013,9,4]],"date-time":"2013-09-04T14:08:33Z","timestamp":1378303713000},"page":"637-681","source":"Crossref","is-referenced-by-count":2,"title":["Learning potential functions and their representations for multi-task reinforcement learning"],"prefix":"10.1007","volume":"28","author":[{"given":"Matthijs","family":"Snel","sequence":"first","affiliation":[]},{"given":"Shimon","family":"Whiteson","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,9,5]]},"reference":[{"key":"9235_CR1","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1016\/0025-5564(71)90051-4","volume":"10","author":"JS Albus","year":"1971","unstructured":"Albus, J. S. (1971). A theory of cerebellar function. Mathematical Biosciences, 10, 25\u201361.","journal-title":"Mathematical Biosciences"},{"issue":"3","key":"9235_CR2","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1007\/s10994-007-5040-8","volume":"73","author":"A Argyriou","year":"2008","unstructured":"Argyriou, A., Evgeniou, T., & Pontil, M. (2008). Convex multi-task feature learning. Machine Learning, 73(3), 243\u2013272.","journal-title":"Machine Learning"},{"key":"9235_CR3","unstructured":"Asmuth, J., Littman, M., & Zinkov, R. (2008). Potential-based shaping in model-based reinforcement learning. In Proceedings of the 23rd AAAI Conference on Artificial Intelligence ( pp. 604\u2013609). Cambridge: The AAAI Press."},{"key":"9235_CR4","unstructured":"Babes, M., de Cote, E.M., & Littman, M. L. (2008). Social reward shaping in the prisoner\u2019s dilemma. In 7th International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS 2008) (pp. 1389\u20131392)."},{"key":"9235_CR5","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1613\/jair.731","volume":"12","author":"J Baxter","year":"2000","unstructured":"Baxter, J. (2000). A model of inductive bias learning. Journal of Artificial Intelligence Research (JAIR), 12, 149\u2013198.","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"9235_CR6","volume-title":"Dynamic programming and optimal control","author":"DP Bertsekas","year":"1995","unstructured":"Bertsekas, D. P. (1995). Dynamic programming and optimal control. Belmont: Athena."},{"issue":"1\u20132","key":"9235_CR7","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/S0004-3702(00)00033-3","volume":"121","author":"C Boutilier","year":"2000","unstructured":"Boutilier, C., Dearden, R., & Goldszmidt, M. (2000). Stochastic dynamic programming with factored representations. Artificial Intelligence, 121(1\u20132), 49\u2013107.","journal-title":"Artificial Intelligence"},{"issue":"1","key":"9235_CR8","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1007379606734","volume":"28","author":"R Caruana","year":"1997","unstructured":"Caruana, R. (1997). Multitask learning. Machine Learning, 28(1), 41\u201375.","journal-title":"Machine Learning"},{"key":"9235_CR9","unstructured":"Caruana, R. (2005). Inductive transfer retrospective and review. In NIPS 2005 Workshop on Inductive Transfer: 10 Years Later."},{"key":"9235_CR10","unstructured":"Devlin, S., Grzes, M., & Kudenko, D. (2011). Multi-agent, reward shaping for robocup keepaway. In AAMAS (pp. 1227\u20131228)."},{"key":"9235_CR11","unstructured":"Devlin, S., & Kudenko, D. (2011). Theoretical considerations of potential-based reward shaping for multi-agent systems. In AAMAS, AAMAS \u201911 (pp. 225\u2013232)."},{"key":"9235_CR12","unstructured":"Devlin, S., & Kudenko, D. (2012). Dynamic potential-based reward shaping. In AAMAS (pp. 433\u2013440)."},{"key":"9235_CR13","doi-asserted-by":"crossref","unstructured":"Diuk, C., Li, L., & Leffler, B. R. (2009). The adaptive k-meteorologists problem and its application to structure learning and feature selection in reinforcement learning. In ICML (p. 32).","DOI":"10.1145\/1553374.1553406"},{"issue":"2","key":"9235_CR14","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1016\/0004-3702(94)90047-7","volume":"71","author":"M Dorigo","year":"1994","unstructured":"Dorigo, M., & Colombetti, M. (1994). Robot shaping: Developing autonomous agents through learning. Artificial Intelligence, 71(2), 321\u2013370.","journal-title":"Artificial Intelligence"},{"issue":"6","key":"9235_CR15","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1177\/1059712308092835","volume":"16","author":"S Elfwing","year":"2008","unstructured":"Elfwing, S., Uchibe, E., Doya, K., & Christensen, H. (2008). Co-evolution of shaping: Rewards and meta-parameters in reinforcement learning. Adaptive Behavior, 16(6), 400\u2013412.","journal-title":"Adaptive Behavior"},{"issue":"2","key":"9235_CR16","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1177\/1059712310397633","volume":"19","author":"S Elfwing","year":"2011","unstructured":"Elfwing, S., Uchibe, E., Doya, K., & Christensen, H. I. (2011). Darwinian embodied evolution of the learning ability for survival. Adaptive Behavior, 19(2), 101\u2013120.","journal-title":"Adaptive Behavior"},{"key":"9235_CR17","doi-asserted-by":"crossref","unstructured":"Erez, T., & Smart, W. (2008) What does shaping mean for computational reinforcement learning? In 7th IEEE International Conference on Development and Learning, 2008. ICDL 2008 (pp. 215\u2013219).","DOI":"10.1109\/DEVLRN.2008.4640832"},{"key":"9235_CR18","unstructured":"Ferguson, K., & Mahadevan, S. (2006). Proto-transfer learning in markov decision processes using spectral methods. In ICML Workshop on Structural Knowledge Transfer for Machine Learning."},{"key":"9235_CR19","unstructured":"Ferrante, E., Lazaric, A., & Restelli, M. (2008). Transfer of task representation in reinforcement learning using policy-based proto-value functions. In AAMAS (pp. 1329\u20131332)."},{"issue":"2\u20133","key":"9235_CR20","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1023\/A:1017944732463","volume":"49","author":"DJ Foster","year":"2002","unstructured":"Foster, D. J., & Dayan, P. (2002). Structure in the space of value functions. Machine Learning, 49(2\u20133), 325\u2013346.","journal-title":"Machine Learning"},{"key":"9235_CR21","unstructured":"Frommberger, L. (2011). Task space tile coding: In-task and cross-task generalization in reinforcement learning. In Proceedings of the 9th European Workshop on Reinforcement, Learning (EWRL9)."},{"issue":"6","key":"9235_CR22","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1177\/1059712310391484","volume":"18","author":"L Frommberger","year":"2010","unstructured":"Frommberger, L., & Wolter, D. (2010). Structural knowledge transfer by spatial abstraction for reinforcement learning agents. Adaptive Behavior, 18(6), 507\u2013525.","journal-title":"Adaptive Behavior"},{"key":"9235_CR23","unstructured":"Geramifard, A., Doshi, F., Redding, J., Roy, N., & How, J. P. (2011). Online discovery of feature dependencies. In ICML (pp. 881\u2013888)."},{"key":"9235_CR24","unstructured":"Grze\u015b, M., & Kudenko, D. (2009). Learning shaping rewards in model-based reinforcement learning. In Proceedings of AAMAS 2009 Workshop on Adaptive Learning Agents."},{"key":"9235_CR25","doi-asserted-by":"crossref","unstructured":"Grzes, M., & Kudenko, D. (2009). Theoretical and empirical analysis of reward shaping in reinforcement learning. In ICMLA (pp. 337\u2013344).","DOI":"10.1109\/ICMLA.2009.33"},{"issue":"4","key":"9235_CR26","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1016\/j.neunet.2010.01.001","volume":"23","author":"M Grze\u015b","year":"2010","unstructured":"Grze\u015b, M., & Kudenko, D. (2010). Online learning of shaping rewards in reinforcement learning. Neural Networks, 23(4), 541\u2013550.","journal-title":"Neural Networks"},{"key":"9235_CR27","doi-asserted-by":"crossref","unstructured":"Gullapalli, V., & Barto, A.G. (1992). Shaping as a method for accelerating reinforcement learning. In Proceedings of IEEE International Symposium on Intelligent, Control (pp. 554\u2013559).","DOI":"10.1109\/ISIC.1992.225046"},{"key":"9235_CR28","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon, I., & Elisseeff, A. (2003). An introduction to variable and feature selection. Journal of Machine Learning Research, 3, 1157\u20131182.","journal-title":"Journal of Machine Learning Research"},{"key":"9235_CR29","doi-asserted-by":"crossref","unstructured":"Hachiya, H., & Sugiyama, M. (2010). Feature selection for reinforcement learning: Evaluating implicit state-reward dependency via conditional mutual information. In ECML\/PKDD (pp. 474\u2013489).","DOI":"10.1007\/978-3-642-15880-3_36"},{"key":"9235_CR30","unstructured":"Jong, N. K., & Stone, P. (2005). State abstraction discovery from irrelevant state variables. In IJCAI-05."},{"key":"9235_CR31","unstructured":"Kakade, S. M. (2003). On the sample complexity of reinforcement learning. Ph.D. Thesis, University College London, London."},{"key":"9235_CR32","unstructured":"Koller, D., & Sahami, M. (1996). Toward optimal feature selection. In ICML (pp. 284\u2013292)."},{"key":"9235_CR33","doi-asserted-by":"crossref","unstructured":"Kolter, J. Z., & Ng, A. Y. (2009). Regularization and feature selection in least-squares temporal difference learning. In ICML (p. 66).","DOI":"10.1145\/1553374.1553442"},{"key":"9235_CR34","doi-asserted-by":"crossref","unstructured":"Konidaris, G., & Barto, A. (2006). Autonomous shaping: Knowledge transfer in reinforcement learning. In Proceedings of 23rd International Conference on Machine Learning (pp. 489\u2013496).","DOI":"10.1145\/1143844.1143906"},{"key":"9235_CR35","first-page":"1333","volume":"13","author":"G Konidaris","year":"2012","unstructured":"Konidaris, G., Scheidwasser, I., & Barto, A. G. (2012). Transfer in reinforcement learning via shared features. Journal of Machine Learning Research, 13, 1333\u20131371.","journal-title":"Journal of Machine Learning Research"},{"key":"9235_CR36","doi-asserted-by":"crossref","unstructured":"Koren, Y., & Borenstein, J. (1991). Potential field methods and their inherent limitations for mobile robot navigation. In Proceedings of IEEE Conference on Robotics and Automation (pp. 1398\u20131404).","DOI":"10.1109\/ROBOT.1991.131810"},{"key":"9235_CR37","unstructured":"Kroon, M., & Whiteson, S. (2009). Automatic feature selection for model-based reinforcement learning in factored MDPs. In ICMLA 2009: Proceedings of the Eighth International Conference on Machine Learning and Applications (pp. 324\u2013330)."},{"key":"9235_CR38","unstructured":"Laud, A., & DeJong, G. (2002). Reinforcement learning and shaping: Encouraging intended behaviors. In Proceedings of 19th International Conference on Machine Learning (pp. 355\u2013362)."},{"key":"9235_CR39","unstructured":"Laud, A., & DeJong, G. (2003). The influence of reward on the speed of reinforcement learning: An analysis of shaping. In ICML (pp. 440\u2013447)."},{"key":"9235_CR40","unstructured":"Lazaric, A. (2008). Knowledge transfer in reinforcement learning. Ph.D. Thesis, Politecnico di Milano, Milan."},{"key":"9235_CR41","unstructured":"Lazaric, A., & Ghavamzadeh, M. (2010). Bayesian multi-task reinforcement learning. In ICML (pp. 599\u2013606)."},{"key":"9235_CR42","doi-asserted-by":"crossref","unstructured":"Lazaric, A., Restelli, M., & Bonarini, A. (2008). Transfer of samples in batch reinforcement learning. In ICML (pp. 544\u2013551).","DOI":"10.1145\/1390156.1390225"},{"key":"9235_CR43","unstructured":"Li, L., Walsh, T. J., & Littman, M. L. (2006). Towards a unified theory of state abstraction for mdps. In Aritificial Intelligence and Mathematics."},{"key":"9235_CR44","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1613\/jair.3384","volume":"41","author":"X Lu","year":"2011","unstructured":"Lu, X., Schwartz, H. M., & Givigi, S. N. (2011). Policy invariance under reward transformations for general-sum stochastic games. Journal of Artificial Intelligence Research (JAIR), 41, 397\u2013406.","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"issue":"1\u20133","key":"9235_CR45","first-page":"251","volume":"22","author":"R Maclin","year":"1996","unstructured":"Maclin, R., & Shavlik, J. W. (1996). Creating advice-taking reinforcement learners. Machine Learning, 22(1\u20133), 251\u2013281.","journal-title":"Machine Learning"},{"key":"9235_CR46","unstructured":"Mahadevan, S. (2010). Representation discovery in sequential decision making. In AAAI."},{"key":"9235_CR47","doi-asserted-by":"crossref","unstructured":"Manoonpong, P., W\u00f6rg\u00f6tter, F., & Morimoto, J. (2010). Extraction of reward-related feature space using correlation-based and reward-based learning methods. In ICONIP (Vol. 1, pp. 414\u2013421).","DOI":"10.1007\/978-3-642-17537-4_51"},{"key":"9235_CR48","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1137\/0111030","volume":"11","author":"D Marquardt","year":"1963","unstructured":"Marquardt, D. (1963). An algorithm for least-squares estimation of nonlinear parameters. SIAM Journal of Applied Mathematics, 11, 431\u2013441.","journal-title":"SIAM Journal of Applied Mathematics"},{"key":"9235_CR49","doi-asserted-by":"crossref","unstructured":"Marthi, B. (2007). Automatic shaping and decomposition of reward functions. In Proceedings of 24th International Conference on Machine Learning (pp. 601\u2013608).","DOI":"10.1145\/1273496.1273572"},{"key":"9235_CR50","doi-asserted-by":"crossref","unstructured":"Matari\u0107, M. J. (1994). Reward functions for accelerated learning. In Proceedings of 11th International Conference on Machine Learning.","DOI":"10.1016\/B978-1-55860-335-6.50030-1"},{"issue":"3","key":"9235_CR51","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1007\/s10994-008-5061-y","volume":"73","author":"N Mehta","year":"2008","unstructured":"Mehta, N., Natarajan, S., Tadepalli, P., & Fern, A. (2008). Transfer in variable-reward hierarchical reinforcement learning. Machine Learning, 73(3), 289\u2013312.","journal-title":"Machine Learning"},{"key":"9235_CR52","unstructured":"Midtgaard, M., Vinther, L., Christiansen, J. R., Christensen, A. M., & Zeng, Y. (2010). Time-based reward shaping in real-time strategy games. In Proceedings of the 6th International Conference on Agents and Data Mining Interaction, ADMI\u201910 (pp. 115\u2013125). Berlin, Heidelberg: Springer-Verlag."},{"key":"9235_CR53","unstructured":"Ng, A., Harada, D., & Russell, S.(1999). Policy invariance under reward transformations: Theory and application to reward shaping. In Proceedings of 16th International Conference on Machine Learning."},{"key":"9235_CR54","doi-asserted-by":"crossref","unstructured":"Parr, R., Li, L., Taylor, G., Painter-Wakefield, C., & Littman, M. L. (2008). An analysis of linear models, linear value-function approximation, and feature selection for reinforcement learning. In ICML (pp. 752\u2013759).","DOI":"10.1145\/1390156.1390251"},{"key":"9235_CR55","unstructured":"Petrik, M., Taylor, G., Parr, R., & Zilberstein, S. (2010). Feature selection using regularization in approximate linear programs for markov decision processes. InICML (pp. 871\u2013878)."},{"key":"9235_CR56","unstructured":"Proper, S., & Tumer, K. (2012). Modeling difference rewards for multiagent learning (extended abstract). In AAMAS, Valencia, Spain."},{"key":"9235_CR57","unstructured":"Randl\u00f8v, J., & Alstr\u00f8m, P. (1998). Learning to drive a bicycle using reinforcement learning and shaping. In Proceedings of 15th International Conference on Machine Learning."},{"key":"9235_CR58","unstructured":"Rummery, G., & Niranjan, M. (1994). On-line q-learning using connectionist systems. Technical Report CUED\/F-INFENG-RT 116, Engineering Department, Cambridge University, Cambridge."},{"issue":"3\u20134","key":"9235_CR59","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1016\/S0921-8890(97)00041-9","volume":"22","author":"LM Saksida","year":"1997","unstructured":"Saksida, L. M., Raymond, S. M., & Touretzky, D. S. (1997). Shaping robot behavior using principles from instrumental conditioning. Robotics and Autonomous Systems, 22(3\u20134), 231\u2013249.","journal-title":"Robotics and Autonomous Systems"},{"key":"9235_CR60","doi-asserted-by":"crossref","unstructured":"van Seijen, H., Whiteson, S., & Kester, L. (2010). Switching between representations in reinforcement learning. In Interactive Collaborative, Information Systems (pp. 65\u201384).","DOI":"10.1007\/978-3-642-11688-9_3"},{"key":"9235_CR61","unstructured":"Selfridge, O., Sutton, R. S., & Barto, A. G. (1985). Training and tracking in robotics. In Proceedings of Ninth International Joint Conference on Artificial Intelligence."},{"key":"9235_CR62","unstructured":"Sherstov, A. A., & Stone, P. (2005). Improving action selection in MDP\u2019s via knowledge transfer. InProceedings of the Twentieth National Conference on Artificial Intelligence."},{"key":"9235_CR63","unstructured":"Singh, S., Lewis, R., & Barto, A. (2009). Where do rewards come from? In Proceedings of 31st Annual Conference of the Cognitive Science Society (pp. 2601\u20132606)."},{"issue":"1","key":"9235_CR64","first-page":"123","volume":"22","author":"S Singh","year":"1996","unstructured":"Singh, S., & Sutton, R. (1996). Reinforcement learning with replacing eligibility traces. Machine Learning, 22(1), 123\u2013158.","journal-title":"Machine Learning"},{"issue":"3","key":"9235_CR65","first-page":"323","volume":"8","author":"SP Singh","year":"1992","unstructured":"Singh, S. P. (1992). Transfer of learning by composing solutions of elemental sequential tasks. Machine Learning, 8(3), 323\u2013339.","journal-title":"Machine Learning"},{"key":"9235_CR66","doi-asserted-by":"crossref","unstructured":"Singh, S. P., Jaakkola, T., & Jordan, M. I. (1994). Learning without state-estimation in partially observable markovian decision processes. In ICML (pp. 284\u2013292).","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"9235_CR67","volume-title":"The behavior of organisms: An experimental analysis","author":"BF Skinner","year":"1938","unstructured":"Skinner, B. F. (1938). The behavior of organisms: An experimental analysis. New York: Appleton-Century-Crofts."},{"key":"9235_CR68","doi-asserted-by":"crossref","unstructured":"Snel, M., & Whiteson, S. (2010). Multi-task evolutionary shaping without pre-specified representations. In Genetic and Evolutionary Computation Conference (GECCO\u201910).","DOI":"10.1145\/1830483.1830671"},{"key":"9235_CR69","unstructured":"Snel, M., & Whiteson, S. (2011). Multi-task reinforcement learning: Shaping and feature selection. In Proceedings of the European Workshop on Reinforcement Learning (EWRL)."},{"key":"9235_CR70","unstructured":"Sorg, J., & Singh, S. (2009). Transfer via soft homomorphisms. In Proceedings of 8th International Conference on Autonomous Agents and Multiagent Systems (AAMAS 2009) (pp. 741\u2013748)."},{"key":"9235_CR71","unstructured":"Strehl, A. L., Diuk, C., & Littman, M. L. (2007). Efficient structure learning in factored-state mdps. In AAAI (pp. 645\u2013650)."},{"key":"9235_CR72","first-page":"9","volume":"3","author":"R Sutton","year":"1983","unstructured":"Sutton, R. (1983). Learning to predict by the method of temporal differences. Machine Learning, 3, 9\u201344.","journal-title":"Machine Learning"},{"key":"9235_CR73","volume-title":"Reinforcement learning: An introduction","author":"R Sutton","year":"1998","unstructured":"Sutton, R., & Barto, A. (1998). Reinforcement learning: An introduction. Cambridge: The MIT Press."},{"key":"9235_CR74","doi-asserted-by":"crossref","unstructured":"Tanaka, F., & Yamamura, M. (2003). Multitask reinforcement learning on the distribution of mdps. In Proceedings of 2003 IEEE International Symposium on Computational Intelligence in Robotics and Automation (CIRA 2003) (pp. 1108\u2013113).","DOI":"10.1109\/CIRA.2003.1222152"},{"key":"9235_CR75","unstructured":"Taylor, J., Precup, D., & Panagaden, P. (2009). Bounding performance loss in approximate mdp homomorphisms. In Koller D., Schuurmans D., Bengio Y., & Bottou L. (Eds.), Advances in Neural Information Processing Systems (Vol. 21, pp. 1649\u20131656)."},{"issue":"1","key":"9235_CR76","first-page":"1633","volume":"10","author":"M Taylor","year":"2009","unstructured":"Taylor, M., & Stone, P. (2009). Transfer learning for reinforcement learning domains: A survey. Journal of Machine Learning Research, 10(1), 1633\u20131685.","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"9235_CR77","first-page":"2125","volume":"8","author":"M Taylor","year":"2007","unstructured":"Taylor, M., Stone, P., & Liu, Y. (2007). Transfer learning via inter-task mappings for temporal difference learning. Journal of Machine Learning Research, 8(1), 2125\u20132167.","journal-title":"Journal of Machine Learning Research"},{"key":"9235_CR78","doi-asserted-by":"crossref","unstructured":"Taylor, M. E., Whiteson, S., & Stone, P. (2007). Transfer via inter-task mappings in policy search reinforcement learning. In AAMAS (p. 37).","DOI":"10.1145\/1329125.1329170"},{"key":"9235_CR79","unstructured":"Thrun, S. (1995). Is learning the n-th thing any easier than learning the first? In Advances in Neural Information Processing (pp. 640\u2013646)."},{"key":"9235_CR80","unstructured":"Torrey, L., Shavlik, J. W., Walker, T., & Maclin, R. (2010). Transfer learning via advice taking. In Advances in Machine Learning I (pp. 147\u2013170). New York: Springer."},{"key":"9235_CR81","doi-asserted-by":"crossref","unstructured":"Torrey, L., Walker, T., Shavlik, J. W., & Maclin, R.: Using advice to transfer knowledge acquired in one reinforcement learning task to another. In Proceedings of the Sixteenth European Conference on Machine Learning (ECML 2005) (pp. 412\u2013424).","DOI":"10.1007\/11564096_40"},{"key":"9235_CR82","unstructured":"Vlassis, N., Littman, M. L., & Barber, D. (2011). On the computational complexity of stochastic controller optimization in pomdps. CoRR abs\/1107.3090."},{"key":"9235_CR83","unstructured":"Walsh, T. J., Li, L., & Littman, M. L. (2006). Transferring state abstractions between mdps. In ICML-06 Workshop on Structural Knowledge Transfer for Machine Learning."},{"key":"9235_CR84","first-page":"279","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C. J. C. H., & Dayan, P. (1992). Q-learning. Machine Learning, 8, 279\u2013292.","journal-title":"Machine Learning"},{"key":"9235_CR85","unstructured":"Whitehead, S. D. (1991). A complexity analysis of cooperative mechanisms in reinforcement learning. In Proceedings AAAI-91 (pp. 607\u2013613)."},{"key":"9235_CR86","unstructured":"Whiteson, S., Tanner, B., Taylor, M. E., & Stone, P. (2011). Protecting against evaluation overfitting in empirical reinforcement learning. In ADPRL 2011: Proceedings of the IEEE Symposium on Adaptive Dynamic Programming and Reinforcement, Learning (pp. 120\u2013127)."},{"key":"9235_CR87","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1613\/jair.1190","volume":"19","author":"E Wiewiora","year":"2003","unstructured":"Wiewiora, E. (2003). Potential-based shaping and q-value initialization are equivalent. Journal of Artificial Intelligence Research, 19, 205\u2013208.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9235_CR88","unstructured":"Wiewiora, E., Cottrell, G., & Elkan, C.(2003). Principled methods for advising reinforcement learning agents. InProceedings of 20th International Conference on Machine Learning (pp. 792\u2013799)."},{"key":"9235_CR89","doi-asserted-by":"crossref","unstructured":"Wilson, A., Fern, A., Ray, S., & Tadepalli, P. (2007). Multi-task reinforcement learning: A hierarchical Bayesian approach. In ICML (pp. 1015\u20131022).","DOI":"10.1145\/1273496.1273624"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-013-9235-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-013-9235-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-013-9235-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,23]],"date-time":"2019-07-23T08:07:24Z","timestamp":1563869244000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-013-9235-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,9,5]]},"references-count":89,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2014,7]]}},"alternative-id":["9235"],"URL":"https:\/\/doi.org\/10.1007\/s10458-013-9235-z","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,9,5]]}}}