{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T20:50:14Z","timestamp":1764276614342,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":173,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642276446"},{"type":"electronic","value":"9783642276453"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_8","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"253-292","source":"Crossref","is-referenced-by-count":6,"title":["Solving Relational and First-Order Logical Markov Decision Processes: A Survey"],"prefix":"10.1007","author":[{"given":"Martijn","family":"van Otterlo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"issue":"1","key":"8_CR1","first-page":"37","volume":"6","author":"D. Aha","year":"1991","unstructured":"Aha, D., Kibler, D., Albert, M.: Instance-based learning algorithms. Machine Learning\u00a06(1), 37\u201366 (1991)","journal-title":"Machine Learning"},{"key":"8_CR2","volume-title":"Introduction to Machine Learning","author":"E. Alpaydin","year":"2004","unstructured":"Alpaydin, E.: Introduction to Machine Learning. The MIT Press, Cambridge (2004)"},{"key":"8_CR3","unstructured":"Andersen, C.C.S.: Hierarchical relational reinforcement learning. Master\u2019s thesis, Aalborg University, Denmark (2005)"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Asgharbeygi, N., Stracuzzi, D.J., Langley, P.: Relational temporal difference learning. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 49\u201356 (2006)","DOI":"10.1145\/1143844.1143851"},{"key":"8_CR5","unstructured":"Aycenina, M.: Hierarchical relational reinforcement learning. In: Stanford Doctoral Symposium (2002) (unpublished)"},{"issue":"2","key":"8_CR6","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1023\/A:1007593124513","volume":"35","author":"E.B. Baum","year":"1999","unstructured":"Baum, E.B.: Toward a model of intelligence as an economy of agents. Machine Learning\u00a035(2), 155\u2013185 (1999)","journal-title":"Machine Learning"},{"key":"8_CR7","volume-title":"What is Thought?","author":"E.B. Baum","year":"2004","unstructured":"Baum, E.B.: What is Thought? The MIT Press, Cambridge (2004)"},{"key":"8_CR8","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/3731.001.0001","volume-title":"Inductive Logic Programming: From Machine Learning to Software Engineering","author":"F. Bergadano","year":"1995","unstructured":"Bergadano, F., Gunetti, D.: Inductive Logic Programming: From Machine Learning to Software Engineering. The MIT Press, Cambridge (1995)"},{"key":"8_CR9","volume-title":"Neuro-Dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"8_CR10","unstructured":"Boutilier, C., Poole, D.: Computing optimal policies for partially observable markov decision processes using compact representations. In: Proceedings of the National Conference on Artificial Intelligence (AAAI), pp. 1168\u20131175 (1996)"},{"key":"8_CR11","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.575","volume":"11","author":"C. Boutilier","year":"1999","unstructured":"Boutilier, C., Dean, T., Hanks, S.: Decision theoretic planning: Structural assumptions and computational leverage. Journal of Artificial Intelligence Research\u00a011, 1\u201394 (1999)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"1-2","key":"8_CR12","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/S0004-3702(00)00033-3","volume":"121","author":"C. Boutilier","year":"2000","unstructured":"Boutilier, C., Dearden, R.W., Goldszmidt, M.: Stochastic dynamic programming with factored representations. Artificial Intelligence\u00a0121(1-2), 49\u2013107 (2000)","journal-title":"Artificial Intelligence"},{"key":"8_CR13","unstructured":"Boutilier, C., Reiter, R., Price, B.: Symbolic dynamic programming for first-order MDP\u2019s. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 690\u2013697 (2001)"},{"key":"8_CR14","unstructured":"Boyan, J.A., Moore, A.W.: Generalization in reinforcement learning: Safely approximating the value function. In: Proceedings of the Neural Information Processing Conference (NIPS), pp. 369\u2013376 (1995)"},{"key":"8_CR15","volume-title":"Knowledge Representation and Reasoning","author":"R.J. Brachman","year":"2004","unstructured":"Brachman, R.J., Levesque, H.J.: Knowledge Representation and Reasoning. Morgan Kaufmann Publishers, San Francisco (2004)"},{"key":"8_CR16","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/978-3-540-30498-2_19","volume-title":"Advances in Artificial Intelligence \u2013 IBERAMIA 2004","author":"M.A. Castilho","year":"2004","unstructured":"Castilho, M.A., Kunzle, L.A., Lecheta, E., Palodeto, V., Silva, F.: An Investigation on Genetic Algorithms for Generic STRIPS Planning. In: Lema\u00eetre, C., Reyes, C.A., Gonz\u00e1lez, J.A. (eds.) IBERAMIA 2004. LNCS (LNAI), vol.\u00a03315, pp. 185\u2013194. Springer, Heidelberg (2004)"},{"key":"8_CR17","unstructured":"Chapman, D., Kaelbling, L.P.: Input generalization in delayed reinforcement learning: An algorithm and performance comparisons. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 726\u2013731 (1991)"},{"key":"8_CR18","unstructured":"Chen, J., Muggleton, S.: Decision-theoretic logic programs. In: Proceedings of the International Conference on Inductive Logic Programming (ILP) (2010)"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Cocora, A., Kersting, K., Plagemann, C., Burgard, W., De Raedt, L.: Learning relational navigation policies. In: Proceedings of the IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2006)","DOI":"10.1109\/IROS.2006.282061"},{"key":"8_CR20","unstructured":"Cole, J., Lloyd, J.W., Ng, K.S.: Symbolic learning for adaptive agents. In: Proceedings of the Annual Partner Conference, Smart Internet Technology Cooperative Research Centre (2003), http:\/\/csl.anu.edu.au\/jwl\/crc_paper.pdf"},{"key":"8_CR21","unstructured":"Croonenborghs, T.: Model-assisted approaches for relational reinforcement learning. PhD thesis, Department of Computer Science, Catholic University of Leuven, Belgium (2009)"},{"key":"8_CR22","unstructured":"Croonenborghs, T., Driessens, K., Bruynooghe, M.: Learning relational options for inductive transfer in relational reinforcement learning. In: Proceedings of the International Conference on Inductive Logic Programming (ILP) (2007a)"},{"key":"8_CR23","unstructured":"Croonenborghs, T., Ramon, J., Blockeel, H., Bruynooghe, M.: Online learning and exploiting relational models in reinforcement learning. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 726\u2013731 (2007b)"},{"key":"8_CR24","unstructured":"Dabney, W., McGovern, A.: Utile distinctions for relational reinforcement learning. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 738\u2013743 (2007)"},{"key":"8_CR25","unstructured":"de la Rosa, T., Jimenez, S., Borrajo, D.: Learning relational decision trees for guiding heuristic planning. In: Proceedings of the International Conference on Artificial Intelligence Planning Systems (ICAPS) (2008)"},{"key":"8_CR26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68856-3","volume-title":"Logical and Relational Learning","author":"L. De Raedt","year":"2008","unstructured":"De Raedt, L.: Logical and Relational Learning. Springer, Heidelberg (2008)"},{"issue":"503","key":"8_CR27","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1023\/A:1007355226281","volume":"28","author":"T.G. Dietterich","year":"1997","unstructured":"Dietterich, T.G., Flann, N.S.: Explanation-based learning and reinforcement learning: A unified view. Machine Learning\u00a028(503), 169\u2013210 (1997)","journal-title":"Machine Learning"},{"key":"8_CR28","unstructured":"Diuk, C.: An object-oriented representation for efficient reinforcement learning. PhD thesis, Rutgers University, Computer Science Department (2010)"},{"key":"8_CR29","doi-asserted-by":"crossref","unstructured":"Diuk, C., Cohen, A., Littman, M.L.: An object-oriented representation for efficient reinforcement learning. In: Proceedings of the International Conference on Machine Learning (ICML) (2008)","DOI":"10.1145\/1390156.1390187"},{"key":"8_CR30","unstructured":"Driessens, K., Blockeel, H.: Learning Digger using hierarchical reinforcement learning for concurrent goals. In: Proceedings of the European Workshop on Reinforcement Learning, EWRL (2001)"},{"key":"8_CR31","unstructured":"Driessens, K., D\u017eeroski, S.: Integrating experimentation and guidance in relational reinforcement learning. In: Proceedings of the Nineteenth International Conference on Machine Learning, pp. 115\u2013122 (2002)"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Driessens, K., D\u017eeroski, S.: Combining model-based and instance-based learning for first order regression. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 193\u2013200 (2005)","DOI":"10.1145\/1102351.1102376"},{"key":"8_CR33","unstructured":"Driessens, K., Ramon, J.: Relational instance based regression for relational reinforcement learning. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 123\u2013130 (2003)"},{"key":"8_CR34","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/3-540-44795-4_9","volume-title":"Machine Learning: ECML 2001","author":"K. Driessens","year":"2001","unstructured":"Driessens, K., Ramon, J., Blockeel, H.: Speeding Up Relational Reinforcement Learning Through the Use of an Incremental First Order Decision Tree Learner. In: Flach, P.A., De Raedt, L. (eds.) ECML 2001. LNCS (LNAI), vol.\u00a02167, pp. 97\u2013108. Springer, Heidelberg (2001)"},{"key":"8_CR35","doi-asserted-by":"crossref","unstructured":"D\u017eeroski, S., De Raedt, L., Blockeel, H.: Relational reinforcement learning. In: Shavlik, J. (ed.) Proceedings of the International Conference on Machine Learning (ICML), pp. 136\u2013143 (1998)","DOI":"10.1007\/BFb0027307"},{"key":"8_CR36","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S. D\u017eeroski","year":"2001","unstructured":"D\u017eeroski, S., De Raedt, L., Driessens, K.: Relational reinforcement learning. Machine Learning\u00a043, 7\u201352 (2001)","journal-title":"Machine Learning"},{"key":"8_CR37","unstructured":"Feng, Z., Dearden, R.W., Meuleau, N., Washington, R.: Dynamic programming for structured continuous Markov decision problems. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI), pp. 154\u2013161 (2004)"},{"key":"8_CR38","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1613\/jair.1700","volume":"25","author":"A. Fern","year":"2006","unstructured":"Fern, A., Yoon, S.W., Givan, R.: Approximate policy iteration with a policy language bias: Solving relational markov decision processes. Journal of Artificial Intelligence Research (JAIR)\u00a025, 75\u2013118 (2006); special issue on the International Planning Competition 2004","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"8_CR39","volume-title":"Reinforcement learning in relational domains: A policy-language approach","author":"A. Fern","year":"2007","unstructured":"Fern, A., Yoon, S.W., Givan, R.: Reinforcement learning in relational domains: A policy-language approach. The MIT Press, Cambridge (2007)"},{"key":"8_CR40","doi-asserted-by":"crossref","unstructured":"Fikes, R.E., Nilsson, N.J.: STRIPS: A new approach to the application of theorem proving to problem solving. Artificial Intelligence\u00a02(2) (1971)","DOI":"10.1016\/0004-3702(71)90010-5"},{"key":"8_CR41","unstructured":"Finney, S., Gardiol, N.H., Kaelbling, L.P., Oates, T.: The thing that we tried Didn\u2019t work very well: Deictic representations in reinforcement learning. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI), pp. 154\u2013161 (2002)"},{"key":"8_CR42","unstructured":"Finzi, A., Lukasiewicz, T.: Game-theoretic agent programming in Golog. In: Proceedings of the European Conference on Artificial Intelligence (ECAI) (2004a)"},{"key":"8_CR43","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1007\/978-3-540-30227-8_28","volume-title":"Logics in Artificial Intelligence","author":"A. Finzi","year":"2004","unstructured":"Finzi, A., Lukasiewicz, T.: Relational Markov Games. In: Alferes, J.J., Leite, J. (eds.) JELIA 2004. LNCS (LNAI), vol.\u00a03229, pp. 320\u2013333. Springer, Heidelberg (2004)"},{"key":"8_CR44","unstructured":"Garc\u00eda-Dur\u00e1n, R., Fern\u00e1ndez, F., Borrajo, D.: Learning and transferring relational instance-based policies. In: Proceedings of the AAAI-2008 Workshop on Transfer Learning for Complex Tasks (2008)"},{"key":"8_CR45","unstructured":"Gardiol, N.H., Kaelbling, L.P.: Envelope-based planning in relational MDPs. In: Proceedings of the Neural Information Processing Conference (NIPS) (2003)"},{"key":"8_CR46","unstructured":"Gardiol, N.H., Kaelbling, L.P.: Adaptive envelope MDPs for relational equivalence-based planning. Tech. Rep. MIT-CSAIL-TR-2008-050, MIT CS & AI Lab, Cambridge, MA (2008)"},{"key":"8_CR47","doi-asserted-by":"crossref","unstructured":"G\u00e4rtner, T., Driessens, K., Ramon, J.: Graph kernels and Gaussian processes for relational reinforcement learning. In: Proceedings of the International Conference on Inductive Logic Programming (ILP) (2003)","DOI":"10.1007\/978-3-540-39917-9_11"},{"key":"8_CR48","unstructured":"Gearhart, C.: Genetic programming as policy search in Markov decision processes. In: Genetic Algorithms and Genetic Programming at Stanford, pp. 61\u201367 (2003)"},{"key":"8_CR49","unstructured":"Geffner, H., Bonet, B.: High-level planning and control with incomplete information using pomdps. In: Proceedings Fall AAAI Symposium on Cognitive Robotics (1998)"},{"key":"8_CR50","doi-asserted-by":"crossref","unstructured":"Gil, Y.: Learning by experimentation: Incremental refinement of incomplete planning domains. In: Proceedings of the International Conference on Machine Learning (ICML) (1994)","DOI":"10.21236\/ADA269671"},{"key":"8_CR51","doi-asserted-by":"crossref","unstructured":"Gordon, G.J.: Stable function approximation in dynamic programming. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 261\u2013268 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"8_CR52","unstructured":"Gretton, C.: Gradient-based relational reinforcement-learning of temporally extended policies. In: Proceedings of the International Conference on Artificial Intelligence Planning Systems (ICAPS) (2007a)"},{"key":"8_CR53","unstructured":"Gretton, C.: Gradient-based relational reinforcement learning of temporally extended policies. In: Workshop on Artificial Intelligence Planning and Learning at the International Conference on Automated Planning Systems (2007b)"},{"key":"8_CR54","unstructured":"Gretton, C., Thi\u00e9baux, S.: Exploiting first-order regression in inductive policy selection. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI), pp. 217\u2013225 (2004a)"},{"key":"8_CR55","unstructured":"Gretton, C., Thi\u00e9baux, S.: Exploiting first-order regression in inductive policy selection (extended abstract). In: Proceedings of the Workshop on Relational Reinforcement Learning at ICML 2004 (2004b)"},{"key":"8_CR56","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S1567-8326(03)00039-0","volume":"57","author":"J.F. Groote","year":"2003","unstructured":"Groote, J.F., Tveretina, O.: Binary decision diagrams for first-order predicate logic. The Journal of Logic and Algebraic Programming\u00a057, 1\u201322 (2003)","journal-title":"The Journal of Logic and Algebraic Programming"},{"key":"8_CR57","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1007\/978-3-540-77949-0_6","volume-title":"Adaptive Agents and Multi-Agent Systems III. Adaptation and Multi-Agent Learning","author":"M. Grounds","year":"2008","unstructured":"Grounds, M., Kudenko, D.: Combining Reinforcement Learning with Symbolic Planning. In: Tuyls, K., Nowe, A., Guessoum, Z., Kudenko, D. (eds.) ALAMAS 2005, ALAMAS 2006, and ALAMAS 2007. LNCS (LNAI), vol.\u00a04865, pp. 75\u201386. Springer, Heidelberg (2008)"},{"key":"8_CR58","unstructured":"Guestrin, C.: Planning under uncertainty in complex structured environments. PhD thesis, Computer Science Department, Stanford University (2003)"},{"key":"8_CR59","unstructured":"Guestrin, C., Koller, D., Gearhart, C., Kanodia, N.: Generalizing plans to new environments in relational MDPs. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 1003\u20131010 (2003a)"},{"key":"8_CR60","doi-asserted-by":"crossref","first-page":"399","DOI":"10.1613\/jair.1000","volume":"19","author":"C. Guestrin","year":"2003","unstructured":"Guestrin, C., Koller, D., Parr, R., Venkataraman, S.: Efficient solution algorithms for factored MDPs. Journal of Artificial Intelligence Research (JAIR)\u00a019, 399\u2013468 (2003b)","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"8_CR61","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1007\/978-3-540-76631-5_39","volume-title":"MICAI 2007: Advances in Artificial Intelligence","author":"F. Halbritter","year":"2007","unstructured":"Halbritter, F., Geibel, P.: Learning Models of Relational MDPs Using Graph Kernels. In: Gelbukh, A., Kuri Morales, \u00c1.F. (eds.) MICAI 2007. LNCS (LNAI), vol.\u00a04827, pp. 409\u2013419. Springer, Heidelberg (2007)"},{"issue":"1","key":"8_CR62","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0004-3702(94)90002-7","volume":"66","author":"S. Hanks","year":"1994","unstructured":"Hanks, S., McDermott, D.V.: Modeling a dynamic and uncertain world I: Symbolic and probabilistic reasoning about change. Artificial Intelligence\u00a066(1), 1\u201355 (1994)","journal-title":"Artificial Intelligence"},{"key":"8_CR63","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1007\/978-3-540-30200-1_12","volume-title":"Computational Logic in Multi-Agent Systems","author":"A. Guerra-Hern\u00e1ndez","year":"2004","unstructured":"Guerra-Hern\u00e1ndez, A., Fallah-Seghrouchni, A.E., Soldano, H.: Learning in BDI Multi-Agent Systems. In: Dix, J., Leite, J. (eds.) CLIMA 2004. LNCS (LNAI), vol.\u00a03259, pp. 218\u2013233. Springer, Heidelberg (2004)"},{"key":"8_CR64","doi-asserted-by":"publisher","first-page":"69","DOI":"10.4236\/jilsa.2010.22010","volume":"2","author":"J. Hern\u00e1ndez","year":"2010","unstructured":"Hern\u00e1ndez, J., Morales, E.F.: Relational reinforcement learning with continuous actions by combining behavioral cloning and locally weighted regression. Journal of Intelligent Systems and Applications\u00a02, 69\u201379 (2010)","journal-title":"Journal of Intelligent Systems and Applications"},{"key":"8_CR65","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1007\/978-3-642-03969-0_28","volume-title":"Engineering Applications of Neural Networks","author":"K. H\u00e4ming","year":"2009","unstructured":"H\u00e4ming, K., Peters, G.: Relational Reinforcement Learning Applied to Appearance-Based Object Recognition. In: Palmer-Brown, D., Draganova, C., Pimenidis, E., Mouratidis, H. (eds.) EANN 2009. Communications in Computer and Information Science, vol.\u00a043, pp. 301\u2013312. Springer, Heidelberg (2009)"},{"key":"8_CR66","unstructured":"H\u00f6lldobler, S., Skvortsova, O.: A logic-based approach to dynamic programming. In: Proceedings of the AAAI Workshop on Learning and Planning in Markov Processes - Advances and Challenges (2004)"},{"key":"8_CR67","unstructured":"Itoh, H., Nakamura, K.: Towards learning to learn and plan by relational reinforcement learning. In: Proceedings of the ICML Workshop on Relational Reinforcement Learning (2004)"},{"key":"8_CR68","unstructured":"Joshi, S.: First-order decision diagrams for decision-theoretic planning. PhD thesis, Tufts University, Computer Science Department (2010)"},{"key":"8_CR69","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L.P. Kaelbling","year":"1998","unstructured":"Kaelbling, L.P., Littman, M.L., Cassandra, A.R.: Planning and acting in partially observable stochastic domains. Artificial Intelligence\u00a0101, 99\u2013134 (1998)","journal-title":"Artificial Intelligence"},{"key":"8_CR70","unstructured":"Kaelbling, L.P., Oates, T., Gardiol, N.H., Finney, S.: Learning in worlds with objects. In: The AAAI Spring Symposium (2001)"},{"key":"8_CR71","unstructured":"Karabaev, E., Skvortsova, O.: A heuristic search algorithm for solving first-order MDPs. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI) (2005)"},{"key":"8_CR72","unstructured":"Karabaev, E., Ramm\u00e9, G., Skvortsova, O.: Efficient symbolic reasoning for first-order MDPs. In: ECAI Workshop on Planning, Learning and Monitoring with Uncertainty and Dynamic Worlds (2006)"},{"key":"8_CR73","doi-asserted-by":"crossref","unstructured":"Katz, D., Pyuro, Y., Brock, O.: Learning to manipulate articulated objects in unstructured environments using a grounded relational representation. In: Proceedings of Robotics: Science and Systems IV (2008)","DOI":"10.15607\/RSS.2008.IV.033"},{"key":"8_CR74","doi-asserted-by":"crossref","unstructured":"Kersting, K., De Raedt, L.: Logical Markov decision programs and the convergence of TD(\u03bb). In: Proceedings of the International Conference on Inductive Logic Programming (ILP) (2004)","DOI":"10.1007\/978-3-540-30109-7_16"},{"key":"8_CR75","doi-asserted-by":"crossref","unstructured":"Kersting, K., Driessens, K.: Non-parametric gradients: A unified treatment of propositional and relational domains. In: Proceedings of the International Conference on Machine Learning (ICML) (2008)","DOI":"10.1145\/1390156.1390214"},{"key":"8_CR76","doi-asserted-by":"crossref","unstructured":"Kersting, K., van Otterlo, M., De Raedt, L.: Bellman goes relational. In: Proceedings of the International Conference on Machine Learning (ICML) (2004)","DOI":"10.1145\/1015330.1015401"},{"issue":"1","key":"8_CR77","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1023\/A:1007571119753","volume":"35","author":"R. Khardon","year":"1999","unstructured":"Khardon, R.: Learning to take actions. Machine Learning\u00a035(1), 57\u201390 (1999)","journal-title":"Machine Learning"},{"key":"8_CR78","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/3-540-36599-0_8","volume-title":"Genetic Programming","author":"M.J. Kochenderfer","year":"2003","unstructured":"Kochenderfer, M.J.: Evolving Hierarchical and Recursive Teleo-Reactive Programs Through Genetic Programming. In: Ryan, C., Soule, T., Keijzer, M., Tsang, E.P.K., Poli, R., Costa, E. (eds.) EuroGP 2003. LNCS, vol.\u00a02610, pp. 83\u201392. Springer, Heidelberg (2003)"},{"key":"8_CR79","unstructured":"Lane, T., Wilson, A.: Toward a topological theory of relational reinforcement learning for navigation tasks. In: Proceedings of the International Florida Artificial Intelligence Research Society Conference (FLAIRS) (2005)"},{"key":"8_CR80","doi-asserted-by":"crossref","unstructured":"Lang, T., Toussaint, M.: Approximate inference for planning in stochastic relational worlds. In: Proceedings of the International Conference on Machine Learning (ICML) (2009)","DOI":"10.1145\/1553374.1553450"},{"key":"8_CR81","unstructured":"Lang, T., Toussaint, M.: Probabilistic backward and forward reasoning in stochastic relational worlds. In: Proceedings of the International Conference on Machine Learning (ICML) (2010)"},{"key":"8_CR82","first-page":"33","volume":"27","author":"P. Langley","year":"2006","unstructured":"Langley, P.: Cognitive architectures and general intelligent systems. AI Magazine\u00a027, 33\u201344 (2006)","journal-title":"AI Magazine"},{"key":"8_CR83","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1007\/s005000100113","volume":"6","author":"P.L. Lanzi","year":"2002","unstructured":"Lanzi, P.L.: Learning classifier systems from a reinforcement learning perspective. Soft Computing\u00a06, 162\u2013170 (2002)","journal-title":"Soft Computing"},{"key":"8_CR84","doi-asserted-by":"crossref","unstructured":"Lecoeuche, R.: Learning optimal dialogue management rules by using reinforcement learning and inductive logic programming. In: Proceedings of the North American Chapter of the Association for Computational Linguistics, NAACL (2001)","DOI":"10.3115\/1073336.1073364"},{"key":"8_CR85","unstructured":"Letia, I., Precup, D.: Developing collaborative Golog agents by reinforcement learning. In: Proceedings of the 13th IEEE International Conference on Tools with Artificial Intelligence (ICTAI 2001). IEEE Computer Society (2001)"},{"key":"8_CR86","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"684","DOI":"10.1007\/3-540-36605-9_62","volume-title":"Applications of Evolutionary Computing","author":"J. Levine","year":"2003","unstructured":"Levine, J., Humphreys, D.: Learning Action Strategies for Planning Domains Using Genetic Programming. In: Raidl, G.R., Cagnoni, S., Cardalda, J.J.R., Corne, D.W., Gottlieb, J., Guillot, A., Hart, E., Johnson, C.G., Marchiori, E., Meyer, J.-A., Middendorf, M. (eds.) EvoIASP 2003, EvoWorkshops 2003, EvoSTIM 2003, EvoROB\/EvoRobot 2003, EvoCOP 2003, EvoBIO 2003, and EvoMUSART 2003. LNCS, vol.\u00a02611, pp. 684\u2013695. Springer, Heidelberg (2003)"},{"key":"8_CR87","first-page":"7","volume-title":"ACL 2010: Proceedings of the ACL 2010 Student Research Workshop","author":"P. Lison","year":"2010","unstructured":"Lison, P.: Towards relational POMDPs for adaptive dialogue management. In: ACL 2010: Proceedings of the ACL 2010 Student Research Workshop, pp. 7\u201312. Association for Computational Linguistics, Morristown (2010)"},{"key":"8_CR88","unstructured":"Littman, M.L., Sutton, R.S., Singh, S.: Predictive representations of state. In: Proceedings of the Neural Information Processing Conference (NIPS) (2001)"},{"key":"8_CR89","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-08406-9","volume-title":"Logic for Learning: Learning Comprehensible Theories From Structured Data","author":"J.W. Lloyd","year":"2003","unstructured":"Lloyd, J.W.: Logic for Learning: Learning Comprehensible Theories From Structured Data. Springer, Heidelberg (2003)"},{"key":"8_CR90","unstructured":"Martin, M., Geffner, H.: Learning generalized policies in planning using concept languages. In: Proceedings of the International Conference on Principles of Knowledge Representation and Reasoning (KR) (2000)"},{"key":"8_CR91","unstructured":"Mausam, Weld, D.S.: Solving relational MDPs with first-order machine learning. In: Workshop on Planning under Uncertainty and Incomplete Information at ICAPS 2003 (2003)"},{"key":"8_CR92","doi-asserted-by":"crossref","unstructured":"McCallum, R.A.: Instance-based utile distinctions for reinforcement learning with hidden state. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 387\u2013395 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50055-4"},{"key":"8_CR93","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1007\/978-3-540-88138-4_10","volume-title":"Learning Classifier Systems","author":"D. Mellor","year":"2008","unstructured":"Mellor, D.: A Learning Classifier System Approach to Relational Reinforcement Learning. In: Bacardit, J., Bernad\u00f3-Mansilla, E., Butz, M.V., Kovacs, T., Llor\u00e0, X., Takadama, K. (eds.) IWLCS 2006 and IWLCS 2007. LNCS (LNAI), vol.\u00a04998, pp. 169\u2013188. Springer, Heidelberg (2008)"},{"key":"8_CR94","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-1567-8","volume-title":"Logic-Based Artificial Intelligence","author":"J. Minker","year":"2000","unstructured":"Minker, J.: Logic-Based Artificial Intelligence. Kluwer Academic Publishers Group, Dordrecht (2000)"},{"issue":"1-3","key":"8_CR95","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/0004-3702(89)90047-7","volume":"40","author":"S. Minton","year":"1989","unstructured":"Minton, S., Carbonell, J., Knoblock, C.A., Kuokka, D.R., Etzioni, O., Gil, Y.: Explanation-based learning: A problem solving perspective. Artificial Intelligence\u00a040(1-3), 63\u2013118 (1989)","journal-title":"Artificial Intelligence"},{"key":"8_CR96","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.148","volume":"3","author":"R.J. Mooney","year":"1995","unstructured":"Mooney, R.J., Califf, M.E.: Induction of first-order decision lists: Results on learning the past tense of english verbs. Journal of Artificial Intelligence Research (JAIR)\u00a03, 1\u201324 (1995)","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"issue":"1","key":"8_CR97","first-page":"103","volume":"13","author":"A.W. Moore","year":"1993","unstructured":"Moore, A.W., Atkeson, C.G.: Prioritized sweeping: Reinforcement learning with less data and less time. Machine Learning\u00a013(1), 103\u2013130 (1993)","journal-title":"Machine Learning"},{"key":"8_CR98","unstructured":"Morales, E.F.: Scaling up reinforcement learning with a relational representation. In: Proceedings of the Workshop on Adaptability in Multi-Agent Systems at AORC 2003, Sydney (2003)"},{"key":"8_CR99","doi-asserted-by":"crossref","unstructured":"Morales, E.F.: Learning to fly by combining reinforcement learning with behavioral cloning. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 598\u2013605 (2004)","DOI":"10.1145\/1015330.1015384"},{"key":"8_CR100","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1613\/jair.613","volume":"11","author":"D.E. Moriarty","year":"1999","unstructured":"Moriarty, D.E., Schultz, A.C., Grefenstette, J.J.: Evolutionary algorithms for reinforcement learning. Journal of Artificial Intelligence Research (JAIR)\u00a011, 241\u2013276 (1999)","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"8_CR101","unstructured":"Mour\u00e3o, K., Petrick, R.P.A., Steedman, M.: Using kernel perceptrons to learn action effects for planning. In: Proceedings of the International Conference on Cognitive Systems (CogSys), pp. 45\u201350 (2008)"},{"key":"8_CR102","unstructured":"Muller, T.J., van Otterlo, M.: Evolutionary reinforcement learning in relational domains. In: Proceedings of the 7th European Workshop on Reinforcement Learning (2005)"},{"key":"8_CR103","doi-asserted-by":"crossref","unstructured":"Nason, S., Laird, J.E.: Soar-RL: Integrating reinforcement learning with soar. In: Proceedings of the Workshop on Relational Reinforcement Learning at ICML 2004 (2004)","DOI":"10.1016\/j.cogsys.2004.09.006"},{"key":"8_CR104","unstructured":"Nath, A., Domingos, P.: A language for relational decision theory. In: International Workshop on Statistical Relational Learning, SRL (2009)"},{"issue":"1","key":"8_CR105","first-page":"59","volume":"2","author":"R. Neruda","year":"2009","unstructured":"Neruda, R., Slusny, S.: Performance comparison of two reinforcement learning algorithms for small mobile robots. International Journal of Control and Automation\u00a02(1), 59\u201368 (2009)","journal-title":"International Journal of Control and Automation"},{"key":"8_CR106","unstructured":"Oates, T., Cohen, P.R.: Learning planning operators with conditional and probabilistic effects. In: Planning with Incomplete Information for Robot Problems: Papers from the 1996 AAAI Spring Symposium, pp. 86\u201394 (1996)"},{"key":"8_CR107","unstructured":"Pasula, H.M., Zettlemoyer, L.S., Kaelbling, L.P.: Learning probabilistic planning rules. In: Proceedings of the International Conference on Artificial Intelligence Planning Systems (ICAPS) (2004)"},{"key":"8_CR108","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1016\/S0004-3702(97)00027-1","volume":"94","author":"D. Poole","year":"1997","unstructured":"Poole, D.: The independent choice logic for modeling multiple agents under uncertainty. Artificial Intelligence\u00a094, 7\u201356 (1997)","journal-title":"Artificial Intelligence"},{"key":"8_CR109","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"699","DOI":"10.1007\/978-3-540-74958-5_70","volume-title":"Machine Learning: ECML 2007","author":"J. Ramon","year":"2007","unstructured":"Ramon, J., Driessens, K., Croonenborghs, T.: Transfer Learning in Reinforcement Learning Problems Through Partial Policy Recycling. In: Kok, J.N., Koronacki, J., Lopez de Mantaras, R., Matwin, S., Mladeni\u010d, D., Skowron, A. (eds.) ECML 2007. LNCS (LNAI), vol.\u00a04701, pp. 699\u2013707. Springer, Heidelberg (2007)"},{"key":"8_CR110","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4074.001.0001","volume-title":"Knowledge in Action: Logical Foundations for Specifying and Implementing Dynamical Systems","author":"R. Reiter","year":"2001","unstructured":"Reiter, R.: Knowledge in Action: Logical Foundations for Specifying and Implementing Dynamical Systems. The MIT Press, Cambridge (2001)"},{"key":"8_CR111","unstructured":"Rodrigues, C., Gerard, P., Rouveirol, C.: On and off-policy relational reinforcement learning. In: Late-Breaking Papers of the International Conference on Inductive Logic Programming (2008)"},{"key":"8_CR112","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1007\/978-3-642-21295-6_24","volume-title":"Inductive Logic Programming","author":"C. Rodrigues","year":"2011","unstructured":"Rodrigues, C., G\u00e9rard, P., Rouveirol, C.: IncremEntal Learning of Relational Action Models in Noisy Environments. In: Frasconi, P., Lisi, F.A. (eds.) ILP 2010. LNCS, vol.\u00a06489, pp. 206\u2013213. Springer, Heidelberg (2011)"},{"key":"8_CR113","unstructured":"Roncagliolo, S., Tadepalli, P.: Function approximation in hierarchical relational reinforcement learning. In: Proceedings of the Workshop on Relational Reinforcement Learning at ICML (2004)"},{"key":"8_CR114","volume-title":"Artificial Intelligence: a Modern Approach","author":"S.J. Russell","year":"2003","unstructured":"Russell, S.J., Norvig, P.: Artificial Intelligence: a Modern Approach, 2nd edn. Prentice Hall, New Jersey (2003)","edition":"2"},{"key":"8_CR115","unstructured":"Ryan, M.R.K.: Using abstract models of behaviors to automatically generate reinforcement learning hierarchies. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 522\u2013529 (2002)"},{"key":"8_CR116","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1007\/978-3-540-87993-0_27","volume-title":"Scalable Uncertainty Management","author":"E. Saad","year":"2008","unstructured":"Saad, E.: A Logical Framework to Reinforcement Learning Using Hybrid Probabilistic Logic Programs. In: Greco, S., Lukasiewicz, T. (eds.) SUM 2008. LNCS (LNAI), vol.\u00a05291, pp. 341\u2013355. Springer, Heidelberg (2008)"},{"key":"8_CR117","doi-asserted-by":"crossref","unstructured":"Safaei, J., Ghassem-Sani, G.: Incremental learning of planning operators in stochastic domains. In: Proceedings of the International Conference on Current Trends in Theory and Practice of Computer Science (SOFSEM), pp. 644\u2013655 (2007)","DOI":"10.1007\/978-3-540-69507-3_56"},{"key":"8_CR118","unstructured":"Sanner, S.: Simultaneous learning of structure and value in relational reinforcement learning. In: Driessens, K., Fern, A., van Otterlo, M. (eds.) Proceedings of the ICML-2005 Workshop on Rich Representations for Reinforcement Learning (2005)"},{"key":"8_CR119","unstructured":"Sanner, S.: Online feature discovery in relational reinforcement learning. In: Proceedings of the ICML-2006 Workshop on Open Problems in Statistical Relational Learning (2006)"},{"key":"8_CR120","unstructured":"Sanner, S., Boutilier, C.: Approximate linear programming for first-order MDPs. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI) (2005)"},{"key":"8_CR121","unstructured":"Sanner, S., Boutilier, C.: Practical linear value-approximation techniques for first-order MDPs. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI) (2006)"},{"key":"8_CR122","unstructured":"Sanner, S., Boutilier, C.: Approximate solution techniques for factored first-order MDPs. In: Proceedings of the International Conference on Artificial Intelligence Planning Systems (ICAPS) (2007)"},{"key":"8_CR123","doi-asserted-by":"crossref","unstructured":"Sanner, S., Kersting, K.: Symbolic dynamic programming for first-order pomdps. In: Proceedings of the National Conference on Artificial Intelligence (AAAI) (2010)","DOI":"10.1609\/aaai.v24i1.7747"},{"key":"8_CR124","volume-title":"Habilitationsschrift, Fakult\u00e4t IV, Elektrotechnik und Informatik","author":"U. Schmid","year":"2001","unstructured":"Schmid, U.: Inductive synthesis of functional programs: Learning domain-specific control rules and abstraction schemes. In: Habilitationsschrift, Fakult\u00e4t IV, Elektrotechnik und Informatik, Technische Universit\u00e4t Berlin, Germany (2001)"},{"key":"8_CR125","unstructured":"Schuurmans, D., Patrascu, R.: Direct value approximation for factored MDPs. In: Proceedings of the Neural Information Processing Conference (NIPS) (2001)"},{"key":"8_CR126","unstructured":"Shapiro, D., Langley, P.: Separating skills from preference. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 570\u2013577 (2002)"},{"key":"8_CR127","unstructured":"Simpkins, C., Bhat, S., Isbell, C.L., Mateas, M.: Adaptive Programming: Integrating Reinforcement Learning into a Programming Language. In: Proceedings of the Twenty-Third ACM SIGPLAN International Conference on Object-Oriented Programming, Systems, Languages, and Applications, OOPSLA (2008)"},{"key":"8_CR128","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/S0004-3702(00)00079-5","volume":"125","author":"J. Slaney","year":"2001","unstructured":"Slaney, J., Thi\u00e9baux, S.: Blocks world revisited. Artificial Intelligence\u00a0125, 119\u2013153 (2001)","journal-title":"Artificial Intelligence"},{"key":"8_CR129","doi-asserted-by":"crossref","unstructured":"Song, Z.W., Chen, X.P.: States evolution in \u0398(\u03bb)-learning based on logical mdps with negation. In: IEEE International Conference on Systems, Man and Cybernetics, pp. 1624\u20131629 (2007)","DOI":"10.1109\/ICSMC.2007.4413971"},{"issue":"9","key":"8_CR130","doi-asserted-by":"publisher","first-page":"29","DOI":"10.4304\/jcp.3.9.29-38","volume":"3","author":"Z.W. Song","year":"2008","unstructured":"Song, Z.W., Chen, X.P.: Agent learning in relational domains based on logical mdps with negation. Journal of Computers\u00a03(9), 29\u201338 (2008)","journal-title":"Journal of Computers"},{"key":"8_CR131","unstructured":"Stone, P.: Learning and multiagent reasoning for autonomous agents. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), Computers and Thought Award Paper (2007)"},{"key":"8_CR132","doi-asserted-by":"crossref","unstructured":"Stracuzzi, D.J., Asgharbeygi, N.: Transfer of knowledge structures with relational temporal difference learning. In: Proceedings of the ICML 2006 Workshop on Structural Knowledge Transfer for Machine Learning (2006)","DOI":"10.1145\/1143844.1143851"},{"key":"8_CR133","volume-title":"Reinforcement Learning: an Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: an Introduction. The MIT Press, Cambridge (1998)"},{"key":"8_CR134","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Proceedings of the Neural Information Processing Conference (NIPS), pp. 1057\u20131063 (2000)"},{"issue":"3-4","key":"8_CR135","first-page":"179","volume":"2","author":"M. Thielscher","year":"1998","unstructured":"Thielscher, M.: Introduction to the Fluent Calculus. Electronic Transactions on Artificial Intelligence\u00a02(3-4), 179\u2013192 (1998)","journal-title":"Electronic Transactions on Artificial Intelligence"},{"key":"8_CR136","unstructured":"Thon, I., Guttman, B., van Otterlo, M., Landwehr, N., De Raedt, L.: From non-deterministic to probabilistic planning with the help of statistical relational learning. In: Workshop on Planning and Learning at ICAPS (2009)"},{"key":"8_CR137","unstructured":"Torrey, L.: Relational transfer in reinforcement learning. PhD thesis, University of Wisconsin-Madison, Computer Science Department (2009)"},{"key":"8_CR138","unstructured":"Torrey, L., Shavlik, J., Walker, T., Maclin, R.: Relational macros for transfer in reinforcement learning. In: Proceedings of the International Conference on Inductive Logic Programming (ILP) (2007)"},{"key":"8_CR139","unstructured":"Torrey, L., Shavlik, J., Natarajan, S., Kuppili, P., Walker, T.: Transfer in reinforcement learning via markov logic networks. In: Proceedings of the AAAI-2008 Workshop on Transfer Learning for Complex Tasks (2008)"},{"key":"8_CR140","unstructured":"Toussaint, M.: Probabilistic inference as a model of planned behavior. K\u00fcnstliche Intelligenz (German Artificial Intelligence Journal)\u00a03 (2009)"},{"key":"8_CR141","doi-asserted-by":"crossref","unstructured":"Toussaint, M., Plath, N., Lang, T., Jetchev, N.: Integrated motor control, planning, grasping and high-level reasoning in a blocks world using probabilistic inference. In: IEEE International Conference on Robotics and Automation, ICRA (2010)","DOI":"10.1109\/ROBOT.2010.5509831"},{"key":"8_CR142","doi-asserted-by":"crossref","unstructured":"Van den Broeck, G., Thon, I., van Otterlo, M., De Raedt, L.: DTProbLog: A decision-theoretic probabilistic prolog. In: Proceedings of the National Conference on Artificial Intelligence (AAAI) (2010)","DOI":"10.1609\/aaai.v24i1.7755"},{"key":"8_CR143","unstructured":"van Otterlo, M.: Efficient reinforcement learning using relational aggregation. In: Proceedings of the Sixth European Workshop on Reinforcement Learning, Nancy, France (EWRL-6) (2003)"},{"key":"8_CR144","unstructured":"van Otterlo, M.: Reinforcement learning for relational MDPs. In: Now\u00e9, A., Lenaerts, T., Steenhaut, K. (eds.) Machine Learning Conference of Belgium and the Netherlands (BeNeLearn 2004), pp. 138\u2013145 (2004)"},{"key":"8_CR145","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1016\/j.jalgor.2009.04.004","volume":"64","author":"M. van Otterlo","year":"2009","unstructured":"van Otterlo, M.: Intensional dynamic programming: A rosetta stone for structured dynamic programming. Journal of Algorithms\u00a064, 169\u2013191 (2009a)","journal-title":"Journal of Algorithms"},{"key":"8_CR146","volume-title":"The Logic of Adaptive Behavior: Knowledge Representation and Algorithms for Adaptive Sequential Decision Making under Uncertainty in First-Order and Relational Domains","author":"M. van Otterlo","year":"2009","unstructured":"van Otterlo, M.: The Logic of Adaptive Behavior: Knowledge Representation and Algorithms for Adaptive Sequential Decision Making under Uncertainty in First-Order and Relational Domains. IOS Press, Amsterdam (2009b)"},{"key":"8_CR147","unstructured":"van Otterlo, M., De Vuyst, T.: Evolving and transferring probabilistic policies for relational reinforcement learning. In: Proceedings of the Belgium-Netherlands Artificial Intelligence Conference (BNAIC), pp. 201\u2013208 (2009)"},{"key":"8_CR148","volume-title":"Toward Computational Sapience: Principles and Systems, ch. 9","author":"M. van Otterlo","year":"2007","unstructured":"van Otterlo, M., Wiering, M.A., Dastani, M., Meyer, J.J.: A characterization of sapient agents. In: Mayorga, R.V., Perlovsky, L.I. (eds.) Toward Computational Sapience: Principles and Systems, ch. 9. Springer, Heidelberg (2007)"},{"key":"8_CR149","doi-asserted-by":"crossref","unstructured":"Vargas, B., Morales, E.: Solving navigation tasks with learned teleo-reactive programs, pp. 4185\u20134185 (2008), doi:10.1109\/IROS.2008.4651240","DOI":"10.1109\/IROS.2008.4651240"},{"key":"8_CR150","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"892","DOI":"10.1007\/978-3-642-10268-4_105","volume-title":"Progress in Pattern Recognition, Image Analysis, Computer Vision, and Applications","author":"B. Vargas-Govea","year":"2009","unstructured":"Vargas-Govea, B., Morales, E.: Learning Relational Grammars from Sequences of Actions. In: Bayro-Corrochano, E., Eklundh, J.-O. (eds.) CIARP 2009. LNCS, vol.\u00a05856, pp. 892\u2013900. Springer, Heidelberg (2009)"},{"key":"8_CR151","unstructured":"Vere, S.A.: Induction of relational productions in the presence of background information. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 349\u2013355 (1977)"},{"key":"8_CR152","unstructured":"Walker, T., Shavlik, J., Maclin, R.: Relational reinforcement learning via sampling the space of first-order conjunctive features. In: Proceedings of the Workshop on Relational Reinforcement Learning at ICML 2004 (2004)"},{"key":"8_CR153","unstructured":"Walker, T., Torrey, L., Shavlik, J., Maclin, R.: Building relational world models for reinforcement learning. In: Proceedings of the International Conference on Inductive Logic Programming (ILP) (2007)"},{"key":"8_CR154","unstructured":"Walsh, T.J.: Efficient learning of relational models for sequential decision making. PhD thesis, Rutgers University, Computer Science Department (2010)"},{"key":"8_CR155","unstructured":"Walsh, T.J., Littman, M.L.: Efficient learning of action schemas and web-service descriptions. In: Proceedings of the National Conference on Artificial Intelligence (AAAI) (2008)"},{"key":"8_CR156","unstructured":"Walsh, T.J., Li, L., Littman, M.L.: Transferring state abstractions between mdps. In: ICML-2006 Workshop on Structural Knowledge Transfer for Machine Learning (2006)"},{"key":"8_CR157","unstructured":"Wang, C.: First-order markov decision processes. PhD thesis, Department of Computer Science, Tufts University, U.S.A (2007)"},{"key":"8_CR158","unstructured":"Wang, C., Khardon, R.: Policy iteration for relational mdps. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI) (2007)"},{"key":"8_CR159","doi-asserted-by":"crossref","unstructured":"Wang, C., Khardon, R.: Relational partially observable mdps. In: Proceedings of the National Conference on Artificial Intelligence (AAAI) (2010)","DOI":"10.1609\/aaai.v24i1.7742"},{"key":"8_CR160","unstructured":"Wang, C., Schmolze, J.: Planning with pomdps using a compact, logic-based representation. In: Proceedings of the IEEE International Conference on Tools with Artificial Intelligence, ICTAI (2005)"},{"key":"8_CR161","doi-asserted-by":"crossref","unstructured":"Wang, C., Joshi, S., Khardon, R.: First order decision diagrams for relational MDPs. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI) (2007)","DOI":"10.1613\/jair.2489"},{"key":"8_CR162","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1613\/jair.2489","volume":"31","author":"C. Wang","year":"2008","unstructured":"Wang, C., Joshi, S., Khardon, R.: First order decision diagrams for relational MDPs. Journal of Artificial Intelligence Research (JAIR)\u00a031, 431\u2013472 (2008a)","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"8_CR163","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1007\/978-3-540-88636-5_22","volume-title":"MICAI 2008: Advances in Artificial Intelligence","author":"W. Wang","year":"2008","unstructured":"Wang, W., Gao, Y., Chen, X., Ge, S.: Reinforcement Learning with Markov Logic Networks. In: Gelbukh, A., Morales, E.F. (eds.) MICAI 2008. LNCS (LNAI), vol.\u00a05317, pp. 230\u2013242. Springer, Heidelberg (2008b)"},{"key":"8_CR164","doi-asserted-by":"crossref","unstructured":"Wang, X.: Learning by observation and practice: An incremental approach for planning operator acquisition. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 549\u2013557 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50074-8"},{"key":"8_CR165","unstructured":"Wingate, D., Soni, V., Wolfe, B., Singh, S.: Relational knowledge with predictive state representations. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI) (2007)"},{"key":"8_CR166","volume-title":"An introduction to MultiAgent Systems","author":"M. Wooldridge","year":"2002","unstructured":"Wooldridge, M.: An introduction to MultiAgent Systems. John Wiley & Sons Ltd., West Sussex (2002)"},{"key":"8_CR167","unstructured":"Wu, J.H., Givan, R.: Discovering relational domain features for probabilistic planning. In: Proceedings of the International Conference on Artificial Intelligence Planning Systems (ICAPS) (2007)"},{"key":"8_CR168","unstructured":"Wu, K., Yang, Q., Jiang, Y.: ARMS: Action-relation modelling system for learning action models. In: Proceedings of the National Conference on Artificial Intelligence (AAAI) (2005)"},{"key":"8_CR169","doi-asserted-by":"crossref","unstructured":"Xu, J.Z., Laird, J.E.: Instance-based online learning of deterministic relational action models. In: Proceedings of the International Conference on Machine Learning (ICML) (2010)","DOI":"10.1609\/aaai.v24i1.7569"},{"key":"8_CR170","unstructured":"Yoon, S.W., Fern, A., Givan, R.: Inductive policy selection for first-order MDPs. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (UAI) (2002)"},{"key":"8_CR171","unstructured":"Zettlemoyer, L.S., Pasula, H.M., Kaelbling, L.P.: Learning planning rules in noisy stochastic worlds. In: Proceedings of the National Conference on Artificial Intelligence (AAAI) (2005)"},{"key":"8_CR172","doi-asserted-by":"crossref","unstructured":"Zhao, H., Doshi, P.: Haley: A hierarchical framework for logical composition of web services. In: Proceedings of the International Conference on Web Services (ICWS), pp. 312\u2013319 (2007)","DOI":"10.1109\/ICWS.2007.95"},{"key":"8_CR173","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"565","DOI":"10.1007\/978-3-540-74171-8_56","volume-title":"Advanced Intelligent Computing Theories and Applications. With Aspects of Theoretical and Methodological Issues","author":"H. Zhuo","year":"2007","unstructured":"Zhuo, H., Li, L., Bian, R., Wan, H.: Requirement Specification Based on Action Model Learning. In: Huang, D.-S., Heutte, L., Loog, M. (eds.) ICIC 2007. LNCS, vol.\u00a04681, pp. 565\u2013574. Springer, Heidelberg (2007)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T13:03:57Z","timestamp":1742648637000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":173,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_8","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"type":"print","value":"1867-4534"},{"type":"electronic","value":"1867-4542"}],"subject":[],"published":{"date-parts":[[2012]]}}}