{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T23:15:10Z","timestamp":1771024510970,"version":"3.50.1"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2017,9,19]],"date-time":"2017-09-19T00:00:00Z","timestamp":1505779200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1007\/s10994-017-5669-x","type":"journal-article","created":{"date-parts":[[2017,9,19]],"date-time":"2017-09-19T20:46:35Z","timestamp":1505853995000},"page":"1905-1932","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Planning in hybrid relational MDPs"],"prefix":"10.1007","volume":"106","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0031-6094","authenticated-orcid":false,"given":"Davide","family":"Nitti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vaishak","family":"Belle","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tinne","family":"De Laet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luc","family":"De Raedt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,9,19]]},"reference":[{"key":"5669_CR1","unstructured":"Anand, A., Grover, A., & Singla, P. (2015). ASAP-UCT: Abstraction of state-action pairs in UCT. In Proceedings of IJCAI (pp. 1509\u20131515)."},{"key":"5669_CR2","unstructured":"Apt, K. (1997). From logic programming to Prolog. Prentice-Hall international series in computer science. Upper Saddle River: Prentice Hall."},{"key":"5669_CR3","doi-asserted-by":"crossref","unstructured":"Belle, V., & Levesque, H. J. (2014). PREGO: An action language for belief-based cognitive robotics in continuous domains. In Proceedings of the twenty-eighth AAAI conference on artificial intelligence, July 27\u201331, 2014, Qu\u00e9bec City, Qu\u00e9bec, Canada (pp. 989\u2013995).","DOI":"10.1609\/aaai.v28i1.8865"},{"key":"5669_CR4","unstructured":"Browne, C., Powley, E. J., Whitehouse, D., Lucas, S. M., Cowling, P. I., Rohlfshagen, P., et al. (2012). A survey of Monte Carlo tree search methods. IEEE Transactions on Computational Intelligence and AI in Games, 4(1), 1\u201343. http:\/\/dblp.uni-trier.de\/db\/journals\/tciaig\/tciaig4.html ."},{"key":"5669_CR5","unstructured":"Couetoux, A. (2013). Monte Carlo tree search for continuous and stochastic sequential decision making problems. Thesis, Universit\u00e9 Paris Sud - Paris XI."},{"key":"5669_CR6","unstructured":"Driessens, K., & Ramon, J. (2003). Relational instance based regression for relational reinforcement learning. In Proceedings of the ICML (pp. 123\u2013130)."},{"issue":"1\u20132","key":"5669_CR7","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S D\u017eeroski","year":"2001","unstructured":"D\u017eeroski, S., De Raedt, L., & Driessens, K. (2001). Relational reinforcement learning. Machine Learning, 43(1\u20132), 7\u201352.","journal-title":"Machine Learning"},{"key":"5669_CR8","unstructured":"Feng, Z., Dearden, R., Meuleau, N., & Washington, R. (2004). Dynamic programming for structured continuous Markov decision problems. In Proceedings of the UAI (pp. 154\u2013161)."},{"key":"5669_CR9","unstructured":"Forbes, J., & Andre, D. (2002). Representations for learning control policies. In Proceedings of the ICML workshop on development of representations (pp. 7\u201314)."},{"issue":"12","key":"5669_CR10","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/S0004-3702(02)00376-4","volume":"147","author":"R Givan","year":"2003","unstructured":"Givan, R., Dean, T., & Greig, M. (2003). Equivalence notions and model minimization in markov decision processes. Artificial Intelligence, 147(12), 163\u2013223.","journal-title":"Artificial Intelligence"},{"key":"5669_CR11","unstructured":"Goodman, N., Mansinghka, V. K., Roy, D. M., Bonawitz, K., & Tenenbaum, J. B. (2008). Church: A language for generative models. In Proceedings of the UAI (pp. 220\u2013229)."},{"key":"5669_CR12","doi-asserted-by":"crossref","unstructured":"Gutmann, B., Thon, I., Kimmig, A., Bruynooghe, M., & De Raedt, L. (2011). The magic of logical inference in probabilistic programming. Theory and Practice of Logic Programming, 11, 663\u2013680.","DOI":"10.1017\/S1471068411000238"},{"key":"5669_CR13","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1613\/jair.1965","volume":"27","author":"S H\u00f6lldobler","year":"2006","unstructured":"H\u00f6lldobler, S., Karabaev, E., & Skvortsova, O. (2006). Flucap: A heuristic search planner for first-order MDPs. Journal of Artificial Intelligence Research, 27, 419\u2013439.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"5669_CR14","doi-asserted-by":"crossref","unstructured":"Hostetler, J., Fern, A., & Dietterich, T. (2014). State aggregation in Monte Carlo tree search. In Proceedings of AAAI.","DOI":"10.1609\/aaai.v28i1.9066"},{"key":"5669_CR15","unstructured":"Jiang, N., Singh, S., & Lewis, R. (2014). Improving UCT planning via approximate homomorphisms. In Proceedings of the 2014 international conference on autonomous agents and multi-agent systems (pp. 1289\u20131296). International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"5669_CR16","doi-asserted-by":"crossref","unstructured":"Joshi, S., Kersting, K., & Khardon, R. (2010). Self-taught decision theoretic planning with first order decision diagrams. In ICAPS (pp. 89\u201396).","DOI":"10.1609\/icaps.v20i1.13411"},{"issue":"2\u20133","key":"5669_CR17","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1023\/A:1017932429737","volume":"49","author":"M Kearns","year":"2002","unstructured":"Kearns, M., Mansour, Y., & Ng, A. Y. (2002). A sparse sampling algorithm for near-optimal planning in large Markov decision processes. Machine Learning, 49(2\u20133), 193\u2013208.","journal-title":"Machine Learning"},{"key":"5669_CR18","doi-asserted-by":"crossref","unstructured":"Keller, T., & Eyerich, P. (2012). PROST: Probabilistic planning based on UCT. In Proceedings of the ICAPS.","DOI":"10.1609\/icaps.v22i1.13518"},{"key":"5669_CR19","doi-asserted-by":"crossref","unstructured":"Kersting, K., Otterlo, M. V., & De Raedt, L. (2004). Bellman goes relational. In Proceedings of the ICML (p.\u00a059).","DOI":"10.1145\/1015330.1015401"},{"key":"5669_CR20","unstructured":"Kimmig, A., Demoen, B., De\u00a0Raedt, L., Santos Costa, V., & Rocha, R. (2010). On the implementation of the probabilistic logic programming language ProbLog. Theory and Practice of Logic Programming (TPLP), 11, 235\u2013262."},{"key":"5669_CR21","unstructured":"Kimmig, A., Santos Costa, V., Rocha, R., Demoen, B., & De Raedt, L. (2008). On the efficient execution of ProbLog programs. In Logic programming. Lecture notes in computer science (pp. 175\u2013189). Berlin: Springer."},{"key":"5669_CR22","doi-asserted-by":"crossref","unstructured":"Kocsis, L., & Szepesv\u00e1ri, C. (2006). Bandit based Monte-Carlo planning. In Proceedings of the ECML.","DOI":"10.1007\/11871842_29"},{"key":"5669_CR23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.3093","volume":"39","author":"T Lang","year":"2010","unstructured":"Lang, T., & Toussaint, M. (2010). Planning with noisy probabilistic relational rules. Journal of Artificial Intelligence Research, 39, 1\u201349.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"5669_CR24","unstructured":"Li, L., Walsh, T. J., & Littman, M. L. (2006). Towards a unified theory of state abstraction for MDPS. In ISAIM."},{"key":"5669_CR25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-83189-8","volume-title":"Foundations of logic programming","author":"J Lloyd","year":"1987","unstructured":"Lloyd, J. (1987). Foundations of logic programming. New York: Springer."},{"key":"5669_CR26","doi-asserted-by":"crossref","unstructured":"Mansley, C. R., Weinstein, A., & Littman, M. L. (2011). Sample-based planning for continuous action Markov decision processes. In Proceedings of the ICAPS.","DOI":"10.1609\/icaps.v21i1.13484"},{"key":"5669_CR27","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01559-5","volume-title":"Planning with Markov decision processes: An AI perspective","author":"AK Mausam","year":"2012","unstructured":"Mausam, A. K. (2012). Planning with Markov decision processes: An AI perspective. San Rafael: Morgan & Claypool Publishers."},{"issue":"1","key":"5669_CR28","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1613\/jair.2529","volume":"34","author":"N Meuleau","year":"2009","unstructured":"Meuleau, N., Benazera, E., Brafman, R. I., Hansen, E. A., & Mausam, M. (2009). A heuristic search approach to planning with continuous resources in stochastic domains. Journal of Artificial Intelligence Research, 34(1), 27.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"5669_CR29","unstructured":"Milch, B., Marthi, B., Russell, S., Sontag, D., Ong, D., & Kolobov, A. (2005a). BLOG: Probabilistic models with unknown objects. In Proceedings of the IJCAI."},{"key":"5669_CR30","unstructured":"Milch, B., Marthi, B., Sontag, D., Russell, S., Ong, D. L., & Kolobov, A. (2005b). Approximate inference for infinite contingent Bayesian networks. In Proceedings of the 10th international workshop on artificial intelligence and statistics."},{"key":"5669_CR31","doi-asserted-by":"crossref","unstructured":"Munos, R. (2014). From bandits to Monte-Carlo tree search: The optimistic principle applied to optimization and planning. Foundations and Trends $$^{\\textregistered }$$ \u00ae in Machine Learning, 7, 1\u2013129.","DOI":"10.1561\/2200000038"},{"key":"5669_CR32","volume-title":"Logic, programming and Prolog","author":"U Nilsson","year":"1996","unstructured":"Nilsson, U., & Ma\u0142iszy\u0144ski, J. (1996). Logic, programming and Prolog (2nd ed.). Hoboken: Wiley.","edition":"2"},{"key":"5669_CR33","unstructured":"Nitti, D., Belle, V., De\u00a0Laet, T., & De\u00a0Raedt, L. (2015). Sample-based abstraction for hybrid relational MDPs. European workshop on reinforcement learning (EWRL 2015), 10\u201311."},{"key":"5669_CR34","doi-asserted-by":"crossref","unstructured":"Nitti, D., Belle, V., & De\u00a0Raedt, L. (2015). Planning in discrete and continuous Markov decision processes by probabilistic programming. In Proceedings of the European conference on machine learning and knowledge discovery in databases (ECML\/PKDD), 2015.","DOI":"10.1007\/978-3-319-23525-7_20"},{"key":"5669_CR35","doi-asserted-by":"crossref","unstructured":"Nitti, D., De Laet, T., & De Raedt, L. (2013). A particle filter for hybrid relational domains. In Proceedings of the IROS.","DOI":"10.1109\/IROS.2013.6696747"},{"key":"5669_CR36","doi-asserted-by":"crossref","unstructured":"Nitti, D., De Laet, T., & De Raedt, L. (2014). Relational object tracking and learning. In Proceedings of the ICRA.","DOI":"10.1109\/ICRA.2014.6906966"},{"key":"5669_CR37","unstructured":"Owen, A. B. (2013). Monte Carlo theory, methods and examples. http:\/\/statweb.stanford.edu\/~owen\/mc\/ ."},{"key":"5669_CR38","unstructured":"Peshkin, L., & Shelton, C. R. (2002). Learning from scarce experience. In Proceedings of the ICML (pp. 498\u2013505)."},{"key":"5669_CR39","unstructured":"Precup, D., Sutton, R. S., & Singh, S. P. (2000). Eligibility traces for off-policy policy evaluation. In Proceedings of the ICML."},{"key":"5669_CR40","unstructured":"Sanner, S. (2010). Relational dynamic influence diagram language (RDDL): Language description. Unpublished paper."},{"key":"5669_CR41","unstructured":"Sanner, S., Delgado, K. V., & de\u00a0Barros, L. N. (2011). Symbolic dynamic programming for discrete and continuous state MDPs. In Proceedings of the UAI (pp. 643\u2013652)."},{"key":"5669_CR42","doi-asserted-by":"crossref","unstructured":"Sato, T. (1995). A statistical learning method for logic programs with distribution semantics. In Proceedings of the twelfth international conference on logic programming (pp. 715\u2013729). MIT Press.","DOI":"10.7551\/mitpress\/4298.003.0069"},{"key":"5669_CR43","unstructured":"Shelton, C. R. (2001a). Importance sampling for reinforcement learning with multiple objectives. Ph.D. thesis, MIT."},{"key":"5669_CR44","unstructured":"Shelton, C. R. (2001b). Policy improvement for POMDPs using normalized importance sampling. In Proceedings of the UAI (pp. 496\u2013503)."},{"key":"5669_CR45","unstructured":"Smart, W. D., & Kaelbling, L. P. (2000). Practical reinforcement learning in continuous spaces. In Proceedings of the ICML."},{"key":"5669_CR46","unstructured":"Srivastava, S., Russell, S., Ruan, P., & Cheng, X. (2014). First-order open-universe POMDPs. In Proceedings of the UAI."},{"key":"5669_CR47","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction. Cambridge: MIT Press."},{"key":"5669_CR48","unstructured":"Tadepalli, P., Givan, R., & Driessens, K. (2004). Relational reinforcement learning: An overview. In Proceedings of the ICML-2004 workshop on relational reinforcement learning (pp. 1\u20139)."},{"key":"5669_CR49","doi-asserted-by":"crossref","unstructured":"Van den Broeck, G., Thon, I., van Otterlo, M., & De Raedt, L. (2010). DTProbLog: A decision-theoretic probabilistic Prolog. In Proceedings of the AAAI (pp. 1217\u20131222).","DOI":"10.1609\/aaai.v24i1.7755"},{"key":"5669_CR50","doi-asserted-by":"crossref","unstructured":"Vianna, L. G. R., de\u00a0Barros, L. N., & Sanner, S. (2015). Real-time symbolic dynamic programming. In Proceedings of the twenty-ninth AAAI conference on artificial intelligence, January 25\u201330, 2015, Austin, Texas, USA (pp. 3402\u20133408).","DOI":"10.1609\/aaai.v29i1.9651"},{"key":"5669_CR51","unstructured":"Vien, N. A., & Toussaint, M. (2014). Model-based relational RL when object existence is partially observable. In Proceedings of the ICML."},{"key":"5669_CR52","doi-asserted-by":"crossref","unstructured":"Walsh, T. J., Goschin, S., & Littman, M. L. (2010). Integrating sample-based planning and model-based reinforcement learning. In Proceedings of the AAAI.","DOI":"10.1609\/aaai.v24i1.7689"},{"key":"5669_CR53","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1613\/jair.2489","volume":"31","author":"C Wang","year":"2008","unstructured":"Wang, C., Joshi, S., & Khardon, R. (2008). First order decision diagrams for relational MDPs. Journal of Artificial Intelligence Research (JAIR), 31, 431\u2013472.","journal-title":"Journal of Artificial Intelligence Research (JAIR)"},{"key":"5669_CR54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3","volume-title":"Reinforcement learning: State-of-the-art. Adaptation, learning, and optimization","author":"M Wiering","year":"2012","unstructured":"Wiering, M., & van Otterlo, M. (2012). Reinforcement learning: State-of-the-art. Adaptation, learning, and optimization. Berlin: Springer."},{"key":"5669_CR55","unstructured":"Wood, F., van\u00a0de Meent, J. W., & Mansinghka, V. (2014). A new approach to probabilistic programming inference. In Proceedings of the 17th international conference on artificial intelligence and statistics (pp. 1024\u20131032)."},{"key":"5669_CR56","unstructured":"Zamani, Z., Sanner, S., Delgado, K. V., & de\u00a0Barros, L. N. (2013). Robust optimization for hybrid MDPs with state-dependent noise. In IJCAI 2013, Proceedings of the 23rd international joint conference on artificial intelligence, Beijing, China, August 3\u20139, 2013."},{"key":"5669_CR57","unstructured":"Zamani, Z., Sanner, S., & Fang, C. (2012). Symbolic dynamic programming for continuous state and action MDPs. In Proceedings of the AAAI."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-017-5669-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-017-5669-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-017-5669-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T20:28:38Z","timestamp":1750883318000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-017-5669-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9,19]]},"references-count":57,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["5669"],"URL":"https:\/\/doi.org\/10.1007\/s10994-017-5669-x","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,9,19]]}}}