{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T05:48:38Z","timestamp":1742968118595,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":30,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642221514"},{"type":"electronic","value":"9783642221521"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-22152-1_40","type":"book-chapter","created":{"date-parts":[[2011,6,25]],"date-time":"2011-06-25T19:22:00Z","timestamp":1309029720000},"page":"472-484","source":"Crossref","is-referenced-by-count":0,"title":["Bridging the Gap between Reinforcement Learning and Knowledge Representation: A Logical Off- and On-Policy Framework"],"prefix":"10.1007","author":[{"given":"Emad","family":"Saad","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"40_CR1","unstructured":"Baral, C., Tran, N., Tuan, L.C.: Reasoning about actions in a probabilistic setting. In: AAAI (2002)"},{"key":"40_CR2","first-page":"1","volume":"11","author":"C. Boutilier","year":"1999","unstructured":"Boutilier, C., Dean, T., Hanks, S.: Decision-theoretic planning: structural assumptions and computational leverage. Journal of AI Research\u00a011, 1\u201394 (1999)","journal-title":"Journal of AI Research"},{"key":"40_CR3","unstructured":"Boutilier, C., Reiter, R., Price, B.: Symbolic dynamic programming for first-order MDPs. In: 17th IJCAI (2001)"},{"key":"40_CR4","unstructured":"Crites, R., Barto, A.: Improving elevator performance using reinforcement learning. In: Advances in Neural Information Processing (1996)"},{"key":"40_CR5","unstructured":"Eiter, T., Lukasiewicz, T.: Probabilistic reasoning about actions in nonmonotonic causal theories. In: 19th UAI (2003)"},{"key":"40_CR6","unstructured":"Ernst, M., Millstein, T., Weld, D.: Automatic SAT-compilation of planning problems. In: IJCAI (1997)"},{"key":"40_CR7","volume-title":"ICSLP","author":"M. Gelfond","year":"1988","unstructured":"Gelfond, M., Lifschitz, V.: The stable model semantics for logic programming. In: ICSLP. MIT Press, Cambridge (1988)"},{"issue":"16","key":"40_CR8","first-page":"193","volume":"3","author":"M. Gelfond","year":"1998","unstructured":"Gelfond, M., Lifschitz, V.: Action languages. Electronic Transactions on AI\u00a03(16), 193\u2013210 (1998)","journal-title":"Electronic Transactions on AI"},{"issue":"4","key":"40_CR9","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/s10817-006-9033-2","volume":"36","author":"E. Giunchiglia","year":"2006","unstructured":"Giunchiglia, E., Lierler, Y., Maratea, M.: Answer set programming based on propositional satisfiability. Journal of Automated Reasoning\u00a036(4), 345\u2013377 (2006)","journal-title":"Journal of Automated Reasoning"},{"issue":"1-2","key":"40_CR10","first-page":"85","volume":"147","author":"C. Castellini","year":"2003","unstructured":"Castellini, C., Giunchiglia, E., Tacchella, A.: SAT-based planning in complex domains: Concurrency, constraints and nondeterminism. AIJ\u00a0147(1-2), 85\u2013117 (2003)","journal-title":"AIJ"},{"key":"40_CR11","unstructured":"Iocchi, L., Lukasiewicz, T., Nardi, D., Rosati, R.: Reasoning about actions with sensing under qualitative and probabilistic uncertainty. In: 16th ECAI (2004)"},{"key":"40_CR12","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L. Kaelbling","year":"1996","unstructured":"Kaelbling, L., Littman, M., Moore, A.: Reinforcement learning: A survey. JAIR\u00a04, 237\u2013285 (1996)","journal-title":"JAIR"},{"key":"40_CR13","unstructured":"Kautz, H., Selman, B.: Pushing the envelope: planning, propositional logic, and stochastic search. In: 13th AAAI (1996)"},{"key":"40_CR14","doi-asserted-by":"crossref","unstructured":"Kersting, K., De Raedt, L.: Logical Markov decision programs and the convergence of logical TD(\u03bb). In: 14th ILP (2004)","DOI":"10.1007\/978-3-540-30109-7_16"},{"issue":"1-2","key":"40_CR15","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1016\/0004-3702(94)00087-H","volume":"76","author":"N. Kushmerick","year":"1995","unstructured":"Kushmerick, N., Hanks, S., Weld, D.: An algorithm for probabilistic planning. Artificial Intelligence\u00a076(1-2), 239\u2013286 (1995)","journal-title":"Artificial Intelligence"},{"issue":"1-2","key":"40_CR16","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1016\/j.artint.2004.04.004","volume":"157","author":"F. Lin","year":"2004","unstructured":"Lin, F., Zhao, Y.: ASSAT: Computing answer sets of a logic program by SAT solvers. Artificial Intelligence\u00a0157(1-2), 115\u2013137 (2004)","journal-title":"Artificial Intelligence"},{"key":"40_CR17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.505","volume":"9","author":"M. Littman","year":"1998","unstructured":"Littman, M., Goldsmith, J., Mundhenk, M.: The computational complexity of probabilistic planning. Journal of Artificial Intelligence Research\u00a09, 1\u201336 (1998)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"40_CR18","unstructured":"Majercik, S., Littman, M.: MAXPLAN: A new approach to probabilistic planning. In: 4th ICAPS, pp. 86\u201393 (1998)"},{"issue":"1-2","key":"40_CR19","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/S0004-3702(02)00379-X","volume":"147","author":"S. Majercik","year":"2003","unstructured":"Majercik, S., Littman, M.: Contingent planning under uncertainty via stochastic satisfiability. Artificial Intelligence\u00a0147(1-2), 119\u2013162 (2003)","journal-title":"Artificial Intelligence"},{"key":"40_CR20","unstructured":"Rummery, G., Niranjan, M.: Online Q-learning using connectionist systems. Technical report, CUED\/F-INFENG\/TR166, Cambridge University (1994)"},{"key":"40_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1007\/978-3-540-30557-6_7","volume-title":"Practical Aspects of Declarative Languages","author":"E. Saad","year":"2005","unstructured":"Saad, E., Pontelli, E.: Towards a more practical hybrid probabilistic logic programming framework. In: Hermenegildo, M.V., Cabeza, D. (eds.) PADL 2004. LNCS, vol.\u00a03350, pp. 67\u201382. Springer, Heidelberg (2005)"},{"issue":"3-4","key":"40_CR22","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/s10472-007-9048-3","volume":"48","author":"E. Saad","year":"2006","unstructured":"Saad, E., Pontelli, E.: A new approach to hybrid probabilistic logic programs. Annals of Mathematics and Artificial Intelligence Journal\u00a048(3-4), 187\u2013243 (2006)","journal-title":"Annals of Mathematics and Artificial Intelligence Journal"},{"key":"40_CR23","doi-asserted-by":"crossref","unstructured":"Saad, E.: Probabilistic planning in hybrid probabilistic logic programs. In: 1st Scalable Uncertainty Management (2007)","DOI":"10.1007\/978-3-540-75410-7_1"},{"key":"40_CR24","doi-asserted-by":"crossref","unstructured":"Saad, E.: Probabilistic planning with imperfect sensing actions using hybrid probabilistic logic programs. In: 3rd Scalable Uncertainty Management (2009)","DOI":"10.1007\/978-3-642-04388-8_17"},{"key":"40_CR25","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1007\/978-3-540-87993-0_27","volume-title":"Scalable Uncertainty Management","author":"E. Saad","year":"2008","unstructured":"Saad, E.: A logical framework to reinforcement learning using hybrid probabilistic logic programs. In: Greco, S., Lukasiewicz, T. (eds.) SUM 2008. LNCS (LNAI), vol.\u00a05291, pp. 341\u2013355. Springer, Heidelberg (2008)"},{"key":"40_CR26","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1007\/978-3-540-87993-0_28","volume-title":"Scalable Uncertainty Management","author":"E. Saad","year":"2008","unstructured":"Saad, E.: On the relationship between hybrid probabilistic logic programs and stochastic satisfiability. In: Greco, S., Lukasiewicz, T. (eds.) SUM 2008. LNCS (LNAI), vol.\u00a05291, pp. 356\u2013371. Springer, Heidelberg (2008)"},{"key":"40_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1007\/978-3-642-02906-6_57","volume-title":"Symbolic and Quantitative Approaches to Reasoning with Uncertainty","author":"E. Saad","year":"2009","unstructured":"Saad, E.: Probabilistic reasoning by SAT solvers. In: Sossai, C., Chemello, G. (eds.) ECSQARU 2009. LNCS, vol.\u00a05590, pp. 663\u2013675. Springer, Heidelberg (2009)"},{"issue":"5-6","key":"40_CR28","first-page":"748","volume":"173","author":"S. Sanner","year":"2009","unstructured":"Sanner, S., Boutilier, G.: Pratical solution techniques for first-order MDPs. AI\u00a0173(5-6), 748\u2013788 (2009)","journal-title":"AI"},{"issue":"4","key":"40_CR29","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1145\/1183278.1183279","volume":"7","author":"T. Son","year":"2006","unstructured":"Son, T., Baral, C., Nam, T., McIlraith, S.: Domain-dependent knowledge in answer set planning. ACM Transactions on Computational Logic\u00a07(4), 613\u2013657 (2006)","journal-title":"ACM Transactions on Computational Logic"},{"key":"40_CR30","unstructured":"Watkins, C.: Learning from delayed rewards. Ph.D. dissertation, University of Cambridge (1989)"}],"container-title":["Lecture Notes in Computer Science","Symbolic and Quantitative Approaches to Reasoning with Uncertainty"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-22152-1_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,29]],"date-time":"2019-03-29T06:33:27Z","timestamp":1553841207000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-22152-1_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642221514","9783642221521"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-22152-1_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2011]]}}}