{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T05:51:13Z","timestamp":1782539473659,"version":"3.54.5"},"reference-count":289,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2024,4,19]],"date-time":"2024-04-19T00:00:00Z","timestamp":1713484800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,19]],"date-time":"2024-04-19T00:00:00Z","timestamp":1713484800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003816","name":"Huawei Technologies","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003816","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s10994-024-06543-w","type":"journal-article","created":{"date-parts":[[2024,4,19]],"date-time":"2024-04-19T11:01:37Z","timestamp":1713524497000},"page":"5847-5890","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":71,"title":["A survey on interpretable reinforcement learning"],"prefix":"10.1007","volume":"113","author":[{"given":"Claire","family":"Glanois","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2008-4569","authenticated-orcid":false,"given":"Paul","family":"Weng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matthieu","family":"Zimmer","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dong","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tianpei","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wulong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,4,19]]},"reference":[{"key":"6543_CR1","unstructured":"Adjodah, D., Klinger, T., & Joseph, J. (2018). Symbolic relation networks for reinforcement learning. In NeurIPS workshop on representation learning."},{"key":"6543_CR2","unstructured":"Agnew, W., & Domingos, P. (2018). Unsupervised object-level deep reinforcement learning. In NeurIPS workshop on deep RL."},{"key":"6543_CR3","unstructured":"Akrour, R., Tateo, D., & Peters, J. (2019). Towards reinforcement learning of human readable policies. In Workshop on deep continuous-discrete machine learning."},{"key":"6543_CR4","doi-asserted-by":"crossref","unstructured":"Aksaray, D., Jones, A., Kong, Z., et\u00a0al. (2016). Q-Learning for robust satisfaction of signal temporal logic specifications. In CDC.","DOI":"10.1109\/CDC.2016.7799279"},{"key":"6543_CR5","doi-asserted-by":"crossref","first-page":"171058","DOI":"10.1109\/ACCESS.2020.3023394","volume":"8","author":"A Alharin","year":"2020","unstructured":"Alharin, A., Doan, T. N., & Sartipi, M. (2020). Reinforcement learning interpretation methods: A survey. IEEE Access, 8, 171058\u2013171077.","journal-title":"IEEE Access"},{"key":"6543_CR6","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., et\u00a0al. (2018). Safe reinforcement learning via shielding. In AAAI.","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"6543_CR7","unstructured":"Amodei, D., Olah, C., Steinhardt, J., et\u00a0al. (2016). Concrete Problems in AI Safety. arXiv: 1606.06565"},{"issue":"3","key":"6543_CR8","doi-asserted-by":"crossref","first-page":"973","DOI":"10.1177\/1461444816676645","volume":"20","author":"M Ananny","year":"2018","unstructured":"Ananny, M., & Crawford, K. (2018). Seeing without knowing: Limitations of the transparency ideal and its application to algorithmic accountability. New Media and Society, 20(3), 973\u201389.","journal-title":"New Media and Society"},{"key":"6543_CR9","unstructured":"Andersen, G., & Konidaris, G. (2017). Active exploration for learning symbolic representations. In NeurIPS."},{"key":"6543_CR10","unstructured":"Anderson, G., Verma, A., Dillig, I., et\u00a0al. (2020). Neurosymbolic reinforcement learning with formally verified exploration. In NeurIPS."},{"key":"6543_CR11","unstructured":"Andreas, J., Klein, D., & Levine, S. (2017). Modular multitask reinforcement learning with policy sketches. In ICML."},{"key":"6543_CR12","doi-asserted-by":"crossref","unstructured":"Annasamy, R.M., & Sycara, K. (2019). Towards better interpretability in deep Q-networks. In AAAI.","DOI":"10.1609\/aaai.v33i01.33014561"},{"key":"6543_CR13","unstructured":"Arnold, T., Kasenberg, D., & Scheutz, M. (2017). Value alignment or misalignment: What will keep systems accountable? In AAAI workshop."},{"key":"6543_CR14","unstructured":"Arora, S., & Doshi, P. (2018). A survey of inverse reinforcement learning: Challenges, methods and progress. arXiv:1806.06877"},{"key":"6543_CR15","unstructured":"Atrey, A., Clary, K., & Jensen, D. (2020). Exploratory not explanatory: Counterfactual analysis of saliency maps for deep reinforcement learning. In ICLR."},{"key":"6543_CR16","unstructured":"Ault, J., Hanna, J. P., & Sharon, G. (2020). Learning an interpretable traffic signal control policy. In AAMAS."},{"key":"6543_CR17","unstructured":"Bader, S., & Hitzler, P. (2005). Dimensions of neural-symbolic integration: A structured survey. In We Will Show Them: Essays in Honour of Dov Gabbay."},{"key":"6543_CR18","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1016\/j.inffus.2019.12.012","volume":"58","author":"A Barredo Arrieta","year":"2020","unstructured":"Barredo Arrieta, A., D\u00edaz-Rodr\u00edguez, N., Ser, J. D., et al. (2020). Explainable artificial intelligence (XAI): Concepts, taxonomies, opportunities and challenges toward responsible AI. Information Fusion, 58, 82\u2013115.","journal-title":"Information Fusion"},{"key":"6543_CR19","doi-asserted-by":"crossref","unstructured":"Barto, A. G., & Mahadevan, S. (2003). Recent advances in hierarchical reinforcement learning. Discrete Event Dynamic Systems","DOI":"10.1023\/A:1025696116075"},{"key":"6543_CR20","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/S0049-237X(08)71097-8","volume":"90","author":"J Barwise","year":"1977","unstructured":"Barwise, J. (1977). An introduction to first-order logic. Studies in Logic and the Foundations of Mathematics, 90, 5\u201346.","journal-title":"Studies in Logic and the Foundations of Mathematics"},{"key":"6543_CR21","unstructured":"Bastani, O., Pu, Y., & Solar-Lezama, A. (2018). Verifiable reinforcement learning via policy extraction. In NeurIPS."},{"key":"6543_CR22","unstructured":"Battaglia, P., Pascanu, R., Lai, M., et\u00a0al. (2016). Interaction networks for learning about objects, relations and physics. In NeurIPS."},{"key":"6543_CR23","unstructured":"Battaglia, P. W., Hamrick, J. B., Bapst, V., et\u00a0al. (2018). Relational inductive biases, deep learning, and graph networks. arXiv:1806.01261"},{"key":"6543_CR24","unstructured":"Bear, D., Fan, C., Mrowca, D., et\u00a0al. (2020). Learning physical graph representations from visual scenes. In NeurIPS."},{"key":"6543_CR25","unstructured":"Bertsekas, D., & Tsitsiklis, J. (1996). Neuro-dynamic programming. Athena Scientific."},{"key":"6543_CR26","doi-asserted-by":"crossref","unstructured":"Bewley, T., & Lawry, J. (2021). TripleTree: A versatile interpretable representation of black box agents and their environments. In AAAI.","DOI":"10.1609\/aaai.v35i13.17360"},{"key":"6543_CR27","unstructured":"Bewley, T., & L\u00e9cu\u00e9, F. (2022). Interpretable preference-based reinforcement learning with tree-structured reward functions. In AAMAS."},{"key":"6543_CR28","doi-asserted-by":"crossref","unstructured":"Beyret, B., Shafti, A., & Faisal, A. A. (2019). Dot-to-dot: Explainable hierarchical reinforcement learning for robotic manipulation. In IROS.","DOI":"10.1109\/IROS40897.2019.8968488"},{"key":"6543_CR29","unstructured":"Bommasani, R., Hudson, D. A., Adeli, E., et\u00a0al. (2022). On the opportunities and risks of foundation models. arXiv:2108.07258"},{"issue":"3","key":"6543_CR30","doi-asserted-by":"crossref","first-page":"502","DOI":"10.1109\/JPROC.2019.2897447","volume":"107","author":"J Bonnefon","year":"2019","unstructured":"Bonnefon, J., Shariff, A., & Rahwan, I. (2019). The trolley, the bull bar, and why engineers should care about the ethics of autonomous cars [point of view]. Proceedings of the IEEE, 107(3), 502\u20134.","journal-title":"Proceedings of the IEEE"},{"issue":"1\u20132","key":"6543_CR31","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/S0004-3702(00)00033-3","volume":"121","author":"C Boutilier","year":"2000","unstructured":"Boutilier, C., Dearden, R., & Goldszmidt, M. (2000). Stochastic dynamic programming with factored representations. Artificial Intelligence, 121(1\u20132), 49\u2013107.","journal-title":"Artificial Intelligence"},{"key":"6543_CR32","doi-asserted-by":"crossref","unstructured":"Brunelli, R. (2009). Template matching techniques in computer vision: Theory and practice. Wiley Publishing.","DOI":"10.1002\/9780470744055"},{"key":"6543_CR33","unstructured":"Brunner, G., Liu, Y., Pascual, D., et\u00a0al. (2020). On identifiability in transformers. In ICLR"},{"key":"6543_CR34","doi-asserted-by":"crossref","unstructured":"Bucilu\u01ce, C., Caruana, R., & Niculescu-Mizil, A. (2006). Model compression. In KDD.","DOI":"10.1145\/1150402.1150464"},{"key":"6543_CR35","doi-asserted-by":"crossref","unstructured":"Burke, M., Penkov, S., & Ramamoorthy, S. (2019). From explanation to synthesis: Compositional program induction for learning from demonstration. In RSS.","DOI":"10.15607\/RSS.2019.XV.015"},{"key":"6543_CR36","doi-asserted-by":"crossref","unstructured":"Camacho, A., Toro\u00a0Icarte, R., Klassen, T. Q., et\u00a0al. (2019). LTL and beyond: Formal languages for reward function specification in reinforcement learning. In IJCAI.","DOI":"10.24963\/ijcai.2019\/840"},{"key":"6543_CR37","unstructured":"Cao, Y., Li, Z., Yang, T., et\u00a0al. (2022). GALOIS: Boosting deep reinforcement learning via generalizable logic synthesis. In NeurIPS."},{"key":"6543_CR38","unstructured":"Casper, S., Davies, X., Shi, C., et\u00a0al. (2023). Open problems and fundamental limitations of reinforcement learning from human feedback. arXiv:2307.15217"},{"key":"6543_CR39","unstructured":"Chang, M. B., Ullman, T., Torralba, A., et\u00a0al. (2017). A compositional object-based approach to learning physical dynamics. In ICLR."},{"key":"6543_CR40","unstructured":"Chari, S., Gruen, D. M., Seneviratne, O., et\u00a0al. (2020). Directions for explainable knowledge-enabled systems. arXiv:2003.07523"},{"key":"6543_CR41","unstructured":"Chen, J., Li, S. E., & Tomizuka, M. (2020). Interpretable end-to-end urban autonomous driving with latent deep reinforcement learning. In ICML workshop on AI for autonomous driving."},{"key":"6543_CR42","doi-asserted-by":"crossref","first-page":"579","DOI":"10.2478\/amcs-2014-0042","volume":"24","author":"P Cichosz","year":"2014","unstructured":"Cichosz, P., & Pawe\u0142czak, L. (2014). Imitation learning of car driving skills with decision trees and random forests. International Journal of Applied Mathematics and Computer Science, 24, 579\u201397.","journal-title":"International Journal of Applied Mathematics and Computer Science"},{"key":"6543_CR43","unstructured":"Cimatti, A., Pistore, M., & Traverso, P. (2008). Automated planning. In Handbook of knowledge representation."},{"key":"6543_CR44","unstructured":"Cole, J., Lloyd, J., & Ng, K. S. (2003). Symbolic learning for adaptive agents. In Annual partner conference."},{"key":"6543_CR45","unstructured":"Commission, E. (2019). Ethics guidelines for trustworthy AI. https:\/\/ec.europa.eu\/digital-single-market\/en\/news\/ethics-guidelines-trustworthy-ai"},{"key":"6543_CR46","unstructured":"Coppens, Y., Efthymiadis, K., Lenaerts, T., et\u00a0al. (2019). Distilling deep reinforcement learning policies in soft decision trees. In IJCAI workshop on XAI."},{"key":"6543_CR47","doi-asserted-by":"crossref","unstructured":"Corazza, J., Gavran, I., & Neider, D. (2022). Reinforcement learning with stochastic reward machines. In AAAI.","DOI":"10.1609\/aaai.v36i6.20594"},{"key":"6543_CR48","unstructured":"Cranmer, M., Sanchez\u00a0Gonzalez, A., Battaglia, P., et\u00a0al. (2020). Discovering symbolic models from deep learning with inductive biases. In NeurIPS."},{"key":"6543_CR49","unstructured":"Crawford, K., Dobbe, R., Dryer, T., et al. (2016). AI Now Report. AI Now Institute: Tech. rep."},{"key":"6543_CR50","doi-asserted-by":"crossref","unstructured":"Cropper, A., Duman\u010di\u0107, S., & Muggleton, S.H. (2020). Turning 30: New ideas in inductive logic programming. In IJCAI.","DOI":"10.24963\/ijcai.2020\/673"},{"key":"6543_CR51","doi-asserted-by":"crossref","unstructured":"Cruz, F., Dazeley, R., & Vamplew, P. (2019). Memory-based explainable reinforcement learning. In Advances in artificial intelligence.","DOI":"10.1007\/978-3-030-35288-2_6"},{"key":"6543_CR52","volume-title":"Artificial Intelligence, Governance and Ethics: Global Perspectives","author":"A Daly","year":"2019","unstructured":"Daly, A., Hagendorff, T., Li, H., et al. (2019). Artificial Intelligence, Governance and Ethics: Global Perspectives. SSRN Scholarly Paper: Chinese University of Hong Kong."},{"key":"6543_CR53","unstructured":"d\u2019Avila Garcez, A., Dutra, A. R. R., & Alonso, E. (2018). Towards Symbolic Reinforcement Learning with Common Sense. arXiv:1804.08597"},{"issue":"1","key":"6543_CR54","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1007\/s10994-015-5494-z","volume":"100","author":"L De Raedt","year":"2015","unstructured":"De Raedt, L., & Kimmig, A. (2015). Probabilistic (logic) programming concepts. Machine Learning, 100(1), 5\u201347.","journal-title":"Machine Learning"},{"issue":"3","key":"6543_CR55","first-page":"142","volume":"5","author":"T Dean","year":"1990","unstructured":"Dean, T., & Kanazawa, K. (1990). A model for reasoning about persistence and causation. Computational Intelligence, 5(3), 142\u2013150.","journal-title":"Computational Intelligence"},{"key":"6543_CR56","doi-asserted-by":"crossref","unstructured":"Degris, T., Sigaud, O., & Wuillemin, P. H. (2006). Learning the structure of factored Markov decision processes in reinforcement learning problems. In ICML.","DOI":"10.1145\/1143844.1143877"},{"key":"6543_CR57","unstructured":"Delfosse, Q., Shindo, H., Dhami, D., et\u00a0al. (2023). Interpretable and explainable logical policies via neurally guided symbolic abstraction. In NeurIPS."},{"key":"6543_CR58","doi-asserted-by":"crossref","unstructured":"Demeester, T., Rockt\u00e4schel, T., & Riedel, S. (2016). Lifted rule injection for relation embeddings. In EMNLP.","DOI":"10.18653\/v1\/D16-1146"},{"key":"6543_CR59","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1016\/j.artint.2015.08.011","volume":"244","author":"M Diligenti","year":"2017","unstructured":"Diligenti, M., Gori, M., & Sacc\u00e0, C. (2017). Semantic-based regularization for learning and inference. Artificial Intelligence, 244, 143\u201365.","journal-title":"Artificial Intelligence"},{"key":"6543_CR60","doi-asserted-by":"crossref","unstructured":"Diuk, C., Cohen, A., & Littman, M. L. (2008). An object-oriented representation for efficient reinforcement learning. In ICML.","DOI":"10.1145\/1390156.1390187"},{"key":"6543_CR61","doi-asserted-by":"crossref","unstructured":"Donadello, I., Serafini, L., & D\u2019Avila\u00a0Garcez, A. (2017). Logic tensor networks for semantic image interpretation. In IJCAI.","DOI":"10.24963\/ijcai.2017\/221"},{"key":"6543_CR62","unstructured":"Dong, H., Mao, J., Lin, T., et\u00a0al. (2019). Neural logic machines. In ICLR."},{"key":"6543_CR63","unstructured":"Doshi-Velez, F., Kortz, M., Budish, R., et\u00a0al. (2019). Accountability of AI under the law: The role of explanation. arXiv:1711.01134"},{"key":"6543_CR64","doi-asserted-by":"crossref","unstructured":"Dragan, A. D., Lee, K. C., & Srinivasa, S. S. (2013). Legibility and predictability of robot motion. In HRI.","DOI":"10.1109\/HRI.2013.6483603"},{"key":"6543_CR65","unstructured":"Driessens, & Blockeel, H. (2001). Learning digger using hierarchical reinforcement learning for concurrent goals. In EWRL."},{"key":"6543_CR66","doi-asserted-by":"crossref","unstructured":"Driessens, K., Ramon, J., & Gartner, T. (2006). Graph kernels and Gaussian processes for relational reinforcement learning. Machine Learning","DOI":"10.1007\/s10994-006-8258-y"},{"key":"6543_CR67","unstructured":"Dutra, A. R., & d\u2019Avila Garcez, A. S. (2017). A Comparison between deep Q-networks and deep symbolic reinforcement learning. In CEUR workshop proceedings."},{"key":"6543_CR68","doi-asserted-by":"crossref","unstructured":"Dwork, C., Hardt, M., Pitassi, T., et\u00a0al. (2012). Fairness through awareness. In ICTS.","DOI":"10.1145\/2090236.2090255"},{"key":"6543_CR69","doi-asserted-by":"crossref","unstructured":"Dzeroski, S., Raedt, L. D., & Blockeel, H. (1998). Relational reinforcement learning. In ICML.","DOI":"10.1007\/BFb0027307"},{"issue":"1","key":"6543_CR70","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S D\u017eeroski","year":"2001","unstructured":"D\u017eeroski, S., De Raedt, L., & Driessens, K. (2001). Relational reinforcement learning. Machine Learning, 43(1), 7\u201352.","journal-title":"Machine Learning"},{"key":"6543_CR71","first-page":"503","volume":"6","author":"D Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., & Wehenkel, L. (2005). Tree-based batch mode reinforcement learning. JMLR, 6, 503\u2013556.","journal-title":"JMLR"},{"key":"6543_CR72","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.5714","volume":"61","author":"R Evans","year":"2018","unstructured":"Evans, R., & Grefenstette, E. (2018). Learning explanatory rules from noisy data. Journal of Artificial Intelligence Research, 61, 1\u201364.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"6543_CR73","unstructured":"Eysenbach, B., Salakhutdinov, R. R., & Levine, S. (2019). Search on the replay buffer: Bridging planning and reinforcement learning. In NeurIPS."},{"key":"6543_CR75","unstructured":"Finn, C., Goodfellow, I., & Levine, S. (2016). Unsupervised learning for physical interaction through video prediction. In NeurIPS."},{"key":"6543_CR74","doi-asserted-by":"crossref","unstructured":"Finn, C., & Levine, S. (2017). Deep visual foresight for planning robot motion. In ICRA.","DOI":"10.1109\/ICRA.2017.7989324"},{"issue":"1","key":"6543_CR76","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1007\/s10994-013-5392-1","volume":"94","author":"MVM Franca","year":"2014","unstructured":"Franca, M. V. M., Zaverucha, G., & Garcez, A. (2014). Fast relational learning using bottom clause propositionalization with artificial neural networks. Machine Learning, 94(1), 81\u2013104.","journal-title":"Machine Learning"},{"key":"6543_CR77","doi-asserted-by":"crossref","unstructured":"Francois-Lavet, V., Bengio, Y., Precup, D., et\u00a0al. (2019). Combined reinforcement learning via abstract representations. In AAAI.","DOI":"10.1609\/aaai.v33i01.33013582"},{"issue":"4","key":"6543_CR78","doi-asserted-by":"crossref","first-page":"136","DOI":"10.1145\/3433949","volume":"64","author":"SA Friedler","year":"2021","unstructured":"Friedler, S. A., Scheidegger, C., & Venkatasubramanian, S. (2021). The (Im)possibility of fairness: Different value systems require different mechanisms for fair decision making. Communications of the ACM, 64(4), 136\u2013143.","journal-title":"Communications of the ACM"},{"key":"6543_CR79","unstructured":"Friedman, D., Wettig, A., & Chen, D. (2023). Learning transformer programs. In NeurIPS."},{"key":"6543_CR80","unstructured":"Fujimoto, S., Hoof, H., & Meger, D. (2018). Addressing function approximation error in actor-critic methods. In ICML."},{"key":"6543_CR81","doi-asserted-by":"crossref","unstructured":"Fukuchi, Y., Osawa, M., Yamakawa, H., et\u00a0al. (2017). Autonomous self-explanation of behavior for interactive reinforcement learning agents. In International conference on human agent interaction.","DOI":"10.1145\/3125739.3125746"},{"key":"6543_CR82","doi-asserted-by":"crossref","first-page":"1031","DOI":"10.1613\/jair.1.12372","volume":"70","author":"D Furelos-Blanco","year":"2021","unstructured":"Furelos-Blanco, D., Law, M., Jonsson, A., et al. (2021). Induction and exploitation of subgoal automata for reinforcement learning. JAIR, 70, 1031\u20131116.","journal-title":"JAIR"},{"key":"6543_CR83","doi-asserted-by":"crossref","unstructured":"Gaon, M., & Brafman, R. I. (2020). Reinforcement learning with non-Markovian rewards. In AAAI.","DOI":"10.1609\/aaai.v34i04.5814"},{"key":"6543_CR84","unstructured":"Garg, S., Bajpai, A., Mausam. (2020). Symbolic network: Generalized neural policies for relational MDPs. arXiv:2002.07375"},{"key":"6543_CR85","unstructured":"Garnelo, M., Arulkumaran, K., & Shanahan, M. (2016). Towards deep symbolic reinforcement learning. In NeurIPS workshop on DRL."},{"key":"6543_CR86","unstructured":"Gilmer, J., Schoenholz, S. S., Riley, P. F., et\u00a0al. (2017). Neural message passing for quantum chemistry. In ICML."},{"key":"6543_CR87","doi-asserted-by":"crossref","unstructured":"Gilpin, L. H., Bau, D., Yuan, B. Z., et\u00a0al. (2019). Explaining explanations: An overview of interpretability of machine learning. In DSAA.","DOI":"10.1109\/DSAA.2018.00018"},{"key":"6543_CR88","unstructured":"Glaese, A., McAleese, N., Trebacz, M., et\u00a0al. (2022). Improving alignment of dialogue agents via targeted human judgements. arXiv:2209.14375"},{"key":"6543_CR89","unstructured":"Glanois, C., Jiang, Z., Feng, X., et\u00a0al. (2022). Neuro-symbolic hierarchical rule induction. In ICML."},{"key":"6543_CR90","unstructured":"Goel, V., Weng, J., & Poupart, P. (2018). Unsupervised video object segmentation for deep reinforcement learning. In NeurIPS."},{"key":"6543_CR91","unstructured":"Goodfellow, I., Bengio, Y., & Courville, A. (2016). Deep learning. MIT Press."},{"key":"6543_CR92","unstructured":"Greydanus, S., Koul, A., Dodge, J., et\u00a0al. (2018). Visualizing and understanding atari agents. In ICML."},{"key":"6543_CR93","doi-asserted-by":"crossref","unstructured":"Grzes, M., & Kudenko, D. (2008). Plan-based reward shaping for reinforcement learning. In International conference intelligent systems.","DOI":"10.1109\/IS.2008.4670492"},{"key":"6543_CR94","unstructured":"Guestrin, C., Koller, D., Gearhart, C., et\u00a0al. (2003). Generalizing plans to new environments in relational MDPs. In IJCAI."},{"issue":"1\u20132","key":"6543_CR95","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2500000010","volume":"4","author":"S Gulwani","year":"2017","unstructured":"Gulwani, S., Polozov, O., & Singh, R. (2017). Program synthesis. Foundations and Trends in Programming Languages, 4(1\u20132), 1\u2013119.","journal-title":"Foundations and Trends in Programming Languages"},{"key":"6543_CR96","unstructured":"Gupta, P., Puri, N., Verma, S., et\u00a0al. (2020). Explain your move: Understanding agent actions using focused feature saliency. In ICLR."},{"key":"6543_CR97","unstructured":"Gupta, U. D., Talvitie, E., & Bowling, M. (2015). Policy tree: Adaptive representation for policy gradient. In AAAI."},{"key":"6543_CR98","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., et\u00a0al. (2018). Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In ICML."},{"key":"6543_CR99","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1016\/0167-2789(90)90087-6","volume":"42","author":"S Harnad","year":"1990","unstructured":"Harnad, S. (1990). The symbol grounding problem. Physica D-Nonlinear Phenomena, 42, 335\u2013346.","journal-title":"Physica D-Nonlinear Phenomena"},{"key":"6543_CR100","doi-asserted-by":"crossref","unstructured":"Hasanbeig, M., Kroening, D., & Abate, A. (2020). Deep reinforcement learning with temporal logics. In Formal modeling and analysis of timed systems.","DOI":"10.1007\/978-3-030-57628-8_1"},{"key":"6543_CR101","doi-asserted-by":"crossref","unstructured":"Hayes, B., & Shah, J. A. (2017). Improving robot controller transparency through autonomous policy explanation. In International conference on HRI.","DOI":"10.1145\/2909824.3020233"},{"key":"6543_CR102","first-page":"87","volume":"65","author":"D Hein","year":"2017","unstructured":"Hein, D., Hentschel, A., Runkler, T., et al. (2017). Particle swarm optimization for generating interpretable fuzzy reinforcement learning policies. Engineering Applications of AI, 65, 87\u201398.","journal-title":"Engineering Applications of AI"},{"key":"6543_CR103","first-page":"158","volume":"76","author":"D Hein","year":"2018","unstructured":"Hein, D., Udluft, S., & Runkler, T. A. (2018). Interpretable policies for reinforcement learning by genetic programming. Engineering Applications of AI, 76, 158\u2013169.","journal-title":"Engineering Applications of AI"},{"key":"6543_CR104","doi-asserted-by":"crossref","unstructured":"Hein, D., Udluft, S., & Runkler, T. A. (2019). Generating interpretable reinforcement learning policies using genetic programming. In GECCO.","DOI":"10.1145\/3319619.3326755"},{"key":"6543_CR105","doi-asserted-by":"crossref","unstructured":"Henderson, P., Islam, R., Bachman, P., et\u00a0al. (2018). Deep reinforcement learning that matters. In AAAI.","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"6543_CR106","doi-asserted-by":"crossref","unstructured":"Hengst, B. (2010). Hierarchical reinforcement learning. Encyclopedia of machine learning (pp. 495\u2013502). Springer.","DOI":"10.1007\/978-0-387-30164-8_363"},{"key":"6543_CR107","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2020.106685","volume":"214","author":"A Heuillet","year":"2021","unstructured":"Heuillet, A., Couthouis, F., & D\u00edaz-Rodr\u00edguez, N. (2021). Explainability in deep reinforcement learning. Knowledge-Based Systems, 214, 106685.","journal-title":"Knowledge-Based Systems"},{"key":"6543_CR108","unstructured":"Higgins, I., Amos, D., Pfau, D., et\u00a0al. (2018). Towards a definition of disentangled representations. arXiv:1812.02230"},{"issue":"6245","key":"6543_CR109","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1126\/science.aac4520","volume":"349","author":"E Horvitz","year":"2015","unstructured":"Horvitz, E., & Mulligan, D. (2015). Data, privacy, and the greater good. Science, 349(6245), 253\u2013255.","journal-title":"Science"},{"key":"6543_CR110","unstructured":"Huang, S., Papernot, N., Goodfellow, I., et\u00a0al. (2017). Adversarial attacks on neural network policies. In ICLR workshop."},{"issue":"2","key":"6543_CR111","first-page":"211","volume":"50","author":"A Hussein","year":"2017","unstructured":"Hussein, A., Gaber, M. M., Elyan, E., et al. (2017). Imitation learning: A survey of learning methods. ACM Computing Surveys, 50(2), 211\u20132135.","journal-title":"ACM Computing Surveys"},{"key":"6543_CR112","doi-asserted-by":"crossref","unstructured":"Illanes, L., Yan, X., Icarte, R. T., et\u00a0al. (2020). Symbolic plans as high-level instructions for reinforcement learning. In ICAPS.","DOI":"10.1609\/icaps.v30i1.6750"},{"key":"6543_CR113","doi-asserted-by":"crossref","unstructured":"Iyer, R., Li, Y., Li, H., et\u00a0al. (2018). Transparency and explanation in deep reinforcement learning neural networks. In AIES.","DOI":"10.1145\/3278721.3278776"},{"key":"6543_CR114","unstructured":"Jain, S., & Wallace, B. C. (2019). Attention is not explanation. In NAACL."},{"key":"6543_CR115","unstructured":"Janisch, J., Pevn\u00fd, T., & Lis\u00fd, V. (2021). Symbolic relational deep reinforcement learning based on graph neural networks. arXiv:2009.12462"},{"key":"6543_CR116","doi-asserted-by":"crossref","unstructured":"Jia, R., Jin, M., Sun, K., et\u00a0al. (2019). Advanced building control via deep reinforcement learning. In Energy Procedia.","DOI":"10.1016\/j.egypro.2019.01.494"},{"key":"6543_CR117","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Yang, F., Zhang, S., et\u00a0al. (2018). Integrating task-motion planning with reinforcement learning for robust decision making in mobile robots. In ICAPS.","DOI":"10.1109\/IROS40897.2019.8967680"},{"key":"6543_CR118","unstructured":"Jiang, Z., & Luo, S. (2019). Neural logic reinforcement learning. In ICML."},{"key":"6543_CR119","doi-asserted-by":"crossref","unstructured":"Jin, M., Ma, Z., Jin, K., et\u00a0al. (2022). Creativity of ai: Automatic symbolic option discovery for facilitating deep reinforcement learning. In AAAI.","DOI":"10.1609\/aaai.v36i6.20663"},{"key":"6543_CR120","unstructured":"Juozapaitis, Z., Koul, A., Fern, A., et\u00a0al. (2019). Explainable reinforcement learning via reward decomposition. In IJCAI\/ECAI workshop on explainable artificial intelligence."},{"key":"6543_CR121","unstructured":"Kaiser, M., Otte, C., Runkler, T., et\u00a0al. (2019). Interpretable dynamics models for data-efficient reinforcement learning. In ESANN."},{"key":"6543_CR122","unstructured":"Kansky, K., Silver, T., M\u00e9ly, D. A., et\u00a0al. (2017). Schema networks: Zero-shot transfer with a generative causal model of intuitive physics. In ICML."},{"key":"6543_CR123","doi-asserted-by":"crossref","unstructured":"Kasenberg, D., & Scheutz, M. (2017). Interpretable apprenticeship learning with temporal logic specifications. In CDC.","DOI":"10.1109\/CDC.2017.8264386"},{"key":"6543_CR124","unstructured":"Kenny, E. M., Tucker, M., Shah, J. (2023). Towards interpretable deep reinforcement learning with human-friendly prototypes. In ICLR."},{"key":"6543_CR125","doi-asserted-by":"crossref","unstructured":"Kim, J., & Bansal, M. (2020). Attentional bottleneck: Towards an interpretable deep driving network. In CVPR workshop.","DOI":"10.1109\/CVPRW50498.2020.00169"},{"key":"6543_CR126","doi-asserted-by":"crossref","unstructured":"Koller, D. (1999). Probabilistic relational models. In Inductive logic programming (pp. 3\u201313).","DOI":"10.1007\/3-540-48751-4_1"},{"key":"6543_CR127","doi-asserted-by":"crossref","unstructured":"Konidaris, G., Kaelbling, L. P., & Lozano-Perez, T. (2014). Constructing symbolic representations for high-level planning. In AAAI.","DOI":"10.1609\/aaai.v28i1.9004"},{"key":"6543_CR128","unstructured":"Konidaris, G., Kaelbling, L. P., & Lozano-Perez, T. (2015). Symbol acquisition for probabilistic high-level planning. In IJCAI."},{"key":"6543_CR129","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1613\/jair.5575","volume":"61","author":"G Konidaris","year":"2018","unstructured":"Konidaris, G., Kaelbling, L. P., & Lozano-Perez, T. (2018). From skills to symbols: Learning symbolic representations for abstract high-level planning. JAIR, 61, 215\u2013289.","journal-title":"JAIR"},{"key":"6543_CR130","unstructured":"Koul, A., Greydanus, S., & Fern, A. (2019). Learning finite state representations of recurrent policy networks. In ICLR."},{"key":"6543_CR131","unstructured":"Kulick, J., Toussaint, M., & Lang, T. et\u00a0al (2013). Active learning for teaching a robot grounded relational symbols. In IJCAI."},{"key":"6543_CR132","doi-asserted-by":"crossref","unstructured":"Kunapuli, G., Odom, P., & Shavlik, J. W. et\u00a0al (2013). Guiding autonomous agents to better behaviors through human advice. In ICDM.","DOI":"10.1109\/ICDM.2013.79"},{"key":"6543_CR133","unstructured":"Kwon, M., Xie, S. M., & Bullard, K. et\u00a0al (2023). Reward design with language models. In ICLR."},{"key":"6543_CR134","doi-asserted-by":"crossref","unstructured":"Lao, N., & Cohen, W. W. (2010). Relational retrieval using a combination of path-constrained random walks. In Machine learning.","DOI":"10.1007\/s10994-010-5205-8"},{"key":"6543_CR135","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1016\/j.artint.2016.07.004","volume":"241","author":"M Leonetti","year":"2016","unstructured":"Leonetti, M., Iocchi, L., & Stone, P. (2016). A synthesis of automated planning and reinforcement learning for efficient, robust decision-making. Artificial Intelligence, 241, 103\u2013130.","journal-title":"Artificial Intelligence"},{"key":"6543_CR136","doi-asserted-by":"crossref","unstructured":"Leslie, D. (2020). Understanding artificial intelligence ethics and safety: A guide for the responsible design and implementation of AI systems in the public sector. SSRN Electronic Journal","DOI":"10.2139\/ssrn.3403301"},{"key":"6543_CR137","unstructured":"Levine, S. (2018). Reinforcement Learning and Control as Probabilistic Inference: Tutorial and Review. arXiv:1805.00909"},{"issue":"37","key":"6543_CR139","doi-asserted-by":"crossref","first-page":"eaay6276","DOI":"10.1126\/scirobotics.aay6276","volume":"4","author":"X Li","year":"2019","unstructured":"Li, X., Serlin, Z., Yang, G., et al. (2019). A formal methods approach to interpretable reinforcement learning for robotic planning. Science Robotics, 4(37), eaay6276.","journal-title":"Science Robotics"},{"key":"6543_CR138","doi-asserted-by":"crossref","unstructured":"Li, X., Vasile, C. I., & Belta, C. (2017a). Reinforcement learning with temporal logic rewards. In IROS.","DOI":"10.1109\/IROS.2017.8206234"},{"key":"6543_CR140","unstructured":"Li, Y., Sycara, K., & Iyer, R. (2017b). Object-sensitive deep reinforcement learning. In Global conference on AI."},{"key":"6543_CR141","unstructured":"Li, Y., Tarlow, D., Brockschmidt, M. et\u00a0al (2017c). Gated graph sequence neural networks. In ICLR."},{"key":"6543_CR142","doi-asserted-by":"crossref","DOI":"10.1016\/j.robot.2020.103568","volume":"131","author":"A Likmeta","year":"2020","unstructured":"Likmeta, A., Metelli, A. M., Tirinzoni, A., et al. (2020). Combining reinforcement learning with rule-based controllers for transparent and general decision-making in autonomous driving. Robotics and Autonomous Systems, 131, 103568.","journal-title":"Robotics and Autonomous Systems"},{"key":"6543_CR143","unstructured":"Lim, B. Y., Yang, Q., & Abdul, A. et\u00a0al (2019). Why these explanations? Selecting intelligibility types for explanation goals. In IUI workshops."},{"key":"6543_CR144","unstructured":"Lipton, Z. C. (2017). The mythos of model interpretability. arXiv:1606.03490"},{"key":"6543_CR145","unstructured":"Littman, M. L., Topcu, U., & Fu, J. et\u00a0al (2017). Environment-independent task specifications via GLTL, arXiv:1704.04341"},{"key":"6543_CR146","doi-asserted-by":"crossref","unstructured":"Liu, G., Schulte, O., & Zhu, W. et\u00a0al (2018). Toward interpretable deep reinforcement learning with linear model U-trees. In ECML.","DOI":"10.1007\/978-3-030-10928-8_25"},{"issue":"2","key":"6543_CR147","volume":"1","author":"Y Liu","year":"2023","unstructured":"Liu, Y., Han, T., Ma, S., et al. (2023). Summary of chatgpt-related research and perspective towards the future of large language models. Meta-Radiology, 1(2), 100017.","journal-title":"Meta-Radiology"},{"issue":"1","key":"6543_CR148","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1057\/s41599-020-0501-9","volume":"7","author":"S Lo Piano","year":"2020","unstructured":"Lo Piano, S. (2020). Ethical principles in machine learning and artificial intelligence: Cases from the field and possible ways forward. Humanities and Social Sciences Communications, 7(1), 1\u20137.","journal-title":"Humanities and Social Sciences Communications"},{"key":"6543_CR149","unstructured":"Lu, K., Zhang, S., & Stone, P. et\u00a0al (2018). Robot representation and reasoning with knowledge from reinforcement learning. arXiv:1809.11074"},{"key":"6543_CR150","unstructured":"Lundberg, S. M., & Lee, S. I. (2017). A unified approach to interpreting model predictions. In NeurIPS."},{"key":"6543_CR151","doi-asserted-by":"crossref","unstructured":"Lyu, D., Yang, F., & Liu, B. et\u00a0al (2019). SDRL: Interpretable and data-efficient deep reinforcement learning leveraging symbolic planning. In AAAI.","DOI":"10.1609\/aaai.v33i01.33012970"},{"key":"6543_CR152","unstructured":"Ma, Z., Zhuang, Y., & Weng, P. et\u00a0al (2020). Interpretable reinforcement learning with neural symbolic logic. arXiv:2103.08228"},{"key":"6543_CR154","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1023\/A:1018020625251","volume":"22","author":"R Maclin","year":"1996","unstructured":"Maclin, R., & Shavlik, J. W. (1996). Creating advice-taking reinforcement learners. Machine Learning, 22, 251\u2013282.","journal-title":"Machine Learning"},{"key":"6543_CR155","unstructured":"Madumal, P., Miller, T., & Sonenberg, L. et\u00a0al (2020a). Distal explanations for model-free explainable reinforcement learning. arXiv:2001.10284"},{"key":"6543_CR156","doi-asserted-by":"crossref","unstructured":"Madumal, P., Miller, T., & Sonenberg, L. et\u00a0al (2020b). Explainable reinforcement learning through a causal lens. In AAAI.","DOI":"10.1609\/aaai.v34i03.5631"},{"key":"6543_CR157","doi-asserted-by":"crossref","unstructured":"Maes, F., Fonteneau, R., & Wehenkel, L. et\u00a0al (2012a). Policy search in a space of simple closed-form formulas: towards interpretability of reinforcement learning. In Discovery science.","DOI":"10.1007\/978-3-642-33492-4_6"},{"key":"6543_CR158","doi-asserted-by":"crossref","unstructured":"Maes, F., Wehenkel, L., & Ernst, D. (2012b). Automatic discovery of ranking formulas for playing with multi-armed bandits. In Recent advances in reinforcement learning.","DOI":"10.1007\/978-3-642-29946-9_5"},{"key":"6543_CR159","unstructured":"Maes, P., Mataric, M. J., & Meyer, J. A. et\u00a0al (1996). Learning to use selective attention and short-term memory in sequential tasks. In International conference on simulation of adaptive behavior."},{"key":"6543_CR160","unstructured":"Mania, H., Guy, A., & Recht, B. (2018). Simple random search of static linear policies is competitive for reinforcement learning. In NeurIPS."},{"key":"6543_CR161","unstructured":"Marom, O., & Rosman, B. (2018). Zero-shot transfer with deictic object-oriented representation in reinforcement learning. In NeurIPS."},{"key":"6543_CR162","doi-asserted-by":"crossref","unstructured":"Mart\u00ednez, D., Aleny\u00e0, & G., Torras, C. et\u00a0al (2016). Learning relational dynamics of stochastic domains for planning. In ICAPS.","DOI":"10.1609\/icaps.v26i1.13746"},{"issue":"78","key":"6543_CR163","first-page":"1","volume":"18","author":"D Mart\u00ednez","year":"2017","unstructured":"Mart\u00ednez, D., Aleny\u00e0, G., Ribeiro, T., et al. (2017). Relational reinforcement learning for planning with exogenous effects. Journal of Machine Learning Research, 18(78), 1\u201344.","journal-title":"Journal of Machine Learning Research"},{"key":"6543_CR164","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1016\/j.artint.2015.02.006","volume":"247","author":"D Mart\u00ednez","year":"2017","unstructured":"Mart\u00ednez, D., Aleny\u00e0, G., & Torras, C. (2017). Relational reinforcement learning with guided demonstrations. Artificial Intelligence, 247, 295\u2013312.","journal-title":"Artificial Intelligence"},{"key":"6543_CR165","unstructured":"Mehrabi, N., Morstatter, F., & Saxena, N., et\u00a0al. (2019). A survey on bias and fairness in machine learning. arXiv:1908.09635"},{"key":"6543_CR166","doi-asserted-by":"crossref","unstructured":"Metzen, J. H. (2013). Learning graph-based representations for continuous reinforcement learning domains. In ECML.","DOI":"10.1007\/978-3-642-40988-2_6"},{"key":"6543_CR167","doi-asserted-by":"crossref","unstructured":"Michels, J., Saxena, A., & Ng, A. Y. (2005). High speed obstacle avoidance using monocular vision and reinforcement learning. In ICML.","DOI":"10.1145\/1102351.1102426"},{"key":"6543_CR168","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.artint.2018.07.007","volume":"267","author":"T Miller","year":"2019","unstructured":"Miller, T. (2019). Explanation in artificial intelligence: Insights from the social sciences. Artificial Intelligence, 267, 1\u201338.","journal-title":"Artificial Intelligence"},{"key":"6543_CR169","doi-asserted-by":"crossref","unstructured":"Minervini, P., Demeester, T., & Rockt\u00e4schel, T., et\u00a0al. (2017). Adversarial sets for regularising neural link predictors. In UAI.","DOI":"10.18653\/v1\/K18-1007"},{"key":"6543_CR170","doi-asserted-by":"crossref","unstructured":"Mittelstadt, B., Russell, C., & Wachter, S. (2019). Explaining explanations in AI. In Conference on fairness, accountability, and transparency.","DOI":"10.1145\/3287560.3287574"},{"key":"6543_CR171","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., et al. (2015). Human-level control through deep reinforcement learning. Nature, 518, 529\u2013533.","journal-title":"Nature"},{"key":"6543_CR172","unstructured":"Mohseni, S., Zarei, N., & Ragan, E. D. (2020). A multidisciplinary survey and framework for design and evaluation of explainable AI systems. arXiv:1811.11839"},{"key":"6543_CR173","unstructured":"Molnar, C. (2019). Interpretable machine learning: A guide for making black box models explainable."},{"issue":"4","key":"6543_CR174","doi-asserted-by":"crossref","first-page":"2141","DOI":"10.1007\/s11948-019-00165-5","volume":"26","author":"J Morley","year":"2020","unstructured":"Morley, J., Floridi, L., Kinsey, L., et al. (2020). From what to how: An initial review of publicly available AI ethics tools, methods and research to translate principles into practices. Science and Engineering Ethics, 26(4), 2141\u201368.","journal-title":"Science and Engineering Ethics"},{"key":"6543_CR175","unstructured":"Mott, A., Zoran, D., & Chrzanowski, M., et\u00a0al. (2019). Towards interpretable reinforcement learning using attention augmented agents. In NeurIPS."},{"key":"6543_CR176","unstructured":"Munzer, T., Piot, B., & Geist, M., et\u00a0al. (2015). Inverse reinforcement learning in relational domains. In IJCAI."},{"key":"6543_CR177","doi-asserted-by":"crossref","unstructured":"Nageshrao, S., Costa, B., & Filev, D. (2019). Interpretable approximation of a deep reinforcement learning agent as a set of if-then rules. In ICMLA.","DOI":"10.1109\/ICMLA.2019.00041"},{"key":"6543_CR178","unstructured":"Natarajan, S., Joshi, S., & Tadepalli, P., et\u00a0al. (2011). Imitation learning in relational domains: A functional-gradient boosting approach. In IJCAI."},{"key":"6543_CR179","unstructured":"Ng, A. Y., & Russell, S. (2000). Algorithms for inverse reinforcement learning. In ICML."},{"key":"6543_CR180","unstructured":"OpenAI, Akkaya, I., & Andrychowicz, M., et\u00a0al. (2019). Solving Rubik\u2019s Cube with a Robot Hand. arXiv:1910.07113"},{"key":"6543_CR181","unstructured":"OpenAI, & Achiam, J., et\u00a0al. (2023). Gpt-4 technical report. arXiv:2303.08774"},{"issue":"1\u20132","key":"6543_CR182","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000053","volume":"7","author":"T Osa","year":"2018","unstructured":"Osa, T., Pajarinen, J., Neumann, G., et al. (2018). Algorithmic perspective on imitation learning. Foundations and Trends in Robotics, 7(1\u20132), 1\u2013179.","journal-title":"Foundations and Trends in Robotics"},{"key":"6543_CR183","unstructured":"Pace, A., Chan, A., & van\u00a0der Schaar, M. (2022). POETREE: Interpretable policy learning with adaptive decision trees. In ICLR."},{"issue":"3","key":"6543_CR184","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1007\/s11023-019-09502-w","volume":"29","author":"A P\u00e1ez","year":"2019","unstructured":"P\u00e1ez, A. (2019). The pragmatic turn in explainable artificial intelligence (XAI). Minds and Machines, 29(3), 441\u2013459.","journal-title":"Minds and Machines"},{"key":"6543_CR185","unstructured":"Paischer, F., Adler, T., & Hofmarcher, M., et\u00a0al. (2023). Semantic helm: A human-readable memory for reinforcement learning. In NeurIPS."},{"key":"6543_CR186","doi-asserted-by":"crossref","unstructured":"Pasula, H. M., Zettlemoyer, L. S., & Kaelbling, L. P. (2007). Learning symbolic models of stochastic domains. In JAIR.","DOI":"10.1613\/jair.2113"},{"key":"6543_CR187","unstructured":"Payani, A., & Fekri, F. (2019a). Inductive logic programming via differentiable deep neural logic networks. arXiv:1906.03523"},{"key":"6543_CR188","unstructured":"Payani, A., & Fekri, F. (2019b). Learning algorithms via neural logic networks. arXiv:1904.01554"},{"key":"6543_CR189","unstructured":"Payani, A., & Fekri, F. (2020). Incorporating Relational Background Knowledge into Reinforcement Learning via Differentiable Inductive Logic Programming. arXiv:2003.10386"},{"key":"6543_CR190","unstructured":"Penkov, S., & Ramamoorthy, S. (2019). Learning programmatically structured representations with perceptor gradients. In ICLR."},{"key":"6543_CR191","unstructured":"Plumb, G., Al-Shedivat, M., & Cabrera, AA., et\u00a0al. (2020). Regularizing black-box models for improved interpretability. arXiv:1902.06787"},{"key":"6543_CR192","unstructured":"Pomerleau, D. (1989). Alvinn: An autonomous land vehicle in a neural network. In NeurIPS."},{"key":"6543_CR193","doi-asserted-by":"crossref","unstructured":"Puiutta, E., & Veith, E. M. (2020). Explainable reinforcement learning: A survey. In LNCS.","DOI":"10.1007\/978-3-030-57321-8_5"},{"key":"6543_CR194","doi-asserted-by":"crossref","unstructured":"Puterman, M. (1994). Markov decision processes: Discrete stochastic dynamic programming. Wiley.","DOI":"10.1002\/9780470316887"},{"key":"6543_CR195","unstructured":"Qiu, W., & Zhu, H. (2022). Programmatic reinforcement learning without oracles. In ICLR."},{"key":"6543_CR196","unstructured":"Rafailov, R., Sharma, A., & Mitchell, E., et\u00a0al. (2023). Direct preference optimization: Your language model is secretly a reward model. In NeurIPS."},{"key":"6543_CR197","doi-asserted-by":"crossref","unstructured":"Raji, I. D., Smart, A., & White, R. N., et\u00a0al. (2020). Closing the AI accountability gap: defining an end-to-end framework for internal algorithmic auditing. arXiv:2001.00973","DOI":"10.1145\/3351095.3372873"},{"key":"6543_CR198","unstructured":"Ramesh, A., Pavlov, M., & Goh, G., et\u00a0al. (2021). Zero-shot text-to-image generation. arXiv:2102.12092"},{"key":"6543_CR199","unstructured":"Randlov, J., & Alstrom, P. (1998). Learning to drive a bicycle using reinforcement learning and shaping. In ICML."},{"key":"6543_CR200","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., & Girshick, R., et\u00a0al. (2016). You only look once: Unified, real-time object detection. In CVPR.","DOI":"10.1109\/CVPR.2016.91"},{"key":"6543_CR201","unstructured":"Ribeiro, M. T., Singh, S., & Guestrin, C. (2016a). Model-Agnostic Interpretability of Machine Learning. In ICML workshop on human interpretability in ML."},{"key":"6543_CR202","doi-asserted-by":"crossref","unstructured":"Ribeiro, M. T., Singh, S., & Guestrin, C. (2016b). \"Why Should I Trust You?\": Explaining the Predictions of Any Classifier. In KDD.","DOI":"10.18653\/v1\/N16-3020"},{"key":"6543_CR203","doi-asserted-by":"crossref","unstructured":"Rockt\u00e4schel, T., Singh, S., & Riedel, S. (2015). Injecting logical background knowledge into embeddings for relation extraction. In Human language technologies.","DOI":"10.3115\/v1\/N15-1118"},{"key":"6543_CR204","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., & Lorenz, D., et\u00a0al. (2022). High-resolution image synthesis with latent diffusion models. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6543_CR205","unstructured":"Ross, S., Gordon, G. J., & Bagnell, J. A. (2011). A reduction of imitation learning and structured prediction to no-regret online learning. In AISTATS."},{"key":"6543_CR206","unstructured":"Roth, A. M., Topin, N., & Jamshidi, P., et\u00a0al. (2019). Conservative Q-Improvement: Reinforcement Learning for an Interpretable Decision-Tree Policy. arXiv:1907.01180"},{"key":"6543_CR207","doi-asserted-by":"crossref","unstructured":"Rothkopf, C. A., & Dimitrakakis, C. (2011). Preference elicitation and inverse reinforcement learning. In ECML.","DOI":"10.1007\/978-3-642-23808-6_3"},{"issue":"5","key":"6543_CR208","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1038\/s42256-019-0048-x","volume":"1","author":"C Rudin","year":"2019","unstructured":"Rudin, C. (2019). Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead. Nature Machine Intelligence, 1(5), 206\u2013215.","journal-title":"Nature Machine Intelligence"},{"key":"6543_CR209","doi-asserted-by":"crossref","unstructured":"Rudin, C., & Carlson, D. (2019). The secrets of machine learning: ten things you wish you had known earlier to be more effective at data analysis. In Operations research & management science in the age of analytics (pp. 44\u201372).","DOI":"10.1287\/educ.2019.0200"},{"key":"6543_CR210","doi-asserted-by":"crossref","unstructured":"Russell, S. (1998). Learning agents for uncertain environments. In COLT.","DOI":"10.1145\/279943.279964"},{"key":"6543_CR211","unstructured":"Rusu, A. A., Colmenarejo, S. G., G\u00fcl\u00e7ehre, \u00c7., et\u00a0al. (2016). Policy distillation. In ICLR."},{"key":"6543_CR212","unstructured":"Sanchez-Gonzalez, A., Heess, N., & Springenberg, J. T., et\u00a0al. (2018). Graph networks as learnable physics engines for inference and control. In ICML."},{"key":"6543_CR213","unstructured":"Sanner, S. (2005). Simultaneous learning of structure and value in relational reinforcement learning. In ICML workshop on rich representations for RL."},{"key":"6543_CR214","unstructured":"Sanner, S. (2011). Relational dynamic influence diagram language (RDDL): Language description. In International planning competition."},{"key":"6543_CR215","unstructured":"Santoro, A., Raposo, D., Barrett, D. G. T., et\u00a0al. (2017). A simple neural network module for relational reasoning. In NeurIPS."},{"issue":"1","key":"6543_CR216","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2009","unstructured":"Scarselli, F., Gori, M., Tsoi, A. C., et al. (2009). The graph neural network model. IEEE Transactions on Neural Networks, 20(1), 61\u201380.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"6543_CR217","unstructured":"Scholz, J., Levihn, M., & Isbell, C. L., et\u00a0al. (2014). A physics-based model prior for object-oriented MDPs. In ICML."},{"key":"6543_CR218","unstructured":"Schulman, J., Wolski, F., & Dhariwal, P., et\u00a0al. (2017). Proximal policy optimization algorithms. arXiv:1707.06347"},{"key":"6543_CR219","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2020.103367","volume":"288","author":"P Sequeira","year":"2020","unstructured":"Sequeira, P., & Gervasio, M. (2020). Interestingness elements for explainable reinforcement learning: Understanding agents\u2019 capabilities and limitations. Artificial Intelligence, 288, 103367.","journal-title":"Artificial Intelligence"},{"key":"6543_CR220","unstructured":"Serafini, L., & d\u2019Avila Garcez, A. (2016). Logic tensor networks: Deep learning and logical reasoning from data and knowledge. In CEUR workshop."},{"key":"6543_CR221","doi-asserted-by":"crossref","unstructured":"Shi, W., Huang, G., & Song, S., et\u00a0al. (2020). Self-supervised discovering of interpretable features for reinforcement learning. arXiv:2003.07069","DOI":"10.1109\/TPAMI.2020.3037898"},{"key":"6543_CR222","unstructured":"Shu, T., Xiong, C., & Socher, R. (2018). Hierarchical and interpretable skill acquisition in multi-task reinforcement learning. In ICLR."},{"key":"6543_CR223","doi-asserted-by":"crossref","unstructured":"Silva, A., & Gombolay, M. (2020). Neural-encoding Human Experts\u2019 Domain Knowledge to Warm Start Reinforcement Learning. arXiv:1902.06007","DOI":"10.1609\/aaai.v35i6.16638"},{"key":"6543_CR224","unstructured":"Silva, A., Gombolay, M., & Killian, T., et\u00a0al. (2020). Optimization methods for interpretable differentiable decision trees applied to reinforcement learning. In AISTATS."},{"key":"6543_CR225","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., et al. (2017). Mastering the game of Go without human knowledge. Nature, 550, 354\u2013359.","journal-title":"Nature"},{"key":"6543_CR226","doi-asserted-by":"crossref","first-page":"7913","DOI":"10.1038\/s41467-023-43713-1","volume":"14","author":"C Singh","year":"2023","unstructured":"Singh, C., Askari, A., Caruana, R., et al. (2023). Augmenting interpretable models with large language models during training. Nature Communications, 14, 7913.","journal-title":"Nature Communications"},{"issue":"1\u20132","key":"6543_CR227","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1016\/S0004-3702(00)00079-5","volume":"125","author":"J Slaney","year":"2001","unstructured":"Slaney, J., & Thi\u00e9baux, S. (2001). Blocks world revisited. Artificial Intelligence, 125(1\u20132), 119\u2013153.","journal-title":"Artificial Intelligence"},{"key":"6543_CR228","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1613\/jair.1.11524","volume":"65","author":"M Sridharan","year":"2019","unstructured":"Sridharan, M., Gelfond, M., Zhang, S., et al. (2019). REBA: A refinement-based architecture for knowledge representation and reasoning in robotics. JAIR, 65, 87\u2013180.","journal-title":"JAIR"},{"key":"6543_CR229","unstructured":"Srinivasan, S., & Doshi-Velez, F. (2020). Interpretable batch IRL to extract clinician goals in ICU hypotension management. In AMIA joint summits on translational science."},{"key":"6543_CR230","unstructured":"Sun, S. H., Wu, T. L., & Lim, J. J. (2020). Program guided agent. In ICLR."},{"key":"6543_CR231","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction. MIT Press"},{"issue":"1\u20132","key":"6543_CR232","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R. S., Precup, D., & Singh, S. (1999). Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence, 112(1\u20132), 181\u2013211.","journal-title":"Artificial Intelligence"},{"key":"6543_CR233","doi-asserted-by":"crossref","unstructured":"Swain, M. (2013). Knowledge Representation. In Encyclopedia of Systems Biology (pp. 1082\u20131084).","DOI":"10.1007\/978-1-4419-9863-7_595"},{"key":"6543_CR234","doi-asserted-by":"crossref","unstructured":"Tang, Y., Nguyen, D., & Ha, D. (2020). Neuroevolution of self-interpretable agents. In GECCO.","DOI":"10.1145\/3377930.3389847"},{"key":"6543_CR235","unstructured":"Tasse, G. N., James, S., & Rosman, B. (2020). A boolean task algebra for reinforcement learning. In NeurIPS."},{"key":"6543_CR236","unstructured":"Tasse, G. N., James, S., & Rosman, B. (2022). Generalisation in lifelong reinforcement learning through logical composition. In ICLR."},{"key":"6543_CR237","unstructured":"Todorov, E. (2009). Compositionality of optimal control laws. In NeurIPS."},{"key":"6543_CR238","doi-asserted-by":"crossref","unstructured":"Topin, N., & Veloso, M. (2019). Generation of policy-level explanations for reinforcement learning. In AAAI.","DOI":"10.1609\/aaai.v33i01.33012514"},{"key":"6543_CR239","doi-asserted-by":"crossref","unstructured":"Topin, N., Milani, S., & Fang, F., et\u00a0al. (2021). Iterative bounding MDPs: Learning interpretable policies via non-interpretable methods. In AAAI.","DOI":"10.1609\/aaai.v35i11.17192"},{"key":"6543_CR240","unstructured":"Toro\u00a0Icarte, R., Klassen, T., & Valenzano, R., et\u00a0al. (2018a). Using reward machines for high-level task specification and decomposition in reinforcement learning. In ICML."},{"key":"6543_CR241","unstructured":"Toro\u00a0Icarte, R., Klassen, T. Q., & Valenzano, R., et\u00a0al. (2018b). Teaching multiple tasks to an rl agent using LTL. In AAMAS."},{"key":"6543_CR242","unstructured":"Toro\u00a0Icarte, R., Waldie, E., & Klassen, T., et\u00a0al. (2019). Learning reward machines for partially observable reinforcement learning. In NeurIPS."},{"key":"6543_CR243","unstructured":"Torrey, L., & Taylor, M. E. (2013). Teaching on a budget: Agents advising agents in reinforcement learning. In AAMAS."},{"key":"6543_CR153","unstructured":"van der Maaten, L., & Hinton, G. (2008). Visualizing data using t-SNE. JMLR Sci 9(86), 2579\u20132605."},{"key":"6543_CR244","unstructured":"van der Waa, J., van Diggelen, J., van\u00a0den Bosch, K., et\u00a0al. (2018). Contrastive explanations for reinforcement learning in terms of expected consequences. In IJCAI workshop on XAI."},{"key":"6543_CR245","unstructured":"van Otterlo, M. (2005). A survey of reinforcement learning in relational domains. CTIT Technical Report Series: Tech. rep."},{"key":"6543_CR246","unstructured":"van Otterlo, M. (2009). The logic of adaptive behavior: Knowledge representation and algorithms for adaptive sequential decision making under uncertainty in first-order and relational domains. IOS Press."},{"key":"6543_CR247","doi-asserted-by":"crossref","unstructured":"van Otterlo, M. (2012). Solving relational and first-order logical markov decision processes: A Survey. In M. Wiering & M. van Otterlo (Eds.), Reinforcement learning (Vol. 12, pp. 253\u2013292). Berlin Heidelberg: Springer.","DOI":"10.1007\/978-3-642-27645-3_8"},{"key":"6543_CR248","unstructured":"Vasic, M., Petrovic, A., & Wang, K., et\u00a0al. (2019). MoET: Interpretable and verifiable reinforcement learning via mixture of expert trees. arXiv:1906.06717"},{"key":"6543_CR249","unstructured":"Vaswani, A., Shazeer, N., & Parmar, N., et\u00a0al. (2017). Attention is all you need. In NeurIPS."},{"key":"6543_CR250","unstructured":"Veerapaneni, R., Co-Reyes, J. D., & Chang, M., et\u00a0al. (2020). Entity abstraction in visual model-based reinforcement learning. In CoRL."},{"key":"6543_CR251","unstructured":"Verma, A., Murali, V., & Singh, R., et\u00a0al. (2018). Programmatically interpretable reinforcement learning. In ICML."},{"key":"6543_CR252","unstructured":"Verma, A., M.\u00a0Le, H., & Yue, Y., et\u00a0al. (2019). Imitation-projected programmatic reinforcement learning. In NeurIPS."},{"key":"6543_CR253","unstructured":"Vinyals, O., Ewalds, T., & Bartunov, S., et\u00a0al. (2017). StarCraft II: A new challenge for reinforcement learning. arXiv:1708.04782"},{"issue":"7782","key":"6543_CR254","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., Babuschkin, I., Czarnecki, W. M., et al. (2019). Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature, 575(7782), 350\u2013354.","journal-title":"Nature"},{"key":"6543_CR255","doi-asserted-by":"crossref","unstructured":"Viola, P., & Jones, M. (2001). Robust real-time object detection. In International journal of computer vision.","DOI":"10.1109\/ICCV.2001.937709"},{"key":"6543_CR256","unstructured":"Walker, T., Shavlik, J., & Maclin, R. (2004). Relational reinforcement learning via sampling the space of first-order conjunctive features. In ICML workshop on relational reinforcement learning."},{"key":"6543_CR257","doi-asserted-by":"crossref","unstructured":"Walker, T., Torrey, L., & Shavlik, J., et\u00a0al. (2008). Building relational world models for reinforcement learning. In LNCS.","DOI":"10.1007\/978-3-540-78469-2_27"},{"key":"6543_CR258","unstructured":"Walsh, J. (2010). Efficient learning of relational models for sequential decision making. PhD thesis, Rutgers."},{"key":"6543_CR259","unstructured":"Wang, T., Liao, R., & Fidler, S. (2018). NerveNet: Learning Structured Policy with Graph Neural Networks. In: ICLR"},{"key":"6543_CR260","unstructured":"Wang, W., & Pan, S. J. (2019). Integrating deep learning with logic fusion for information extraction. In AAAI."},{"key":"6543_CR261","unstructured":"Wang, Y., Mase, M., & Egi, M. (2020). Attribution-based salience method towards interpretable reinforcement learning. In Spring symposium on combining ml and knowledge engineering in practice."},{"key":"6543_CR262","unstructured":"Weng, P., Busa-Fekete, R., H\u00fcllermeier, E. (2013). Interactive Q-learning with ordinal rewards and unreliable tutor. In ECML workshop on RL with generalized feedback."},{"key":"6543_CR263","doi-asserted-by":"crossref","first-page":"1003","DOI":"10.1613\/jair.1.12360","volume":"70","author":"J Whittlestone","year":"2021","unstructured":"Whittlestone, J., Arulkumaran, K., & Crosby, M. (2021). The societal implications of deep reinforcement learning. JAIR, 70, 1003\u20131030.","journal-title":"JAIR"},{"key":"6543_CR264","doi-asserted-by":"crossref","unstructured":"Wiegreffe, S., & Pinter, Y. (2019). Attention is not not Explanation. In EMNLP.","DOI":"10.18653\/v1\/D19-1002"},{"key":"6543_CR265","unstructured":"Wiener, N. (1954). The human use of human beings. Houghton Mifflin"},{"key":"6543_CR266","doi-asserted-by":"crossref","unstructured":"Wojke, N., Bewley, A., & Paulus, D. (2017). Simple online and realtime tracking with a deep association metric. In 2017 IEEE international conference on image processing (ICIP).","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"6543_CR267","doi-asserted-by":"crossref","unstructured":"Wu, B., Gupta, J. K., & Kochenderfer, M. J. (2019a). Model primitive hierarchical lifelong reinforcement learning. In AAMAS.","DOI":"10.1007\/s10458-020-09451-0"},{"key":"6543_CR268","unstructured":"Wu, M., Parbhoo, S., & Hughes, M. C., et\u00a0al. (2019b). Optimizing for interpretability in deep neural networks with tree regularization. arXiv:1908.05254"},{"key":"6543_CR269","unstructured":"Wu, Z., Geiger, A., & Potts, C., et\u00a0al. (2023). Interpretability at scale: Identifying causal mechanisms in alpaca. In NeurIPS."},{"key":"6543_CR270","unstructured":"Xu, J., Zhang, Z., & Friedman, T., et\u00a0al. (2018). A semantic loss function for deep learning with symbolic knowledge. In ICML."},{"key":"6543_CR271","doi-asserted-by":"crossref","unstructured":"Xu, Z., Gavran, I., & Ahmad, Y., et\u00a0al. (2020). Joint inference of reward machines and policies for reinforcement learning. In ICAPS.","DOI":"10.1609\/icaps.v30i1.6756"},{"key":"6543_CR272","unstructured":"Yang, F., Yang, Z., & Cohen, W. W. (2017). Differentiable learning of logical rules for knowledge base reasoning. In NeurIPS."},{"key":"6543_CR273","doi-asserted-by":"crossref","unstructured":"Yang, F., Lyu, D., Liu, B., et\u00a0al. (2018a). PEORL: Integrating symbolic planning and hierarchical reinforcement learning for robust decision-making. In IJCAI.","DOI":"10.24963\/ijcai.2018\/675"},{"key":"6543_CR274","unstructured":"Yang, Y., & Song, L. (2019). Learn to explain efficiently via neural logic inductive learning. In ICLR."},{"key":"6543_CR275","unstructured":"Yang, Y., Morillo, I. G., & Hospedales, T. M. (2018b). Deep neural decision trees. In ICML workshop on human interpretability in ML."},{"key":"6543_CR276","unstructured":"Younes, L. (2004). PPDDL1.0: The language for the probabilistic part of IPC-4."},{"key":"6543_CR277","doi-asserted-by":"crossref","unstructured":"Yu, H., Shen, Z., & Miao, C., et\u00a0al. (2018). Building ethics into artificial intelligence. In IJCAI.","DOI":"10.24963\/ijcai.2018\/779"},{"key":"6543_CR278","unstructured":"Zahavy, T., Ben-Zrihem, N., & Mannor, S. (2016). Graying the black box: Understanding DQNs. In ICML."},{"key":"6543_CR279","unstructured":"Zambaldi, V., Raposo, D., & Santoro, A., et\u00a0al. (2019). Deep reinforcement learning with relational inductive biases. In ICLR."},{"key":"6543_CR280","unstructured":"Zhang, A., Sukhbaatar, S., & Lerer, A., et\u00a0al. (2018a). Composable planning with attributes. In ICML."},{"key":"6543_CR281","unstructured":"Zhang, C., Vinyals, O., & Munos, R., et\u00a0al. (2018b). A Study on Overfitting in Deep Reinforcement Learning. arXiv:1804.06893"},{"key":"6543_CR282","unstructured":"Zhang, H., Gao, Z., & Zhou, Y., et\u00a0al. (2019). Faster and Safer Training by Embedding High-Level Knowledge into Deep Reinforcement Learning. arXiv:1910.09986"},{"key":"6543_CR283","unstructured":"Zhang, S., & Sridharan, M. (2020). A Survey of Knowledge-based Sequential Decision Making under Uncertainty. arXiv:2008.08548"},{"key":"6543_CR284","unstructured":"Zhang, Y., Lee, J. D., & Jordan, M. I. (2016). L1-regularized neural networks are improperly learnable in polynomial time. In ICML."},{"key":"6543_CR285","unstructured":"Zhu, G., Huang, Z., & Zhang, C. (2018). Object-oriented dynamics predictor. In NeurIPS."},{"key":"6543_CR286","doi-asserted-by":"crossref","unstructured":"Zhu, G., Wang, J., & Ren, Z., et\u00a0al. (2020). Object-oriented dynamics learning through multi-level abstraction. In AAAI.","DOI":"10.1609\/aaai.v34i04.6183"},{"key":"6543_CR287","doi-asserted-by":"crossref","unstructured":"Zhu, H., Magill, S., & Xiong, Z., et\u00a0al. (2019). An inductive synthesis framework for verifiable reinforcement learning. In ACM SIGPLAN conference on PLDI.","DOI":"10.1145\/3314221.3314638"},{"key":"6543_CR288","unstructured":"Zimmer, M., Viappiani, P., & Weng, P. (2014). Teacher-student framework: A reinforcement learning approach. In AAMAS workshop on autonomous robots and multirobot systems."},{"key":"6543_CR289","unstructured":"Zimmer, M., Feng, X., & Glanois, C., et\u00a0al. (2021). Differentiable logic machines. arXiv:2102.11529"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06543-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06543-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06543-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:07:02Z","timestamp":1764266822000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06543-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,19]]},"references-count":289,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["6543"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06543-w","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,19]]},"assertion":[{"value":"15 August 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 March 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 March 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 April 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}