{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,9]],"date-time":"2026-07-09T15:20:58Z","timestamp":1783610458058,"version":"3.55.0"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T00:00:00Z","timestamp":1710115200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T00:00:00Z","timestamp":1710115200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000138","name":"U.S. Department of Education","doi-asserted-by":"publisher","award":["P116S210005"],"award-info":[{"award-number":["P116S210005"]}],"id":[{"id":"10.13039\/100000138","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2226936"],"award-info":[{"award-number":["2226936"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["AI Ethics"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s43681-024-00441-6","type":"journal-article","created":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T09:01:32Z","timestamp":1710147692000},"page":"1047-1067","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Reinforcement learning-based motion planning in partially observable environments under ethical constraints"],"prefix":"10.1007","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3432-8827","authenticated-orcid":false,"given":"Junchao","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mingyu","family":"Cai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shaoping","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,3,11]]},"reference":[{"key":"441_CR1","doi-asserted-by":"publisher","unstructured":"Slavkovik, M.: Automating Moral Reasoning. In: Bourgaux, C., Ozaki, A., Pe\u00f1aloza, R. (eds.) International Research School in Artificial Intelligence in Bergen (AIB 2022). Open Access Series in Informatics (OASIcs), vol. 99, pp. 6\u20131613. Schloss Dagstuhl \u2013 Leibniz-Zentrum f\u00fcr Informatik, Dagstuhl, Germany (2022). https:\/\/doi.org\/10.4230\/OASIcs.AIB.2022.6. https:\/\/drops.dagstuhl.de\/opus\/volltexte\/2022\/16004","DOI":"10.4230\/OASIcs.AIB.2022.6"},{"key":"441_CR2","unstructured":"Rest, J.R.: Moral development: advances in research and theory. Praeger, New York (1986)"},{"key":"441_CR3","unstructured":"Chonko, L.: Ethical theories. Retrieved from https:\/\/www.dsef.org\/wp-content\/uploads\/2012\/07\/EthicalTheories.pdf (2012). Accessed 20 June 2023"},{"key":"441_CR4","unstructured":"Mill, J.S.: Utilitarianism (1863). Utilitarianism, Liberty, Representative Government, 7\u20139, pp. 181\u2013202 (1859)"},{"key":"441_CR5","unstructured":"Sinnott-Armstrong, W.: Consequentialism. Stanford Encyclopedia of Philosophy (2019)"},{"key":"441_CR6","unstructured":"Davis, N.: Contemporary deontology (1993)"},{"key":"441_CR7","volume-title":"Virtue Ethics","author":"R Crisp","year":"1997","unstructured":"Crisp, R., Slote, M.: Virtue Ethics. Blackwell readings in philosophy. Oxford University Press, Kettering, Northamptonshire, England (1997)"},{"key":"441_CR8","doi-asserted-by":"crossref","unstructured":"Zoshak, J., Dew, K.: Beyond kant and bentham: How ethical theories are being used in artificial moral agents. In: Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems, pp. 1\u201315 (2021)","DOI":"10.1145\/3411764.3445102"},{"key":"441_CR9","doi-asserted-by":"crossref","unstructured":"Svegliato, J., Nashed, S.B., Zilberstein, S.: Ethically compliant sequential decision making. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 11657\u201311665 (2021)","DOI":"10.1609\/aaai.v35i13.17386"},{"key":"441_CR10","volume-title":"Machine Learning","author":"TM Mitchell","year":"2007","unstructured":"Mitchell, T.M., et al.: Machine Learning, vol. 1. McGraw-hill, New York (2007)"},{"key":"441_CR11","unstructured":"Arkin, R.C., Borenstein, J., Wagner, A.R.: Competing ethical frameworks mediated by moral emotions in hri: Motivations, background, and approach. In: Proc. 2019 International Conference on Robot Ethics and Standards. (2019)"},{"key":"441_CR12","doi-asserted-by":"crossref","unstructured":"Nahian, M.S.A., Frazier, S., Riedl, M., Harrison, B.: Learning norms from stories: A prior for value aligned agents. In: Proceedings of the AAAI\/ACM Conference on AI, Ethics, and Society, pp. 124\u2013130 (2020)","DOI":"10.1145\/3375627.3375825"},{"key":"441_CR13","unstructured":"Serramia, M., Lopez-Sanchez, M., Rodriguez-Aguilar, J.A.: Value-aligned ai: Lessons learnt from value-aligned norm selection"},{"key":"441_CR14","unstructured":"Aliman, N.-M., Kester, L.: Requisite variety in ethical utility functions for ai value alignment. arXiv preprint arXiv:1907.00430 (2019). Accessed 5 June 2023"},{"key":"441_CR15","doi-asserted-by":"crossref","unstructured":"Malle, B.F., Bello, P., Scheutz, M.: Requirements for an artificial agent with norm competence. In: Proceedings of the 2019 AAAI\/ACM Conference on AI, Ethics, and Society, pp. 21\u201327 (2019)","DOI":"10.1145\/3306618.3314252"},{"key":"441_CR16","doi-asserted-by":"crossref","unstructured":"Arkin, R.C.: Governing lethal behavior: Embedding ethics in a hybrid deliberative\/reactive robot architecture. In: Proceedings of the 3rd ACM\/IEEE International Conference on Human Robot Interaction, pp. 121\u2013128 (2008)","DOI":"10.1145\/1349822.1349839"},{"key":"441_CR17","unstructured":"Baier, C., Katoen, J.-P.: Principles of Model Checking. MIT press, Cambridge, Massachusetts (2008). Chap. 3,5,6"},{"issue":"237","key":"441_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1093\/mind\/LX.237.1","volume":"60","author":"GH Von Wright","year":"1951","unstructured":"Von Wright, G.H.: Deontic logic. Mind 60(237), 1\u201315 (1951)","journal-title":"Mind"},{"key":"441_CR19","doi-asserted-by":"crossref","unstructured":"Pnueli, A.: The temporal logic of programs. In: 18th Annual Symposium on Foundations of Computer Science, pp. 46\u201357 (1977). IEEE","DOI":"10.1109\/SFCS.1977.32"},{"key":"441_CR20","doi-asserted-by":"crossref","unstructured":"Clarke, E.M., Emerson, E.A.: Design and synthesis of synchronization skeletons using branching time temporal logic. In: Workshop on Logic of Programs, pp. 52\u201371 (1981). Springer","DOI":"10.1007\/BFb0025774"},{"issue":"4","key":"441_CR21","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/MIS.2006.82","volume":"21","author":"S Bringsjord","year":"2006","unstructured":"Bringsjord, S., Arkoudas, K., Bello, P.: Toward a general logicist methodology for engineering ethically correct robots. IEEE Intelligent Systems 21(4), 38\u201344 (2006)","journal-title":"IEEE Intelligent Systems"},{"key":"441_CR22","volume-title":"Defeasible Deontic Logic","author":"D Nute","year":"2012","unstructured":"Nute, D.: Defeasible Deontic Logic, vol. 263. Springer, Dordrecht, Netherlands (2012)"},{"key":"441_CR23","doi-asserted-by":"crossref","unstructured":"Neufeld, E.A., Bartocci, E., Ciabattoni, A., Governatori, G.: A normative supervisor for reinforcement learning agents. In: CADE, pp. 565\u2013576 (2021)","DOI":"10.1007\/978-3-030-79876-5_32"},{"key":"441_CR24","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11797"},{"issue":"1","key":"441_CR25","doi-asserted-by":"publisher","first-page":"1925","DOI":"10.1038\/s41598-023-28582-4","volume":"13","author":"M Cai","year":"2023","unstructured":"Cai, M., Xiao, S., Li, J., Kan, Z.: Safe reinforcement learning under temporal logic with reward design and quantum action selection. Scientific reports 13(1), 1925 (2023)","journal-title":"Scientific reports"},{"key":"441_CR26","doi-asserted-by":"crossref","unstructured":"Neufeld, E.A., Bartocci, E., Ciabattoni, A.: On normative reinforcement learning via safe reinforcement learning. In: PRIMA 2022: Principles and Practice of Multi-Agent Systems: 24th International Conference, Valencia, Spain, November 16\u201318, 2022, Proceedings, pp. 72\u201389 (2022). Springer","DOI":"10.1007\/978-3-031-21203-1_5"},{"key":"441_CR27","doi-asserted-by":"crossref","unstructured":"Grandi, U., Lorini, E., Parker, T., Alami, R.: Logic-based ethical planning. In: International Conference of the Italian Association for Artificial Intelligence, pp. 198\u2013211 (2022). Springer","DOI":"10.1007\/978-3-031-27181-6_14"},{"key":"441_CR28","doi-asserted-by":"crossref","unstructured":"Governatori, G.: Thou shalt is not you will. In: Proceedings of the 15th International Conference on Artificial Intelligence and Law, pp. 63\u201368 (2015)","DOI":"10.1145\/2746090.2746105"},{"key":"441_CR29","doi-asserted-by":"crossref","unstructured":"Governatori, G., Hashmi, M.: No time for compliance. In: 2015 IEEE 19th International Enterprise Distributed Object Computing Conference, pp. 9\u201318 (2015). IEEE","DOI":"10.1109\/EDOC.2015.12"},{"key":"441_CR30","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT press, Cambridge, Massachusetts (2018). Chap. 1,2,3"},{"key":"441_CR31","doi-asserted-by":"crossref","unstructured":"Herlau, T.: Moral reinforcement learning using actual causation. In: 2nd International Conference on Computer, Control and Robotics (ICCCR), pp. 179\u2013185. IEEE (2022)","DOI":"10.1109\/ICCCR54399.2022.9790262"},{"key":"441_CR32","unstructured":"Ecoffet, A., Lehman, J.: Reinforcement learning under moral uncertainty. In: International Conference on Machine Learning, pp. 2926\u20132936 (2021). PMLR"},{"key":"441_CR33","unstructured":"Rodriguez-Soto, M., Lopez-Sanchez, M., Rodriguez-Aguilar, J.A.: Guaranteeing the learning of ethical behaviour through multi-objective reinforcement learning*. (2021)"},{"key":"441_CR34","unstructured":"Abel, D., MacGlashan, J., Littman, M.L.: Reinforcement learning as a framework for ethical decision making, vol. WS-16-01 - WS-16-15 (2016)"},{"key":"441_CR35","doi-asserted-by":"publisher","unstructured":"Kurniawati, H.: Partially observable markov decision processes and robotics. Ann. Rev. Control Robot. Auton. Syst. 5, 253\u2013277 (2022). https:\/\/doi.org\/10.1146\/annurev-control-042920-092451","DOI":"10.1146\/annurev-control-042920-092451"},{"key":"441_CR36","unstructured":"Ng, A.Y., Russell, S., et al.: Algorithms for inverse reinforcement learning. In: Icml, vol. 1, p. 2 (2000)"},{"key":"441_CR37","unstructured":"Peschl, M., Zgonnikov, A., Oliehoek, F.A., Siebert, L.C.: MORAL: aligning AI with human norms through multi-objective reinforced active learning. arXiv preprint arXiv:2201.00012 (2021)"},{"key":"441_CR38","unstructured":"Glazier, A., Loreggia, A., Mattei, N., Rahgooy, T., Rossi, F., Venable, B.: Learning behavioral soft constraints from demonstrations. arXiv preprint arXiv:2202.10407 (2022). Accessed 13 July 2023"},{"key":"441_CR39","doi-asserted-by":"publisher","first-page":"871","DOI":"10.1080\/01691864.2023.2226191","volume":"37","author":"J Li","year":"2023","unstructured":"Li, J., Cai, M., Wang, Z., Xiao, S.: Model-based motion planning in pomdps with temporal logic specifications. Advanced Robotics 37, 871\u2013886 (2023)","journal-title":"Advanced Robotics"},{"key":"441_CR40","doi-asserted-by":"crossref","unstructured":"Li, J., Cai, M., Kan, Z., Xiao, S.: Model-free motion planning of autonomous agents for complex tasks in partially observable environments. arXiv preprint arXiv:2305.00561 (2023). Accessed 25 Aug 2023","DOI":"10.21203\/rs.3.rs-2856026\/v1"},{"key":"441_CR41","doi-asserted-by":"crossref","unstructured":"Boella, G., Van Der\u00a0Torre, L.: Permissions and obligations in hierarchical normative systems. In: Proceedings of the 9th International Conference on Artificial Intelligence and Law, pp. 109\u2013118 (2003)","DOI":"10.1145\/1047788.1047818"},{"key":"441_CR42","doi-asserted-by":"publisher","first-page":"799","DOI":"10.1007\/s10992-013-9295-1","volume":"42","author":"G Governatori","year":"2013","unstructured":"Governatori, G., Olivieri, F., Rotolo, A., Scannapieco, S.: Computing strong and weak permissions in defeasible logic. Journal of Philosophical Logic 42, 799\u2013829 (2013)","journal-title":"Journal of Philosophical Logic"},{"key":"441_CR43","doi-asserted-by":"publisher","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Machine Learning 8, 279\u2013292 (1992) https:\/\/doi.org\/10.1007\/bf00992698","DOI":"10.1007\/bf00992698"},{"key":"441_CR44","doi-asserted-by":"publisher","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., Petersen, S., Beattie, C., Sadik, A., Antonoglou, I., King, H., Kumaran, D., Wierstra, D., Legg, S., Hassabis, D.: Human-level control through deep reinforcement learning. Nature 518 (2015) https:\/\/doi.org\/10.1038\/nature14236","DOI":"10.1038\/nature14236"},{"key":"441_CR45","doi-asserted-by":"publisher","unstructured":"Lin, L.-J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine Learning 8 (1992) https:\/\/doi.org\/10.1007\/bf00992699","DOI":"10.1007\/bf00992699"},{"key":"441_CR46","unstructured":"Hausknecht, M., Stone, P.: Deep recurrent q-learning for partially observable mdps, vol. FS-15-06 (2015)"},{"key":"441_CR47","doi-asserted-by":"publisher","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Computation 9 (1997) https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"441_CR48","doi-asserted-by":"publisher","unstructured":"Bozkurt, A.K., Wang, Y., Zavlanos, M.M., Pajic, M.: Control synthesis from linear temporal logic specifications using model-free reinforcement learning. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 10349\u201310355 (2020). https:\/\/doi.org\/10.1109\/ICRA40945.2020.9196796","DOI":"10.1109\/ICRA40945.2020.9196796"},{"key":"441_CR49","doi-asserted-by":"publisher","unstructured":"Sickert, S., Esparza, J., Jaax, S., K\u0159et\u00ednsk\u00fd, J.: Limit-deterministic b\u00fcchi automata for linear temporal logic. (2016). https:\/\/doi.org\/10.1007\/978-3-319-41540-6_17","DOI":"10.1007\/978-3-319-41540-6_17"},{"key":"441_CR50","doi-asserted-by":"publisher","unstructured":"K\u0159et\u00ednsk\u00fd, J., Meggendorfer, T., Sickert, S.: Owl: A library for $$\\omega $$-words, automata, and ltl, LNCS. In: International Symposium on Automated Technology for Verification and Analysis, pp. 543\u2013550. Springer International Publishing, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01090-4_34","DOI":"10.1007\/978-3-030-01090-4_34"},{"key":"441_CR51","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3101544","author":"M Cai","year":"2021","unstructured":"Cai, M., Hasanbeig, M., Xiao, S., Abate, A., Kan, Z.: Modular deep reinforcement learning for continuous motion planning with temporal logic. IEEE Robotics Automat Lett. (2021). https:\/\/doi.org\/10.1109\/LRA.2021.3101544","journal-title":"IEEE Robotics Automat Lett."},{"key":"441_CR52","doi-asserted-by":"crossref","unstructured":"Cai, M., Xiao, S., Li, B., Li, Z., Kan, Z.: Reinforcement learning based temporal logic control with maximum probabilistic satisfaction. In: 2021 IEEE International Conference on Robotics and Automation (ICRA), pp. 806\u2013812. IEEE (2020)","DOI":"10.1109\/ICRA48506.2021.9561903"},{"key":"441_CR53","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3138704","author":"M Cai","year":"2021","unstructured":"Cai, M., Xiao, S., Li, Z., Kan, Z.: Optimal probabilistic motion planning with potential infeasible ltl constraints. IEEE Trans Automatic Control (2021). https:\/\/doi.org\/10.1109\/TAC.2021.3138704","journal-title":"IEEE Trans Automatic Control"},{"key":"441_CR54","unstructured":"Coumans, E., Bai, Y.: PyBullet, a Python module for physics simulation for games, robotics and machine learning. http:\/\/pybullet.org (2016\u20132021). Accessed 17 May 2023"}],"container-title":["AI and Ethics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s43681-024-00441-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s43681-024-00441-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s43681-024-00441-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,7]],"date-time":"2025-05-07T06:00:26Z","timestamp":1746597626000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s43681-024-00441-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,11]]},"references-count":54,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["441"],"URL":"https:\/\/doi.org\/10.1007\/s43681-024-00441-6","relation":{},"ISSN":["2730-5953","2730-5961"],"issn-type":[{"value":"2730-5953","type":"print"},{"value":"2730-5961","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,11]]},"assertion":[{"value":"19 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}