{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:52:06Z","timestamp":1775065926201,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:00:00Z","timestamp":1740096000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:00:00Z","timestamp":1740096000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100002913","name":"Vlaamse Overheid","doi-asserted-by":"publisher","award":["174U01222"],"award-info":[{"award-number":["174U01222"]}],"id":[{"id":"10.13039\/501100002913","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s10994-024-06653-5","type":"journal-article","created":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T13:40:18Z","timestamp":1740145218000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Maximum causal entropy inverse constrained reinforcement learning"],"prefix":"10.1007","volume":"114","author":[{"given":"Mattijs","family":"Baert","sequence":"first","affiliation":[]},{"given":"Pietro","family":"Mazzaglia","sequence":"additional","affiliation":[]},{"given":"Sam","family":"Leroux","sequence":"additional","affiliation":[]},{"given":"Pieter","family":"Simoens","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,21]]},"reference":[{"key":"6653_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., & Ng, A. Y. (2004). Apprenticeship learning via inverse reinforcement learning. In Proceedings of the twenty-first international conference on machine learning (p. 1).","DOI":"10.1145\/1015330.1015430"},{"key":"6653_CR2","volume-title":"Constrained Markov decision processes: Stochastic modeling","author":"E Altman","year":"1999","unstructured":"Altman, E. (1999). Constrained Markov decision processes: Stochastic modeling. Routledge."},{"key":"6653_CR3","doi-asserted-by":"crossref","unstructured":"Baert, M., Leroux, S., & Simoens, P. (2023). Inverse reinforcement learning through logic constraint inference. Machine Learning, 1\u20132.","DOI":"10.1007\/s10994-023-06311-2"},{"key":"6653_CR4","unstructured":"Baert, M., Leroux, S., & Simoens, P. (2023). Learning logic constraints from demonstration. In NeSy2023: 17th international workshop on neural-symbolic learning and reasoning (pp. 78\u201384)."},{"key":"6653_CR5","first-page":"103","volume":"15","author":"M Bain","year":"1995","unstructured":"Bain, M., & Sammut, C. (1995). A framework for behavioural cloning. Machine Intelligence, 15, 103\u2013129.","journal-title":"Machine Intelligence"},{"key":"6653_CR6","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1609\/aaai.v33i01.33013","volume":"33","author":"A Balakrishnan","year":"2019","unstructured":"Balakrishnan, A., Bouneffouf, D., Mattei, N., & Rossi, F. (2019). Incorporating behavioral constraints in online AI systems. 
Proceedings of the AAAI Conference on Artificial Intelligence, 33, 3\u201311.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"issue":"3","key":"6653_CR7","doi-asserted-by":"crossref","first-page":"688","DOI":"10.1007\/s10957-012-9989-5","volume":"153","author":"S Bhatnagar","year":"2012","unstructured":"Bhatnagar, S., & Lakshmanan, K. (2012). An online actor-critic algorithm with function approximation for constrained Markov decision processes. Journal of Optimization Theory and Applications, 153(3), 688\u2013708.","journal-title":"Journal of Optimization Theory and Applications"},{"key":"6653_CR8","volume-title":"Stochastic approximation: A dynamical systems viewpoint","author":"VS Borkar","year":"2009","unstructured":"Borkar, V. S. (2009). Stochastic approximation: A dynamical systems viewpoint (Vol. 48). Springer."},{"key":"6653_CR9","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511804441","volume-title":"Convex optimization","author":"S Boyd","year":"2004","unstructured":"Boyd, S., Boyd, S. P., & Vandenberghe, L. (2004). Convex optimization. Cambridge University Press."},{"key":"6653_CR10","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., & Zaremba, W. (2016). Openai gym. arXiv preprint arXiv:1606.01540."},{"key":"6653_CR11","unstructured":"Chan, A. J., & Schaar, M. (2021). Scalable Bayesian inverse reinforcement learning. In International conference on learning representations. https:\/\/openreview.net\/forum?id=4qR3coiNaIv"},{"key":"6653_CR12","doi-asserted-by":"crossref","unstructured":"Chou, G., Berenson, D., & Ozay, N. (2020a). Learning constraints from demonstrations. In Algorithmic foundations of robotics XIII: proceedings of the 13th workshop on the algorithmic foundations of robotics (vol. 13, pp. 228\u2013245). Springer.","DOI":"10.1007\/978-3-030-44051-0_14"},{"key":"6653_CR13","unstructured":"Chou, G., Berenson, D., & Ozay, N. (2021). Uncertainty-aware constraint learning for adaptive safe motion planning from demonstrations. In Conference on robot learning (pp. 1612\u20131639). PMLR."},{"key":"6653_CR14","unstructured":"Chou, G., Ozay, N., & Berenson, D. (2020b). Learning parametric constraints in high dimensions from demonstrations. In Conference on robot learning (pp. 1211\u20131230). PMLR."},{"key":"6653_CR15","doi-asserted-by":"crossref","unstructured":"Chou, G., Ozay, N., & Berenson, D. (2020c). Learning constraints from locally-optimal demonstrations under cost function uncertainty. IEEE Robotics and Automation Letters, 5(2), 3682\u20133690.","DOI":"10.1109\/LRA.2020.2974427"},{"issue":"2","key":"6653_CR16","doi-asserted-by":"crossref","first-page":"3827","DOI":"10.1109\/LRA.2022.3148436","volume":"7","author":"G Chou","year":"2022","unstructured":"Chou, G., Wang, H., & Berenson, D. (2022). Gaussian process constraint learning for scalable chance-constrained motion planning from demonstrations. IEEE Robotics and Automation Letters, 7(2), 3827\u20133834.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"6653_CR17","volume-title":"The alignment problem: Machine learning and human values","author":"B Christian","year":"2020","unstructured":"Christian, B. (2020). The alignment problem: Machine learning and human values. WW Norton & Company."},{"key":"6653_CR18","doi-asserted-by":"crossref","unstructured":"Chu, W., & Ghahramani, Z. (2005). Preference learning with Gaussian processes. In Proceedings of the 22nd international conference on machine learning (pp. 
137\u2013144).","DOI":"10.1145\/1102351.1102369"},{"key":"6653_CR19","unstructured":"Finn, C., Levine, S., & Abbeel, P. (2016). Guided cost learning: Deep inverse optimal control via policy optimization. In International conference on machine learning (pp. 49\u201358). PMLR."},{"key":"6653_CR20","unstructured":"Fu, J., Luo, K., & Levine, S. (2017). Learning robust rewards with adversarial inverse reinforcement learning. arXiv preprint arXiv:1710.11248."},{"key":"6653_CR21","first-page":"4028","volume":"34","author":"D Garg","year":"2021","unstructured":"Garg, D., Chakraborty, S., Cundy, C., Song, J., & Ermon, S. (2021). IQ-Learn: Inverse soft-q learning for imitation. Advances in Neural Information Processing Systems, 34, 4028\u20134039.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6653_CR22","unstructured":"Gaurav, A., Rezaee, K., Liu, G., & Poupart, P. (2022). Learning soft constraints from constrained expert demonstrations. arXiv preprint arXiv:2206.01311."},{"key":"6653_CR23","unstructured":"Glazier, A., Loreggia, A., Mattei, N., Rahgooy, T., Rossi, F., & Venable, B. (2022). Learning behavioral soft constraints from demonstrations. arXiv:2202.10407."},{"key":"6653_CR24","unstructured":"Gleave, A., Toyer, S. (2022). A primer on maximum causal entropy inverse reinforcement learning. arXiv preprint arXiv:2203.11409."},{"key":"6653_CR25","unstructured":"Haarnoja, T., Tang, H., Abbeel, P., & Levine, S. (2017). Reinforcement learning with deep energy-based policies. In International conference on machine learning (pp. 1352\u20131361). PMLR."},{"key":"6653_CR26","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., & Levine, S. (2018). Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning (pp. 1861\u20131870). PMLR."},{"key":"6653_CR27","unstructured":"Ho, J., & Ermon, S. (2016). Generative adversarial imitation learning. In Advances in neural information processing systems (Vol. 29)."},{"key":"6653_CR28","unstructured":"Kim, K., Swamy, G., Liu, Z., Zhao, D., Choudhury, S., & Wu, Z. S. (2023). Learning shared safety constraints from multi-task demonstrations."},{"key":"6653_CR29","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980."},{"key":"6653_CR30","unstructured":"Kostrikov, I., Nachum, O., & Tompson, J. (2020). Imitation learning via off-policy distribution matching. In 8th international conference on learning representations, ICLR 2020, Addis Ababa, Ethiopia, April 26\u201330, 2020. OpenReview.net. https:\/\/openreview.net\/forum?id=Hyg-JC4FDr."},{"key":"6653_CR31","doi-asserted-by":"publisher","unstructured":"Krajewski, R., Bock, J., Kloeker, L., & Eckstein, L. (2018). The highd dataset: A drone dataset of naturalistic vehicle trajectories on german highways for validation of highly automated driving systems. In 2018 21st international conference on intelligent transportation systems (ITSC) (pp. 2118\u20132125). https:\/\/doi.org\/10.1109\/ITSC.2018.8569552.","DOI":"10.1109\/ITSC.2018.8569552"},{"key":"6653_CR32","unstructured":"Lee, K., Smith, L., Dragan, A., & Abbeel, P. (2021). B-pref: Benchmarking preference-based reinforcement learning. arXiv preprint arXiv:2111.03026."},{"key":"6653_CR33","unstructured":"Lindner, D., Chen, X., Tschiatschek, S., Hofmann, K., & Krause, A. (2023). Learning safety constraints from demonstrations with unknown rewards. 
arXiv preprint arXiv:2305.16147."},{"key":"6653_CR34","unstructured":"Liu, G., Luo, Y., Gaurav, A., Rezaee, K., & Poupart, P. (2022). Benchmarking constraint inference in inverse reinforcement learning. arXiv preprint arXiv:2206.09670."},{"key":"6653_CR35","unstructured":"Malik, S., Anwar, U., Aghasi, A., & Ahmed, A. (2021). Inverse constrained reinforcement learning. In International conference on machine learning (pp. 7390\u20137399). PMLR."},{"key":"6653_CR36","unstructured":"Mania, H., Guy, A., & Recht, B. (2018). Simple random search provides a competitive approach to reinforcement learning. arXiv preprint arXiv:1803.07055."},{"key":"6653_CR37","doi-asserted-by":"crossref","unstructured":"McPherson, D. L., Stocking, K. C., Sastry, S. S. (2021). Maximum likelihood constraint inference from stochastic demonstrations. In 2021 IEEE conference on control technology and applications (CCTA) (pp. 1208\u20131213). IEEE.","DOI":"10.1109\/CCTA48906.2021.9658862"},{"key":"6653_CR38","unstructured":"Papadimitriou, D., Anwar, U., & Brown, D. S. (2021) Bayesian inverse constrained reinforcement learning. In Workshop on safe and robust control of uncertain systems (NeurIPS)."},{"key":"6653_CR39","unstructured":"Qiao, G., Liu, G., Poupart, P., et al. (2023). Multi-modal inverse constrained reinforcement learning from a mixture of demonstrations. In Thirty-seventh conference on neural information processing systems."},{"key":"6653_CR40","unstructured":"Ross, S., Gordon, G., & Bagnell, D. (2011). A reduction of imitation learning and structured prediction to no-regret online learning. In Proceedings of the fourteenth international conference on artificial intelligence and statistics (pp. 627\u2013635). JMLR Workshop and Conference Proceedings."},{"key":"6653_CR41","volume-title":"Human compatible: Artificial intelligence and the problem of control","author":"S Russell","year":"2019","unstructured":"Russell, S. (2019). Human compatible: Artificial intelligence and the problem of control. Penguin."},{"key":"6653_CR42","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., & Abbeel, P. (2015). High-dimensional continuous control using generalized advantage estimation. arXiv preprint arXiv:1506.02438."},{"key":"6653_CR43","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347."},{"key":"6653_CR44","unstructured":"Scobee, D. R., & Sastry, S. S. (2019). Maximum likelihood constraint inference for inverse reinforcement learning. arXiv preprint arXiv:1909.05477."},{"key":"6653_CR45","unstructured":"Stocking, K. C., McPherson, D. L., Matthew, R. P., & Tomlin, C. J. (2021). Discretizing Dynamics for Maximum Likelihood Constraint Inference. arXiv:2109.04874."},{"key":"6653_CR46","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction. MIT Press."},{"key":"6653_CR47","unstructured":"Tessler, C., Mankowitz, D. J., & Mannor, S. (2018). Reward constrained policy optimization. arXiv preprint arXiv:1805.11074."},{"issue":"1","key":"6653_CR48","first-page":"3483","volume":"15","author":"K Van Moffaert","year":"2014","unstructured":"Van Moffaert, K., & Now\u00e9, A. (2014). Multi-objective reinforcement learning using sets of pareto dominating policies. 
The Journal of Machine Learning Research, 15(1), 3483\u20133512.","journal-title":"The Journal of Machine Learning Research"},{"key":"6653_CR49","doi-asserted-by":"publisher","unstructured":"Wang, X., Krasowski, H., & Althoff, M. (2021). CommonRoad-RL: A configurable reinforcement learning environment for motion planning of autonomous vehicles. In IEEE international conference on intelligent transportation systems (ITSC). https:\/\/doi.org\/10.1109\/ITSC48978.2021.9564898.","DOI":"10.1109\/ITSC48978.2021.9564898"},{"issue":"3","key":"6653_CR50","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1023\/A:1022672621406","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R. J. (1992). Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning, 8(3), 229\u2013256.","journal-title":"Machine Learning"},{"key":"6653_CR51","unstructured":"Wulfmeier, M., Ondruska, P., & Posner, I. (2015). Maximum entropy deep inverse reinforcement learning. arXiv preprint arXiv:1507.04888."},{"key":"6653_CR52","unstructured":"Ziebart, B. D., Bagnell, J. A., & Dey, A. K. (2010). Modeling interaction via the principle of maximum causal entropy. In ICML."},{"key":"6653_CR53","unstructured":"Ziebart, B. D., Maas, A. L., Bagnell, J. A., Dey, A. K., et al. (2008). Maximum entropy inverse reinforcement learning. In AAAI (vol. 8, pp. 1433\u20131438). Chicago, IL, USA."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06653-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06653-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06653-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T01:03:04Z","timestamp":1771635784000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06653-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,21]]},"references-count":53,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["6653"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06653-5","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,21]]},"assertion":[{"value":"30 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This content has been 
made available to all.","name":"free","label":"Free to read"}],"article-number":"103"}}
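
The record above is a standard Crossref REST API "work" payload. As a minimal sketch (not part of the record itself), assuming only the public Crossref endpoint https://api.crossref.org/works/{DOI} and the Python standard library, the same record can be refetched and a few fields read like this; the field names used below all appear in the payload above:

import json
from urllib.request import urlopen

# DOI taken from the "DOI" field of the record above.
DOI = "10.1007/s10994-024-06653-5"

# Fetch the work record from the Crossref REST API.
with urlopen(f"https://api.crossref.org/works/{DOI}") as resp:
    record = json.load(resp)

# The bibliographic payload lives under "message".
work = record["message"]
print(work["title"][0])              # article title
print(work["container-title"][0])    # journal name ("Machine Learning")
print(work["references-count"])      # 53

# Each entry in "reference" carries either an "unstructured" citation
# string or structured fields such as "volume-title".
for ref in work["reference"][:3]:
    print(ref.get("unstructured") or ref.get("volume-title"))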