{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T18:19:25Z","timestamp":1778782765226,"version":"3.51.4"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100002766","name":"Beijing University of Posts and Telecommunications","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002766","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,8]]},"DOI":"10.1016\/j.eswa.2026.132447","type":"journal-article","created":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T05:03:28Z","timestamp":1776143008000},"page":"132447","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Knowledge-guided policy arbitration: A hierarchical cognitive framework for safety-critical decision-making under dynamic conflicting objectives"],"prefix":"10.1016","volume":"323","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9262-6294","authenticated-orcid":false,"given":"Fuqing","family":"Bie","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0839-4608","authenticated-orcid":false,"given":"Xingyang","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5975-1000","authenticated-orcid":false,"given":"Leyan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5677-4763","authenticated-orcid":false,"given":"Dehua","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5174-0708","authenticated-orcid":false,"given":"Songfu","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3369-8267","authenticated-orcid":false,"given":"Shuodi","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4952-1061","authenticated-orcid":false,"given":"Yingzhuo","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8486-6255","authenticated-orcid":false,"given":"Liuyu","family":"Xiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3433-8435","authenticated-orcid":false,"given":"Zhaofeng","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.132447_bib0001","series-title":"International conference on machine learning","first-page":"22","article-title":"Constrained policy optimization","author":"Achiam","year":"2017"},{"key":"10.1016\/j.eswa.2026.132447_bib0002","series-title":"Constrained Markov decision processes","author":"Altman","year":"2021"},{"key":"10.1016\/j.eswa.2026.132447_bib0003","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2021.103649","article-title":"Logic tensor networks","volume":"303","author":"Badreddine","year":"2022","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.eswa.2026.132447_bib0004","doi-asserted-by":"crossref","first-page":"102","DOI":"10.1016\/j.artint.2015.05.008","article-title":"Transferring knowledge as heuristics in reinforcement learning: a case-based approach","volume":"226","author":"Bianchi","year":"2015","journal-title":"Artificial Intelligence"},{"issue":"1","key":"10.1016\/j.eswa.2026.132447_bib0005","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","article-title":"Safe learning in robotics: From learning-based control to safe reinforcement learning","volume":"5","author":"Brunke","year":"2022","journal-title":"Annual Review of Control, Robotics, and Autonomous Systems"},{"issue":"8","key":"10.1016\/j.eswa.2026.132447_bib0006","doi-asserted-by":"crossref","first-page":"1744","DOI":"10.1109\/JAS.2024.124530","article-title":"A survey on type-3 fuzzy logic systems and their control applications","volume":"11","author":"Castillo","year":"2024","journal-title":"IEEE\/CAA Journal of Automatica Sinica"},{"key":"10.1016\/j.eswa.2026.132447_bib0007","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.114514","article-title":"Energy-efficient adaptive perception for autonomous driving via lightweight policy learning and simulation-based optimization","volume":"330","author":"Chen","year":"2025","journal-title":"Knowledge-Based Systems"},{"key":"10.1016\/j.eswa.2026.132447_bib0008","series-title":"Learning for dynamics and control conference","first-page":"104","article-title":"Safe and efficient reinforcement learning using disturbance-observer-based control barrier functions","author":"Cheng","year":"2023"},{"key":"10.1016\/j.eswa.2026.132447_bib0009","unstructured":"Colelough, B. C., & Regli, W. (2025). Neuro-symbolic AI in 2024: A systematic review.arXiv: 2501.05435."},{"key":"10.1016\/j.eswa.2026.132447_bib0010","unstructured":"Cui, Y., Yang, S., Wan, C., Li, X., Xing, J., Zhang, Y., Huang, Y., & Chen, H. (2025). Continual adaptation for autonomous driving with the mixture of progressive experts network.arXiv: 2502.05943."},{"key":"10.1016\/j.eswa.2026.132447_bib0011","series-title":"The 22nd international conference on artificial intelligence and statistics","first-page":"1331","article-title":"Distilling policy distillation","author":"Czarnecki","year":"2019"},{"key":"10.1016\/j.eswa.2026.132447_bib0012","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112524","article-title":"Aiding decision makers in articulating a preference closeness model through compensatory fuzzy logic for many-objective optimization problems","volume":"304","author":"Fernandez","year":"2024","journal-title":"Knowledge-Based Systems"},{"issue":"7","key":"10.1016\/j.eswa.2026.132447_bib0013","doi-asserted-by":"crossref","first-page":"103","DOI":"10.3390\/technologies12070103","article-title":"Defining a metric-driven approach for learning hazardous situations","volume":"12","author":"Fiorino","year":"2024","journal-title":"Technologies"},{"issue":"11","key":"10.1016\/j.eswa.2026.132447_bib0014","doi-asserted-by":"crossref","first-page":"12387","DOI":"10.1007\/s10462-023-10448-w","article-title":"Neurosymbolic AI: The 3rd wave","volume":"56","author":"Garcez","year":"2023","journal-title":"Artificial Intelligence Review"},{"key":"10.1016\/j.eswa.2026.132447_bib0015","series-title":"Proceedings of the 37th international conference on machine learning","article-title":"Random hypervolume scalarizations for provable multi-objective black box optimization","author":"Golovin","year":"2020"},{"issue":"1","key":"10.1016\/j.eswa.2026.132447_bib0016","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1007\/s10458-022-09552-y","article-title":"A practical guide to multi-objective reinforcement learning and planning: CF Hayes et al","volume":"36","author":"Hayes","year":"2022","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"10.1016\/j.eswa.2026.132447_bib0017","first-page":"24432","article-title":"Model-based safe deep reinforcement learning via a constrained proximal policy optimization algorithm","volume":"35","author":"Jayant","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.eswa.2026.132447_bib0018","series-title":"Advances in neural information processing systems","article-title":"A natural policy gradient","volume":"Vol. 14","author":"Kakade","year":"2001"},{"key":"10.1016\/j.eswa.2026.132447_bib0019","unstructured":"Kim, D., Hong, M., Park, J., & Oh, S. (2024). Conflict-averse gradient aggregation for constrained multi-objective reinforcement learning.arXiv: 2403.00282."},{"key":"10.1016\/j.eswa.2026.132447_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.113943","article-title":"Hybrid multi-encoder transformer and case-based reasoning for intelligent decision support in high-speed railway vehicle maintenance","volume":"325","author":"Lee","year":"2025","journal-title":"Knowledge-Based Systems"},{"issue":"39","key":"10.1016\/j.eswa.2026.132447_bib0021","first-page":"1","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"10.1016\/j.eswa.2026.132447_bib0022","doi-asserted-by":"crossref","first-page":"2344","DOI":"10.1109\/TTE.2024.3421342","article-title":"Knowledge-guided deep reinforcement learning for multiobjective energy management of fuel cell electric vehicles","volume":"11","author":"Li","year":"2024","journal-title":"IEEE Transactions on Transportation Electrification"},{"issue":"12","key":"10.1016\/j.eswa.2026.132447_bib0023","doi-asserted-by":"crossref","first-page":"9456","DOI":"10.1109\/TPAMI.2024.3417451","article-title":"A survey of knowledge graph reasoning on graph types: Static, dynamic, and multi-modal","volume":"46","author":"Liang","year":"2024","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.eswa.2026.132447_bib0024","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2015). Continuous control with deep reinforcement learning.arXiv: 1509.02971,."},{"key":"10.1016\/j.eswa.2026.132447_bib0025","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.108235","article-title":"Dynamic knowledge graph reasoning based on deep reinforcement learning","volume":"241","author":"Liu","year":"2022","journal-title":"Knowledge-Based Systems"},{"key":"10.1016\/j.eswa.2026.132447_bib0026","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"7637","article-title":"Adapt to environment sudden changes by learning a context sensitive policy","volume":"Vol. 36","author":"Luo","year":"2022"},{"key":"10.1016\/j.eswa.2026.132447_bib0027","doi-asserted-by":"crossref","DOI":"10.1016\/j.egyai.2022.100169","article-title":"Explainable artificial intelligence (XAI) techniques for energy and power systems: Review, challenges and opportunities","volume":"9","author":"Machlev","year":"2022","journal-title":"Energy and AI"},{"issue":"7540","key":"10.1016\/j.eswa.2026.132447_bib0028","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.eswa.2026.132447_bib0029","series-title":"Advances in neural information processing systems","article-title":"Stability bounds for non-i.i.d. processes","volume":"Vol. 20","author":"Mohri","year":"2007"},{"key":"10.1016\/j.eswa.2026.132447_bib0030","first-page":"1043","article-title":"Reinforcement learning with hierarchies of machines","volume":"10","author":"Parr","year":"1997","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.eswa.2026.132447_bib0031","series-title":"Ict analysis and applications","first-page":"23","article-title":"Internet of things (IOT), three-layer architecture, security issues and counter measures","author":"Paul","year":"2022"},{"key":"10.1016\/j.eswa.2026.132447_bib0032","doi-asserted-by":"crossref","DOI":"10.1016\/j.sysarc.2022.102505","article-title":"Drl-gat-sa: Deep reinforcement learning for autonomous driving planning based on graph attention networks and simplex architecture","volume":"126","author":"Peng","year":"2022","journal-title":"Journal of Systems Architecture"},{"key":"10.1016\/j.eswa.2026.132447_bib0033","unstructured":"Queeney, J., Cai, X., Schperberg, A., Corcodel, R., Benosman, M., & How, J. P. (2024). Gram: Generalization in deep rl with a robust adaptation module.arXiv: 2412.04323."},{"key":"10.1016\/j.eswa.2026.132447_bib0034","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1613\/jair.3987","article-title":"A survey of multi-objective sequential decision-making","volume":"48","author":"Roijers","year":"2013","journal-title":"Journal of Artificial Intelligence Research"},{"key":"10.1016\/j.eswa.2026.132447_bib0035","unstructured":"Rusu, A. A., Colmenarejo, S. G., Gulcehre, C., Desjardins, G., Kirkpatrick, J., Pascanu, R., Mnih, V., Kavukcuoglu, K., & Hadsell, R. (2015). Policy distillation.arXiv: 1511.06295."},{"key":"10.1016\/j.eswa.2026.132447_bib0036","series-title":"Proceedings of the 32nd international conference on machine learning","first-page":"1889","article-title":"Trust region policy optimization","volume":"Vol. 37","author":"Schulman","year":"2015"},{"key":"10.1016\/j.eswa.2026.132447_bib0037","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. https:\/\/arxiv.org\/abs\/1707.06347."},{"key":"10.1016\/j.eswa.2026.132447_bib0038","series-title":"Advances in neural information processing systems","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"vol. 12","author":"Sutton","year":"1999"},{"issue":"1\u20132","key":"10.1016\/j.eswa.2026.132447_bib0039","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning","volume":"112","author":"Sutton","year":"1999","journal-title":"Artificial Intelligence"},{"issue":"1","key":"10.1016\/j.eswa.2026.132447_bib0040","doi-asserted-by":"crossref","DOI":"10.1080\/21642583.2024.2394429","article-title":"Fuzzy logic approach for controlling uncertain and nonlinear systems: A comprehensive review of applications and advances","volume":"12","author":"Tang","year":"2024","journal-title":"Systems Science & Control Engineering"},{"issue":"3","key":"10.1016\/j.eswa.2026.132447_bib0041","doi-asserted-by":"crossref","first-page":"4915","DOI":"10.1109\/LRA.2021.3070252","article-title":"Recovery RL: Safe reinforcement learning with learned recovery zones","volume":"6","author":"Thananjeyan","year":"2021","journal-title":"IEEE Robotics and Automation Letters"},{"issue":"11","key":"10.1016\/j.eswa.2026.132447_bib0042","doi-asserted-by":"crossref","first-page":"4793","DOI":"10.1109\/TNNLS.2020.3027314","article-title":"A survey on explainable artificial intelligence (xai): Toward medical Xai","volume":"32","author":"Tjoa","year":"2020","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"issue":"1","key":"10.1016\/j.eswa.2026.132447_bib0043","first-page":"3483","article-title":"Multi-objective reinforcement learning using sets of pareto dominating policies","volume":"15","author":"Van Moffaert","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"issue":"1","key":"10.1016\/j.eswa.2026.132447_bib0044","first-page":"2023","article-title":"Learning using privileged information: Similarity control and knowledge transfer","volume":"16","author":"Vapnik","year":"2015","journal-title":"Journal of Machine Learning Research : JMLR"},{"key":"10.1016\/j.eswa.2026.132447_bib0045","series-title":"The world wide web conference","first-page":"1897","article-title":"Bolt-k: Bootstrapping ontology learning via transfer of knowledge","author":"Vedula","year":"2019"},{"key":"10.1016\/j.eswa.2026.132447_bib0046","doi-asserted-by":"crossref","DOI":"10.1016\/j.aap.2021.106157","article-title":"A review of surrogate safety measures and their applications in connected and automated vehicles safety modeling","volume":"157","author":"Wang","year":"2021","journal-title":"Accident Analysis & Prevention"},{"key":"10.1016\/j.eswa.2026.132447_bib0047","doi-asserted-by":"crossref","first-page":"854","DOI":"10.1016\/j.renene.2023.01.003","article-title":"Multi-objective deep reinforcement learning for optimal design of wind turbine blade","volume":"203","author":"Wang","year":"2023","journal-title":"Renewable Energy"},{"key":"10.1016\/j.eswa.2026.132447_bib0048","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.111311","article-title":"Neuro-symbolic recommendation model based on logic query","volume":"284","author":"Wu","year":"2024","journal-title":"Knowledge-Based Systems"},{"key":"10.1016\/j.eswa.2026.132447_bib0049","series-title":"A generalized algorithm for multi-objective reinforcement learning and policy adaptation","author":"Yang","year":"2019"},{"key":"10.1016\/j.eswa.2026.132447_bib0050","series-title":"Advances in neural information processing systems","article-title":"A generalized algorithm for multi-objective reinforcement learning and policy adaptation","volume":"Vol. 32","author":"Yang","year":"2019"},{"key":"10.1016\/j.eswa.2026.132447_bib0051","series-title":"Advances in neural information processing systems","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume":"Vol. 33","author":"Yu","year":"2020"},{"key":"10.1016\/j.eswa.2026.132447_bib0052","series-title":"Advances in neural information processing systems","first-page":"13584","article-title":"Anchor-changing regularized natural policy gradient for multi-objective reinforcement learning","volume":"Vol. 35","author":"Zhou","year":"2022"},{"key":"10.1016\/j.eswa.2026.132447_bib0053","unstructured":"Zhu, B., Dang, M., & Grover, A. (2023). Scaling pareto-efficient decision making via offline multi-objective rl. arXiv: 2305.00567."}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426013606?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426013606?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T18:00:41Z","timestamp":1778781641000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426013606"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,8]]},"references-count":53,"alternative-id":["S0957417426013606"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132447","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Knowledge-guided policy arbitration: A hierarchical cognitive framework for safety-critical decision-making under dynamic conflicting objectives","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132447","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"132447"}}