{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T14:14:29Z","timestamp":1780668869593,"version":"3.54.1"},"reference-count":59,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Advanced Engineering Informatics"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.aei.2026.104721","type":"journal-article","created":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T03:09:56Z","timestamp":1776827396000},"page":"104721","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PB","title":["HSFKG-DRL: Hybrid self-evolving fuzzy knowledge guided deep reinforcement learning"],"prefix":"10.1016","volume":"74","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0661-7102","authenticated-orcid":false,"given":"Suyu","family":"Zhu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6445-3912","authenticated-orcid":false,"given":"Mingjun","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dean","family":"Hu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chao","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"6","key":"10.1016\/j.aei.2026.104721_b1","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1007\/s10462-025-11166-1","article-title":"Intelligent games meeting with multi-agent deep reinforcement learning: a comprehensive review","volume":"58","author":"Wang","year":"2025","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.aei.2026.104721_b2","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1016\/j.neucom.2021.04.006","article-title":"Self-play reinforcement learning with comprehensive critic in computer games","volume":"449","author":"Liu","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.aei.2026.104721_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2024.102836","article-title":"A fast transfer reinforcement learning model for transferring force-based human speed adjustment skills to robots for collaborative assembly posture alignment","volume":"62","author":"Sun","year":"2024","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b4","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2023.102028","article-title":"Towards reliable robot packing system based on deep reinforcement learning","volume":"57","author":"Xiong","year":"2023","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.104239","article-title":"Distributed interactive decision-making based on hierarchical games with belief estimation for automated vehicles in on-ramp merging","volume":"71","author":"Zhu","year":"2026","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103731","article-title":"A reinforcement learning framework integrating long-term safety reward and adjustable driving preferences for autonomous driving","volume":"68","author":"Cao","year":"2025","journal-title":"Adv. Eng. Inform."},{"issue":"9","key":"10.1016\/j.aei.2026.104721_b7","first-page":"1006","article-title":"Efficient and scalable reinforcement learning for large-scale network control","volume":"6","author":"Ma","year":"2024","journal-title":"Nat. Mach. Intell."},{"issue":"48","key":"10.1016\/j.aei.2026.104721_b8","doi-asserted-by":"crossref","first-page":"30079","DOI":"10.1073\/pnas.1907370117","article-title":"Fast reinforcement learning with generalized policy updates","volume":"117","author":"Barreto","year":"2020","journal-title":"Proc. Natl. Acad. Sci."},{"issue":"1","key":"10.1016\/j.aei.2026.104721_b9","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1007\/s10458-023-09600-1","article-title":"Accelerating deep reinforcement learning via knowledge-guided policy network","volume":"37","author":"Yu","year":"2023","journal-title":"Auton. Agents Multi-Agent Syst."},{"key":"10.1016\/j.aei.2026.104721_b10","series-title":"2025 IEEE International Conference on Robotics and Automation","first-page":"13226","article-title":"Egomimic: Scaling imitation learning via egocentric video","author":"Kareer","year":"2025"},{"issue":"104","key":"10.1016\/j.aei.2026.104721_b11","doi-asserted-by":"crossref","first-page":"eadt5254","DOI":"10.1126\/scirobotics.adt5254","article-title":"SRT-H: A hierarchical framework for autonomous surgery via language-conditioned imitation learning","volume":"10","author":"Kim","year":"2025","journal-title":"Sci. Robot."},{"issue":"105","key":"10.1016\/j.aei.2026.104721_b12","doi-asserted-by":"crossref","first-page":"eads5033","DOI":"10.1126\/scirobotics.ads5033","article-title":"Precise and dexterous robotic manipulation via human-in-the-loop reinforcement learning","volume":"10","author":"Luo","year":"2025","journal-title":"Sci. Robot."},{"key":"10.1016\/j.aei.2026.104721_b13","series-title":"2024 IEEE International Conference on Robotics and Automation","first-page":"7939","article-title":"Decision making for human-in-the-loop robotic agents via uncertainty-aware reinforcement learning","author":"Singi","year":"2024"},{"key":"10.1016\/j.aei.2026.104721_b14","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2023.110756","article-title":"Deep reinforcement learning with reward shaping for tracking control and vibration suppression of flexible link manipulator","volume":"152","author":"Viswanadhapalli","year":"2024","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.aei.2026.104721_b15","doi-asserted-by":"crossref","unstructured":"A. Ranjan, S. Agrawal, A. Jain, P. Jagtap, S. Kolathaya, et al., Barrier functions inspired reward shaping for reinforcement learning, in: 2024 IEEE International Conference on Robotics and Automation, ICRA, 2024, pp. 10807\u201310813.","DOI":"10.1109\/ICRA57147.2024.10610391"},{"key":"10.1016\/j.aei.2026.104721_b16","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.113087","article-title":"Knowledge guided deep deterministic policy gradient","volume":"311","author":"Qin","year":"2025","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.aei.2026.104721_b17","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.108235","article-title":"Dynamic knowledge graph reasoning based on deep reinforcement learning","volume":"241","author":"Liu","year":"2022","journal-title":"Knowl.-Based Syst."},{"issue":"1","key":"10.1016\/j.aei.2026.104721_b18","first-page":"1","article-title":"Computational intelligence: From nature to aristotle and archimedes, and today to meta-heuristic algorithms","volume":"1","author":"Asteris","year":"2025","journal-title":"Comput. Intell."},{"issue":"1","key":"10.1016\/j.aei.2026.104721_b19","doi-asserted-by":"crossref","first-page":"104","DOI":"10.53941\/bci.2025.100007","article-title":"Prediction of chloride resistance level in concrete using optimized tree-based machine learning models","volume":"1","author":"Benzaamia","year":"2025","journal-title":"Bull. Comp. Intell."},{"issue":"3","key":"10.1016\/j.aei.2026.104721_b20","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1007\/s10470-022-02014-1","article-title":"Fuzzy-ChOA: an improved chimp optimization algorithm for marine mammal classification using artificial neural network","volume":"111","author":"Saffari","year":"2022","journal-title":"Analog Integr. Circuits Signal Process."},{"issue":"4","key":"10.1016\/j.aei.2026.104721_b21","doi-asserted-by":"crossref","first-page":"4843","DOI":"10.1007\/s11063-022-11068-1","article-title":"Underwater backscatter recognition using deep fuzzy extreme convolutional neural network optimized via hunger games search","volume":"55","author":"Khishe","year":"2023","journal-title":"Neural Process. Lett."},{"key":"10.1016\/j.aei.2026.104721_b22","first-page":"1","article-title":"Preserving and combining knowledge in robotic lifelong reinforcement learning","author":"Meng","year":"2025","journal-title":"Nat. Mach. Intell."},{"key":"10.1016\/j.aei.2026.104721_b23","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103188","article-title":"Knowledge transfer from simple to complex: A safe and efficient reinforcement learning framework for autonomous driving decision-making","volume":"65","author":"Zhou","year":"2025","journal-title":"Adv. Eng. Inform."},{"issue":"4","key":"10.1016\/j.aei.2026.104721_b24","doi-asserted-by":"crossref","first-page":"3213","DOI":"10.1007\/s10462-021-10085-1","article-title":"Model-free reinforcement learning from expert demonstrations: a survey","volume":"55","author":"Ram\u00edrez","year":"2022","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.aei.2026.104721_b25","series-title":"International Conference on Machine Learning","first-page":"1577","article-title":"Efficient online reinforcement learning with offline data","author":"Ball","year":"2023"},{"issue":"3","key":"10.1016\/j.aei.2026.104721_b26","doi-asserted-by":"crossref","first-page":"7327","DOI":"10.1109\/LRA.2022.3180108","article-title":"Calvin: A benchmark for language-conditioned policy learning for long-horizon robot manipulation tasks","volume":"7","author":"Mees","year":"2022","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.aei.2026.104721_b27","series-title":"Language conditioned imitation learning over unstructured data","author":"Lynch","year":"2020"},{"key":"10.1016\/j.aei.2026.104721_b28","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2024.102872","article-title":"Dynamic flexible job-shop scheduling by multi-agent reinforcement learning with reward-shaping","volume":"62","author":"Zhang","year":"2024","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2022.101800","article-title":"Reward shaping in multiagent reinforcement learning for self-organizing systems in assembly tasks","volume":"54","author":"Huang","year":"2022","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b30","series-title":"Kogun: accelerating deep reinforcement learning via integrating human suboptimal knowledge","author":"Zhang","year":"2020"},{"key":"10.1016\/j.aei.2026.104721_b31","series-title":"2018 IEEE International Conference on Robotics and Automation","first-page":"6292","article-title":"Overcoming exploration in reinforcement learning with demonstrations","author":"Nair","year":"2018"},{"key":"10.1016\/j.aei.2026.104721_b32","series-title":"2024 IEEE International Conference on Robotics and Automation","first-page":"9168","article-title":"Sprint: Scalable policy pre-training via language instruction relabeling","author":"Zhang","year":"2024"},{"issue":"3","key":"10.1016\/j.aei.2026.104721_b33","doi-asserted-by":"crossref","first-page":"2662","DOI":"10.1109\/TII.2022.3183802","article-title":"Reward shaping-based actor\u2013critic deep reinforcement learning for residential energy management","volume":"19","author":"Lu","year":"2022","journal-title":"IEEE Trans. Ind. Inform."},{"issue":"4","key":"10.1016\/j.aei.2026.104721_b34","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3618295","article-title":"A comprehensive survey on automatic knowledge graph construction","volume":"56","author":"Zhong","year":"2023","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.aei.2026.104721_b35","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.104264","article-title":"Making manufacturing knowledge graph more intelligent: A knowledge intelligence management method for manufacturing enterprises","volume":"71","author":"Liu","year":"2026","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b36","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103993","article-title":"LLM-enabled generative cultural product design with symbolic semantic representation","volume":"69","author":"Yin","year":"2026","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b37","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2024.102356","article-title":"Crowd evacuation with human-level intelligence via neuro-symbolic approach","volume":"60","author":"Bahamid","year":"2024","journal-title":"Adv. Eng. Inform."},{"issue":"6","key":"10.1016\/j.aei.2026.104721_b38","doi-asserted-by":"crossref","first-page":"2163","DOI":"10.1007\/s40815-023-01534-w","article-title":"Literature review of the recent trends and applications in various fuzzy rule-based systems","volume":"25","author":"Varshney","year":"2023","journal-title":"Int. J. Fuzzy Syst."},{"issue":"9","key":"10.1016\/j.aei.2026.104721_b39","doi-asserted-by":"crossref","first-page":"3069","DOI":"10.1109\/TFUZZ.2023.3243935","article-title":"Fuzzy rule-based explainer systems for deep neural networks: From local explainability to global understanding","volume":"31","author":"Aghaeipoor","year":"2023","journal-title":"IEEE Trans. Fuzzy Syst."},{"key":"10.1016\/j.aei.2026.104721_b40","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2021.101366","article-title":"Integrating and navigating engineering design decision-related knowledge using decision knowledge graph","volume":"50","author":"Hao","year":"2021","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b41","doi-asserted-by":"crossref","unstructured":"L. Illanes, X. Yan, R.T. Icarte, S.A. McIlraith, Symbolic plans as high-level instructions for reinforcement learning, in: Proceedings of the International Conference on Automated Planning and Scheduling, Vol. 30, 2020, pp. 540\u2013550.","DOI":"10.1609\/icaps.v30i1.6750"},{"key":"10.1016\/j.aei.2026.104721_b42","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103180","article-title":"Reinforcement learning-based fuzzy controller for autonomous guided vehicle path tracking","volume":"65","author":"Kuo","year":"2025","journal-title":"Adv. Eng. Inform."},{"issue":"6","key":"10.1016\/j.aei.2026.104721_b43","doi-asserted-by":"crossref","first-page":"3655","DOI":"10.1109\/TFUZZ.2024.3380824","article-title":"Reinforcement learning of an interpretable fuzzy system through a neural fuzzy actor-critic framework for mobile robot control","volume":"32","author":"Juang","year":"2024","journal-title":"IEEE Trans. Fuzzy Syst."},{"key":"10.1016\/j.aei.2026.104721_b44","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2023.101968","article-title":"CO2 impact on convolutional network model training for autonomous driving through behavioral cloning","volume":"56","author":"Mart\u00ednez","year":"2023","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b45","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2023.102273","article-title":"Towards conformal automation in air traffic control: Learning conflict resolution strategies through behavior cloning","volume":"59","author":"Guleria","year":"2024","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104721_b46","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2021.103500","article-title":"A survey of inverse reinforcement learning: Challenges, methods and progress","volume":"297","author":"Arora","year":"2021","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.aei.2026.104721_b47","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.122499","article-title":"Meta-IRLSOT++: A meta-inverse reinforcement learning method for fast adaptation of trajectory prediction networks","volume":"240","author":"Yang","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.aei.2026.104721_b48","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.125823","article-title":"Knowledge guided fuzzy deep reinforcement learning","volume":"264","author":"Qin","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.aei.2026.104721_b49","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.131004","article-title":"Knowledge guided self-evolving fuzzy deep reinforcement learning with dual-adaptive optimization strategy","volume":"307","author":"Zhu","year":"2026","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.aei.2026.104721_b50","article-title":"Reinforcement learning in continuous action spaces through sequential monte carlo methods","volume":"20","author":"Lazaric","year":"2007","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.aei.2026.104721_b51","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"Sutton","year":"1999","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.aei.2026.104721_b52","article-title":"Actor-critic algorithms","volume":"12","author":"Konda","year":"1999","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.aei.2026.104721_b53","series-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"10.1016\/j.aei.2026.104721_b54","series-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015"},{"key":"10.1016\/j.aei.2026.104721_b55","series-title":"International Conference on Machine Learning","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"10.1016\/j.aei.2026.104721_b56","series-title":"International Conference on Machine Learning","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"Silver","year":"2014"},{"key":"10.1016\/j.aei.2026.104721_b57","series-title":"International Conference on Machine Learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.aei.2026.104721_b58","series-title":"Efficient Memory-Based Learning for Robot Control","author":"Moore","year":"1990"},{"issue":"11","key":"10.1016\/j.aei.2026.104721_b59","doi-asserted-by":"crossref","first-page":"16181","DOI":"10.1109\/TITS.2024.3420959","article-title":"Safety-aware human-in-the-loop reinforcement learning with shared control for autonomous driving","volume":"25","author":"Huang","year":"2024","journal-title":"IEEE Trans. Intell. Transp. Syst."}],"container-title":["Advanced Engineering Informatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626004131?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626004131?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T13:39:47Z","timestamp":1780666787000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1474034626004131"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":59,"alternative-id":["S1474034626004131"],"URL":"https:\/\/doi.org\/10.1016\/j.aei.2026.104721","relation":{},"ISSN":["1474-0346"],"issn-type":[{"value":"1474-0346","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"HSFKG-DRL: Hybrid self-evolving fuzzy knowledge guided deep reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Advanced Engineering Informatics","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.aei.2026.104721","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104721"}}