{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T15:11:46Z","timestamp":1780672306066,"version":"3.54.1"},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.neucom.2026.133692","type":"journal-article","created":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T16:05:33Z","timestamp":1776787533000},"page":"133692","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Enhancing deep reinforcement learning through fuzzy reward granulation: A strategy for reducing agent-environment interactions"],"prefix":"10.1016","volume":"693","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-5145-1875","authenticated-orcid":false,"given":"Mahdi","family":"Soltani-Nejad","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sayed Mohammad","family":"Mousavi Gazafrudi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hossein","family":"Nezamabadi-pour","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133692_bib0005","series-title":"Reinforcement Learning: an Introduction","author":"Sutton","year":"2018"},{"issue":"7540","key":"10.1016\/j.neucom.2026.133692_bib0010","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.neucom.2026.133692_bib0015","author":"Mnih"},{"key":"10.1016\/j.neucom.2026.133692_bib0020","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Deep reinforcement learning with double q-learning","volume":"vol. 30","author":"Van Hasselt","year":"2016"},{"key":"10.1016\/j.neucom.2026.133692_bib0025","series-title":"International Conference on Machine Learning","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"Wang","year":"2016"},{"key":"10.1016\/j.neucom.2026.133692_bib0030","series-title":"Proceedings of the 33rd International Conference on Machine Learning, Vol. 48 of Proceedings of Machine Learning Research","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"Mnih","year":"2016"},{"key":"10.1016\/j.neucom.2026.133692_bib0035","author":"Lillicrap"},{"key":"10.1016\/j.neucom.2026.133692_bib0040","author":"Schulman"},{"key":"10.1016\/j.neucom.2026.133692_bib0045","series-title":"Proceedings of the 35th International Conference on Machine Learning, Vol. 80 of Proceedings of Machine Learning Research","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"issue":"7953","key":"10.1016\/j.neucom.2026.133692_bib0050","doi-asserted-by":"crossref","first-page":"620","DOI":"10.1038\/s41586-023-05732-2","article-title":"Dense reinforcement learning for safety validation of autonomous vehicles","volume":"615","author":"Feng","year":"2023","journal-title":"Nature"},{"key":"10.1016\/j.neucom.2026.133692_bib0055","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1016\/j.aej.2022.12.057","article-title":"Deep reinforcement learning based optimization of automated guided vehicle time and energy consumption in a container terminal","volume":"67","author":"Drungilas","year":"2023","journal-title":"Alex. Eng. J."},{"issue":"8","key":"10.1016\/j.neucom.2026.133692_bib0060","doi-asserted-by":"crossref","first-page":"4332","DOI":"10.1109\/TNNLS.2021.3117790","article-title":"Deep deterministic policy gradient with compatible critic network","volume":"34","author":"Wang","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"10","key":"10.1016\/j.neucom.2026.133692_bib0065","doi-asserted-by":"crossref","first-page":"8017","DOI":"10.1109\/TNNLS.2022.3148924","article-title":"Learning-based DOS attack power allocation in multiprocess systems","volume":"34","author":"Huang","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"4","key":"10.1016\/j.neucom.2026.133692_bib0070","doi-asserted-by":"crossref","first-page":"1115","DOI":"10.1017\/S0263574722001527","article-title":"Discrete soft actor-critic with auto-encoder on vascular robotic system","volume":"41","author":"Li","year":"2023","journal-title":"Robotica"},{"issue":"3","key":"10.1016\/j.neucom.2026.133692_bib0075","doi-asserted-by":"crossref","first-page":"338","DOI":"10.1016\/S0019-9958(65)90241-X","article-title":"Fuzzy sets","volume":"8","author":"Zadeh","year":"1965","journal-title":"Inf. Control"},{"issue":"4","key":"10.1016\/j.neucom.2026.133692_bib0080","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1109\/2.53","article-title":"Fuzzy logic","volume":"21","author":"Zadeh","year":"1988","journal-title":"Computer"},{"key":"10.1016\/j.neucom.2026.133692_bib0085","first-page":"433","article-title":"Fuzzy sets and information granularity","author":"Zadeh","year":"1979","journal-title":"Fuzzy sets, fuzzy logic, and fuzzy systems: selected papers"},{"issue":"2","key":"10.1016\/j.neucom.2026.133692_bib0090","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/S0165-0114(97)00077-8","article-title":"Toward a theory of fuzzy information granulation and its centrality in human reasoning and fuzzy logic","volume":"90","author":"Zadeh","year":"1997","journal-title":"Fuzzy Sets Syst."},{"key":"10.1016\/j.neucom.2026.133692_bib0095","series-title":"Science and Human Behavior, No. 92904","author":"Skinner","year":"1965"},{"key":"10.1016\/j.neucom.2026.133692_bib0100","doi-asserted-by":"crossref","DOI":"10.1016\/j.energy.2024.134113","article-title":"A knowledge-assisted deep reinforcement learning approach for energy management in hybrid electric vehicles","volume":"313","author":"Zare","year":"2024","journal-title":"Energy"},{"key":"10.1016\/j.neucom.2026.133692_bib0105","article-title":"Integrating asynchronous advantage actor\u2013critic (a3c) and coalitional game theory algorithms for optimizing energy, carbon emissions, and reliability of scientific workflows in cloud data centers","author":"Khaleel","year":"2024","journal-title":"Swarm Evol. Comput."},{"key":"10.1016\/j.neucom.2026.133692_bib0110","doi-asserted-by":"crossref","DOI":"10.1016\/j.apenergy.2024.124594","article-title":"Type-and task-crossing energy management for fuel cell vehicles with longevity consideration: a heterogeneous deep transfer reinforcement learning framework","volume":"377","author":"Huang","year":"2025","journal-title":"Appl. Energy"},{"issue":"2","key":"10.1016\/j.neucom.2026.133692_bib0115","doi-asserted-by":"crossref","first-page":"2012","DOI":"10.1109\/TNSE.2023.3335973","article-title":"Multi-agent transfer reinforcement learning for resource management in underwater acoustic communication networks","volume":"11","author":"Wang","year":"2023","journal-title":"IEEE Trans. Netw. Sci. Eng."},{"key":"10.1016\/j.neucom.2026.133692_bib0120","doi-asserted-by":"crossref","DOI":"10.1016\/j.energy.2022.124849","article-title":"A generalized energy management framework for hybrid construction vehicles via model-based reinforcement learning","volume":"260","author":"Zhang","year":"2022","journal-title":"Energy"},{"key":"10.1016\/j.neucom.2026.133692_bib0125","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112783","article-title":"Model-free robust reinforcement learning via polynomial chaos","volume":"309","author":"Liu","year":"2025","journal-title":"Knowl.-based Syst."},{"key":"10.1016\/j.neucom.2026.133692_bib0130","doi-asserted-by":"crossref","DOI":"10.1016\/j.ast.2024.109474","article-title":"Bayesian quadrature policy optimization for spacecraft proximity maneuvers and docking","volume":"154","author":"Du","year":"2024","journal-title":"Aerosp. Sci. Technol."},{"key":"10.1016\/j.neucom.2026.133692_bib0135","doi-asserted-by":"crossref","DOI":"10.1016\/j.apenergy.2024.125171","article-title":"Bayesian optimization for hyper-parameter tuning of an improved twin delayed deep deterministic policy gradients based energy management strategy for plug-in hybrid electric vehicles","volume":"381","author":"Wang","year":"2025","journal-title":"Appl. Energy"},{"key":"10.1016\/j.neucom.2026.133692_bib0140","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.108221","article-title":"Transfer reinforcement learning via meta-knowledge extraction using auto-pruned decision trees","volume":"242","author":"Lan","year":"2022","journal-title":"Knowl.-based Syst."},{"issue":"1","key":"10.1016\/j.neucom.2026.133692_bib0145","doi-asserted-by":"crossref","DOI":"10.1111\/exsy.13324","article-title":"Model-based offline reinforcement learning for sustainable fishery management","volume":"42","author":"Ju","year":"2025","journal-title":"Expert Syst."},{"key":"10.1016\/j.neucom.2026.133692_bib0150","first-page":"10","article-title":"Tree-based reinforcement learning for identifying optimal personalized treatment decisions for hand deformity in rheumatoid arthritis","author":"Yoon","year":"2021","journal-title":"Plast. Reconstr. Surg."},{"issue":"12","key":"10.1016\/j.neucom.2026.133692_bib0155","doi-asserted-by":"crossref","first-page":"18307","DOI":"10.1109\/TVT.2024.3444475","article-title":"A graph deep reinforcement learning traffic signal control for multiple intersections considering missing data","volume":"73","author":"Xu","year":"2024","journal-title":"IEEE Trans. Veh. Technol."},{"issue":"22","key":"10.1016\/j.neucom.2026.133692_bib0160","doi-asserted-by":"crossref","first-page":"12020","DOI":"10.1007\/s10489-024-05811-5","article-title":"Uncertainty modified policy for multi-agent reinforcement learning","volume":"54","author":"Zhao","year":"2024","journal-title":"Appl. Intell."},{"issue":"7","key":"10.1016\/j.neucom.2026.133692_bib0165","doi-asserted-by":"crossref","first-page":"3374","DOI":"10.1109\/TAI.2024.3351797","article-title":"Learn from safe experience: safe reinforcement learning for task Automation of surgical robot","volume":"5","author":"Fan","year":"2024","journal-title":"IEEE Trans. Artif. Intell."},{"key":"10.1016\/j.neucom.2026.133692_bib0170","article-title":"Variable speed limit control strategy for freeway tunnels based on a multi-objective deep reinforcement learning framework with safety perception","author":"Jin","year":"2024","journal-title":"Expert Syst. Appl."},{"issue":"1\u20132","key":"10.1016\/j.neucom.2026.133692_bib0175","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between mdps and semi-mdps: a framework for temporal abstraction in reinforcement learning","volume":"112","author":"Sutton","year":"1999","journal-title":"Artif. Intell."},{"key":"10.1016\/j.neucom.2026.133692_bib0180","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1214\/aoms\/1177730491","article-title":"On a test of whether one of two random variables is stochastically larger than the other","author":"Mann","year":"1947","journal-title":"The annals of mathematical statistics"},{"key":"10.1016\/j.neucom.2026.133692_bib0185","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1214\/09-SS051","article-title":"Wilcoxon-mann-whitney or t-test? On assumptions for hypothesis tests and multiple interpretations of decision rules","volume":"4","author":"Fay","year":"2010","journal-title":"Statistics surveys"},{"issue":"3","key":"10.1016\/j.neucom.2026.133692_bib0190","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1023\/A:1022689125041","article-title":"Asynchronous stochastic approximation and q-learning","volume":"16","author":"Tsitsiklis","year":"1994","journal-title":"Mach. Learn."},{"key":"10.1016\/j.neucom.2026.133692_bib0195","series-title":"Proceedings of the Nineteenth International Conference on Machine Learning","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","author":"Kakade","year":"2002"},{"issue":"2","key":"10.1016\/j.neucom.2026.133692_bib0200","doi-asserted-by":"crossref","first-page":"447","DOI":"10.1137\/S0363012997331639","article-title":"The ODE method for convergence of stochastic approximation and reinforcement learning","volume":"38","author":"Borkar","year":"2000","journal-title":"SIAM J. Control Optim."}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226010891?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226010891?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T14:36:33Z","timestamp":1780670193000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226010891"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":40,"alternative-id":["S0925231226010891"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133692","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Enhancing deep reinforcement learning through fuzzy reward granulation: A strategy for reducing agent-environment interactions","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133692","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"133692"}}