{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T14:19:13Z","timestamp":1778768353173,"version":"3.51.4"},"reference-count":35,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62173166"],"award-info":[{"award-number":["62173166"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.neucom.2026.133589","type":"journal-article","created":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T17:02:21Z","timestamp":1776013341000},"page":"133589","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A Q-learning robot path planning algorithm based on improved reward function and exploration strategy"],"prefix":"10.1016","volume":"685","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6518-0207","authenticated-orcid":false,"given":"Zhaojun","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8128-9263","authenticated-orcid":false,"given":"Guangyang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Peiye","family":"Cao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8630-5192","authenticated-orcid":false,"given":"Shun","family":"Lu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"7","key":"10.1016\/j.neucom.2026.133589_bib0005","doi-asserted-by":"crossref","first-page":"4415","DOI":"10.1109\/TSMC.2021.3096935","article-title":"An improved DYNA-Q algorithm for mobile robot path planning in unknown dynamic environment","volume":"52","author":"Pei","year":"2021","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"3","key":"10.1016\/j.neucom.2026.133589_bib0010","doi-asserted-by":"crossref","first-page":"413","DOI":"10.18196\/jrc.v4i3.18489","article-title":"Application of odometry and Dijkstra algorithm as navigation and shortest path determination system of warehouse mobile robot","volume":"4","author":"Ubaidillah","year":"2023","journal-title":"J. Robot. Control (JRC)"},{"issue":"8","key":"10.1016\/j.neucom.2026.133589_bib0015","doi-asserted-by":"crossref","first-page":"5275","DOI":"10.1109\/LRA.2023.3293319","article-title":"TMSTC*: a path planning algorithm for minimizing turns in multi-robot coverage","volume":"8","author":"Lu","year":"2023","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"3","key":"10.1016\/j.neucom.2026.133589_bib0020","doi-asserted-by":"crossref","first-page":"106","DOI":"10.1007\/s10846-024-02144-w","article-title":"Dynamic informed bias RRT*-connect: improving heuristic guidance by dynamic informed bias using hybrid dual trees search","volume":"110","author":"Li","year":"2024","journal-title":"J. Intell. Robot. Syst."},{"key":"10.1016\/j.neucom.2026.133589_bib0025","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2024.110032","article-title":"Chaotic sequence-driven path planning for autonomous robot terrain coverage","volume":"123","author":"Abou-Bakr","year":"2025","journal-title":"Comput. Electr. Eng."},{"issue":"9","key":"10.1016\/j.neucom.2026.133589_bib0030","first-page":"1006","article-title":"Efficient and scalable reinforcement learning for large-scale network control","volume":"6","author":"Ma","year":"2024","journal-title":"Nat. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133589_bib0035","first-page":"24432","article-title":"Model-based safe deep reinforcement learning via a constrained proximal policy optimization algorithm","volume":"35","author":"Jayant","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"2","key":"10.1016\/j.neucom.2026.133589_bib0040","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1016\/j.aci.2018.10.001","article-title":"Artificial neural networks and machine learning techniques applied to ground penetrating radar: a review","volume":"17","author":"Travassos","year":"2021","journal-title":"Appl. Comput. Inform."},{"key":"10.1016\/j.neucom.2026.133589_bib0045","doi-asserted-by":"crossref","first-page":"91275","DOI":"10.1109\/ACCESS.2023.3307480","article-title":"An optimized path planning method for container ships in bohai bay based on improved deep q-learning","volume":"11","author":"Gao","year":"2023","journal-title":"IEEE Access"},{"issue":"3s","key":"10.1016\/j.neucom.2026.133589_bib0050","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3468872","article-title":"Precise no-reference image quality evaluation based on distortion identification","volume":"17","author":"Yan","year":"2021","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"10.1016\/j.neucom.2026.133589_bib0055","series-title":"International Symposium on Artificial Intelligence and Robotics 2020","first-page":"296","article-title":"An end-to-end reinforcement learning method for automated guided vehicle path planning","volume":"vol. 11574","author":"Sun","year":"2020"},{"issue":"4","key":"10.1016\/j.neucom.2026.133589_bib0060","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3404374","article-title":"Depth image denoising using nuclear norm and learning graph model","volume":"16","author":"Yan","year":"2020","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"issue":"4","key":"10.1016\/j.neucom.2026.133589_bib0065","doi-asserted-by":"crossref","first-page":"2913","DOI":"10.1109\/TSMC.2025.3539318","article-title":"Path-planning method based on reinforcement learning for cooperative two-crane lift considering load constraint","volume":"55","author":"An","year":"2025","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"8","key":"10.1016\/j.neucom.2026.133589_bib0070","doi-asserted-by":"crossref","first-page":"9001","DOI":"10.1109\/TWC.2024.3357702","article-title":"Multi-agent Q-learning for real-time load balancing user association and handover in mobile networks","volume":"23","author":"Alizadeh","year":"2024","journal-title":"IEEE Trans. Wirel. Commun."},{"issue":"1","key":"10.1016\/j.neucom.2026.133589_bib0075","doi-asserted-by":"crossref","first-page":"51","DOI":"10.21278\/TOF.491063824","article-title":"A decentralised collision avoidance method based on Q-learning for multi-AGV systems","volume":"49","author":"\u00c7oban","year":"2025","journal-title":"Trans. FAMENA"},{"issue":"2","key":"10.1016\/j.neucom.2026.133589_bib0080","doi-asserted-by":"crossref","DOI":"10.1007\/s11432-022-3696-5","article-title":"A survey on model-based reinforcement learning","volume":"67","author":"Luo","year":"2024","journal-title":"Sci. China Inf. Sci."},{"key":"10.1016\/j.neucom.2026.133589_bib0085","series-title":"Proceedings 2013 International Conference on Mechatronic Sciences, Electric Engineering and Computer (MEC)","first-page":"2204","article-title":"The method based on Q-learning path planning in migrating workflow","author":"Xiao","year":"2013"},{"issue":"4","key":"10.1016\/j.neucom.2026.133589_bib0090","doi-asserted-by":"crossref","first-page":"814","DOI":"10.1109\/TSMCA.2012.2226024","article-title":"Realization of an adaptive memetic algorithm using differential evolution and q-learning: a case study in multirobot path planning","volume":"43","author":"Rakshit","year":"2013","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"6","key":"10.1016\/j.neucom.2026.133589_bib0095","doi-asserted-by":"crossref","first-page":"148","DOI":"10.24018\/ejeng.2022.7.6.2944","article-title":"Developing a deep Q-learning and neural network framework for trajectory planning","volume":"7","author":"Kosuru","year":"2022","journal-title":"Eur. J. Eng. Technol. Res."},{"key":"10.1016\/j.neucom.2026.133589_bib0100","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1007\/s10514-020-09947-4","article-title":"Reinforcement based mobile robot path planning with improved dynamic window approach in unknown environment","volume":"45","author":"Chang","year":"2021","journal-title":"Auton. Robots"},{"key":"10.1016\/j.neucom.2026.133589_bib0105","doi-asserted-by":"crossref","first-page":"92879","DOI":"10.1109\/ACCESS.2022.3203072","article-title":"Improved Q-learning applied to dynamic obstacle avoidance and path planning","volume":"10","author":"Wang","year":"2022","journal-title":"IEEE Access"},{"issue":"1","key":"10.1016\/j.neucom.2026.133589_bib0110","doi-asserted-by":"crossref","first-page":"66","DOI":"10.3390\/ijgi11010066","article-title":"Indoor emergency path planning based on the Q-learning optimization algorithm","volume":"11","author":"Xu","year":"2022","journal-title":"ISPRS Int. J. Geo-inf."},{"key":"10.1016\/j.neucom.2026.133589_bib0115","series-title":"2022 IEEE 17th Conference on Industrial Electronics and Applications (ICIEA)","first-page":"1104","article-title":"Q-learning-based collision-free path planning for mobile robot in unknown environment","author":"Wang","year":"2022"},{"key":"10.1016\/j.neucom.2026.133589_bib0120","doi-asserted-by":"crossref","DOI":"10.1016\/j.cie.2023.109338","article-title":"A modified Q-learning path planning approach using distortion concept and optimization in dynamic environment for autonomous mobile robot","volume":"181","author":"Low","year":"2023","journal-title":"Comput. Ind. Eng."},{"key":"10.1016\/j.neucom.2026.133589_bib0125","series-title":"2023 IEEE International Conference on Sensors, Electronics and Computer Engineering (ICSECE)","first-page":"277","article-title":"An improved Q-learning algorithm for path planning","author":"Huang","year":"2023"},{"key":"10.1016\/j.neucom.2026.133589_bib0130","doi-asserted-by":"crossref","first-page":"3866","DOI":"10.1109\/TASE.2024.3401456","article-title":"Hierarchical Q-learning path planning for cooperative tracking control of multi-agent systems with lumped uncertainties","volume":"22","author":"Lu","year":"2025","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"key":"10.1016\/j.neucom.2026.133589_bib0135","series-title":"International Conference on Machine Learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume":"vol. 80","author":"Fujimoto","year":"2018"},{"issue":"4","key":"10.1016\/j.neucom.2026.133589_bib0140","doi-asserted-by":"crossref","first-page":"1445","DOI":"10.1109\/TPAMI.2020.2975798","article-title":"Deep multi-view enhancement hashing for image retrieval","volume":"43","author":"Yan","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133589_bib0145","author":"Naik"},{"issue":"2","key":"10.1016\/j.neucom.2026.133589_bib0150","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1504\/IJSNET.2025.144555","article-title":"A reinforcement learning algorithm for mobile robot path planning with dynamic Q-value adjustment","volume":"47","author":"Hua","year":"2025","journal-title":"Int. J. Sens. Netw."},{"issue":"6","key":"10.1016\/j.neucom.2026.133589_bib0155","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TVT.2025.3533006","article-title":"Mix Q-learning for lane changing: a collaborative decision-making method in multi-agent deep reinforcement learning","volume":"74","author":"Bi","year":"2025","journal-title":"IEEE Trans. Veh. Technol."},{"issue":"1","key":"10.1016\/j.neucom.2026.133589_bib0160","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1109\/TAFE.2025.3528403","article-title":"A fast path-planning method for continuous harvesting of table-top grown strawberries","volume":"3","author":"Miao","year":"2025","journal-title":"IEEE Trans. Agrifood Electron."},{"key":"10.1016\/j.neucom.2026.133589_bib0165","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1016\/j.neucom.2022.03.014","article-title":"Groundwater level prediction using machine learning models: a comprehensive review","volume":"489","author":"Tao","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133589_bib0170","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.130096","article-title":"Optimal bipartite consensus for multi-agent systems using twin Q-learning deterministic policy gradient algorithm with adaptive learning rate","volume":"638","author":"Ji","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133589_bib0175","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.130191","article-title":"Goal-driven navigation via variational sparse Q network and transfer learning","volume":"638","author":"Yao","year":"2025","journal-title":"Neurocomputing"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226009860?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226009860?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T13:54:06Z","timestamp":1778766846000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226009860"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":35,"alternative-id":["S0925231226009860"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133589","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A Q-learning robot path planning algorithm based on improved reward function and exploration strategy","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133589","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"133589"}}