{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T16:04:40Z","timestamp":1781021080209,"version":"3.54.1"},"reference-count":47,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Applied Soft Computing"],"published-print":{"date-parts":[[2026,8]]},"DOI":"10.1016\/j.asoc.2026.115393","type":"journal-article","created":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:29:38Z","timestamp":1778084978000},"page":"115393","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A Light-Guided Q-learning algorithm with backward reward propagation for path planning in cruise ship environments"],"prefix":"10.1016","volume":"200","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3839-1022","authenticated-orcid":false,"given":"Wanying","family":"Zhang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5135-8198","authenticated-orcid":false,"given":"Huajun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.asoc.2026.115393_bib1","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1109\/MELE.2017.2718838","article-title":"Increasing the safety of modern passenger ships: a comprehensive approach for designing safe shipboard integrated electrical power systems","volume":"5","author":"Vicenzutti","year":"2017","journal-title":"IEEE Electrific. Mag."},{"key":"10.1016\/j.asoc.2026.115393_bib2","doi-asserted-by":"crossref","DOI":"10.1016\/j.ress.2022.108887","article-title":"A novel method for the risk assessment of human evacuation from cruise ships in maritime transportation","volume":"230","author":"Wang","year":"2023","journal-title":"Reliab. Eng. Syst. Saf."},{"key":"10.1016\/j.asoc.2026.115393_bib3","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.120254","article-title":"Path planning techniques for mobile robots: Review and prospect","volume":"227","author":"Liu","year":"2023","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115393_bib4","doi-asserted-by":"crossref","DOI":"10.1016\/j.robot.2024.104822","article-title":"Improved ACO algorithm fused with improved Q-Learning algorithm for Bessel curve global path planning of search and rescue robots","author":"Fang","year":"2024","journal-title":"Robot. Auton. Syst."},{"key":"10.1016\/j.asoc.2026.115393_bib5","article-title":"A cyber-physical robotic mobile fulfillment system in smart manufacturing: The simulation aspect","volume":"83","author":"KEUNG","year":"2023","journal-title":"Robot. Comput. Manuf."},{"key":"10.1016\/j.asoc.2026.115393_bib6","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1007\/BF01386390","article-title":"A note on two problems in connexion with graphs","volume":"1","author":"Dijkstra","year":"1959","journal-title":"Numer. Math."},{"key":"10.1016\/j.asoc.2026.115393_bib7","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1109\/TSSC.1968.300136","article-title":"A formal basis for the heuristic determination of minimum cost paths","volume":"4","author":"Hart","year":"1968","journal-title":"IEEE Trans. Syst. Sci. Cybern."},{"key":"10.1016\/j.asoc.2026.115393_bib8","doi-asserted-by":"crossref","first-page":"566","DOI":"10.1109\/70.508439","article-title":"Probabilistic roadmaps for path planning in high-dimensional configuration spaces","volume":"12","author":"Kavraki","year":"1996","journal-title":"IEEE Trans. Robot. Autom."},{"key":"10.1016\/j.asoc.2026.115393_bib9","article-title":"Rapidly-exploring random trees: a new tool for path planning","author":"LaValle","year":"1998","journal-title":"Annu. Res. Rep."},{"key":"10.1016\/j.asoc.2026.115393_bib10","doi-asserted-by":"crossref","first-page":"16519","DOI":"10.1007\/s00500-020-04958-w","article-title":"An extensive review of computational intelligence-based optimization algorithms: trends and applications","volume":"24","author":"Goel","year":"2020","journal-title":"Soft Comput."},{"key":"10.1016\/j.asoc.2026.115393_bib11","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1109\/3477.484436","article-title":"Ant system: optimization by a colony of cooperating agents","volume":"26","author":"Dorigo","year":"1996","journal-title":"IEEE Trans. Syst. Man Cybern. B"},{"key":"10.1016\/j.asoc.2026.115393_bib12","series-title":"Adaptation in Natural and Artificial Systems","author":"Holland","year":"1992"},{"key":"10.1016\/j.asoc.2026.115393_bib13","doi-asserted-by":"crossref","first-page":"1942","DOI":"10.1109\/ICNN.1995.488968","article-title":"Particle swarm optimization","author":"Kennedy","year":"1995","journal-title":"Proc. ICNN'95 - Int. Conf. Neural Netw. 4"},{"key":"10.1016\/j.asoc.2026.115393_bib14","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1007\/s11370-017-0217-x","article-title":"Path planning of modular robots on various terrains using Q-learning versus optimization algorithms","volume":"10","author":"Haghzad Klidbary","year":"2017","journal-title":"Intel. Serv. Robot."},{"key":"10.1016\/j.asoc.2026.115393_bib15","first-page":"279","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn"},{"key":"10.1016\/j.asoc.2026.115393_bib16","doi-asserted-by":"crossref","first-page":"5984","DOI":"10.1109\/TIE.2021.3090707","article-title":"Active object detection based on a novel deep Q-learning network and long-term learning strategy for the service robot","volume":"69","author":"Liu","year":"2022","journal-title":"IEEE Trans. Ind. Electron"},{"key":"10.1016\/j.asoc.2026.115393_bib17","doi-asserted-by":"crossref","first-page":"8032","DOI":"10.1016\/j.eswa.2008.10.056","article-title":"Simulation of sequential data: an enhanced reinforcement learning approach","volume":"36","author":"Vanhulsel","year":"2009","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115393_bib18","doi-asserted-by":"crossref","DOI":"10.1016\/j.oceaneng.2023.113632","article-title":"A path planning approach for unmanned surface vehicles based on dynamic and fast Q-learning","volume":"270","author":"Hao","year":"2023","journal-title":"Ocean Eng."},{"key":"10.1016\/j.asoc.2026.115393_bib19","doi-asserted-by":"crossref","first-page":"8961","DOI":"10.1007\/s00500-022-07293-4","article-title":"Research on path planning algorithm of mobile robot based on reinforcement learning","volume":"26","author":"Pan","year":"2022","journal-title":"Soft Comput."},{"key":"10.1016\/j.asoc.2026.115393_bib20","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1023\/A:1022633531479","article-title":"Learning to predict by the methods of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Mach. Learn."},{"key":"10.1016\/j.asoc.2026.115393_bib21","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1023\/A:1022635613229","article-title":"Prioritized sweeping: Reinforcement learning with less data and less time","volume":"13","author":"Moore","year":"1993","journal-title":"Mach. Learn"},{"key":"10.1016\/j.asoc.2026.115393_bib22","first-page":"5048","article-title":"Hindsight experience replay","author":"Andrychowicz","year":"2017","journal-title":"Neural Inf. Process. Syst."},{"key":"10.1016\/j.asoc.2026.115393_bib23","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0279438","article-title":"A novel Q-learning algorithm based on improved whale optimization algorithm for path planning","volume":"17","author":"Li","year":"2022","journal-title":"PLoS ONE"},{"key":"10.1016\/j.asoc.2026.115393_bib24","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1016\/j.advengsoft.2016.01.008","article-title":"The whale optimization algorithm","volume":"95","author":"Mirjalili","year":"2016","journal-title":"Adv. Eng. Softw."},{"key":"10.1016\/j.asoc.2026.115393_bib25","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1016\/j.robot.2019.02.013","article-title":"Solving the optimal path planning of a mobile robot using improved Q-learning","volume":"115","author":"Low","year":"2019","journal-title":"Robot. Auton. Syst."},{"key":"10.1016\/j.asoc.2026.115393_bib26","doi-asserted-by":"crossref","unstructured":"X.-S. Yang, Flower Pollination Algorithm for Global Optimization, International Conference on Unconventional Computation and Natural Computation abs\/1312.5673 (2012). https:\/\/doi.org\/10.1007\/978-3-642-32894-7_27.","DOI":"10.1007\/978-3-642-32894-7_27"},{"key":"10.1016\/j.asoc.2026.115393_bib27","doi-asserted-by":"crossref","first-page":"5910","DOI":"10.3390\/s22155910","article-title":"CLSQL: improved Q-learning algorithm based on continuous local search policy for mobile robot path planning","volume":"22","author":"Ma","year":"2022","journal-title":"Sensors"},{"key":"10.1016\/j.asoc.2026.115393_bib28","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110355","article-title":"Mobile agent path planning under uncertain environment using reinforcement learning and probabilistic model checking","volume":"264","author":"Wang","year":"2023","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.asoc.2026.115393_bib29","doi-asserted-by":"crossref","first-page":"1057","DOI":"10.3390\/sym13061057","article-title":"A self-adaptive reinforcement-exploration Q-learning algorithm","volume":"13","author":"Zhang","year":"2021","journal-title":"Symmetry"},{"key":"10.1016\/j.asoc.2026.115393_bib30","doi-asserted-by":"crossref","first-page":"66","DOI":"10.3390\/ijgi11010066","article-title":"Indoor emergency path planning based on the Q-learning optimization algorithm","volume":"11","author":"Xu","year":"2022","journal-title":"IJGI"},{"key":"10.1016\/j.asoc.2026.115393_bib31","doi-asserted-by":"crossref","first-page":"2140","DOI":"10.1109\/TSMCB.2004.832154","article-title":"A new Q-learning algorithm based on the metropolis criterion","volume":"34","author":"Guo","year":"2004","journal-title":"IEEE Trans. Syst. Man Cyber B Cyber"},{"key":"10.1016\/j.asoc.2026.115393_bib32","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2021.103560","article-title":"Path-length analysis for grid-based path planning","volume":"301","author":"Bailey","year":"2021","journal-title":"Artif. Intell."},{"key":"10.1016\/j.asoc.2026.115393_bib33","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1002\/rob.20244","article-title":"State space sampling of feasible motions for high-performance mobile robot navigation in complex environments","volume":"25","author":"Howard","year":"2008","journal-title":"J. Field Robot."},{"key":"10.1016\/j.asoc.2026.115393_bib34","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1016\/j.cirpj.2024.04.002","article-title":"A study on real-time path planning method for skin machining based on frame feature extraction and chronological feature optimization","volume":"51","author":"Liu","year":"2024","journal-title":"Cirp. J. Manuf. Sci. Technol."},{"key":"10.1016\/j.asoc.2026.115393_bib35","article-title":"Cross-regional path planning based on improved Q-learning with dynamic exploration factor and heuristic reward value","volume":"260","author":"Zhong","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115393_bib36","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.113836","article-title":"Novel design of reward and epsilon-greedy decay strategy tailored for Q-learning in optimizing local mobile robot path planning","author":"Ben-Akka","year":"2025","journal-title":"Knowl. -Based Syst."},{"key":"10.1016\/j.asoc.2026.115393_bib37","series-title":"in: Adapt. Learn. Optim","first-page":"3","article-title":"Reinforcement Learning and Markov Decision Processes","author":"van Otterlo","year":"2012"},{"key":"10.1016\/j.asoc.2026.115393_bib38","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","article-title":"Markov Decision Processe","author":"Puterman","year":"1994"},{"key":"10.1016\/j.asoc.2026.115393_bib39","series-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.asoc.2026.115393_bib40","unstructured":"C. Watkins, Learning from delayed rewards. (1989). \u3008https:\/\/www.semanticscholar.org\/paper\/5c8bb027eb65b6d250a22e9b6db22853a552ac81\u3009."},{"key":"10.1016\/j.asoc.2026.115393_bib41","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2023.110773","article-title":"Q-learning-based unmanned aerial vehicle path planning with dynamic obstacle avoidance","volume":"147","author":"Sonny","year":"2023","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2026.115393_bib42","doi-asserted-by":"crossref","DOI":"10.1109\/TNN.1998.712192","article-title":"Reinforcement learning: an introduction","volume":"9","author":"Sutton","year":"1998","journal-title":"IEEE Trans. Neural Netw."},{"issue":"1\u20133","key":"10.1016\/j.asoc.2026.115393_bib43","doi-asserted-by":"crossref","first-page":"123","DOI":"10.1023\/A:1018012322525","article-title":"Reinforcement learning with replacing eligibility traces","volume":"22","author":"Singh","year":"1996","journal-title":"Mach. Learn."},{"issue":"3","key":"10.1016\/j.asoc.2026.115393_bib44","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1080\/00401706.1962.10490022","article-title":"Note on a method for calculating corrected sums of squares and products","volume":"4","author":"Welford","year":"1962","journal-title":"Technometrics"},{"key":"10.1016\/j.asoc.2026.115393_bib45","first-page":"2613","article-title":"Double Q-learning","author":"Hasselt","year":"2010","journal-title":"Neural Inf. Process. Syst."},{"key":"10.1016\/j.asoc.2026.115393_bib46","series-title":"Proceedings of the Seventh International Conference on Machine Learning (ICML)","first-page":"216","article-title":"Integrated architectures for learning, planning, and reacting based on approximating dynamic programming","author":"Sutton","year":"1990"},{"key":"10.1016\/j.asoc.2026.115393_bib47","unstructured":"A.Y. Ng, D. Harada, S. RussellPolicy invariance under reward transformations: theory and application to reward shaping 99 Proc. 16th Int. Conf. Mach. Learn. (ICML)1999, 278287."}],"container-title":["Applied Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494626008410?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494626008410?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T15:54:44Z","timestamp":1781020484000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1568494626008410"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,8]]},"references-count":47,"alternative-id":["S1568494626008410"],"URL":"https:\/\/doi.org\/10.1016\/j.asoc.2026.115393","relation":{},"ISSN":["1568-4946"],"issn-type":[{"value":"1568-4946","type":"print"}],"subject":[],"published":{"date-parts":[[2026,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A Light-Guided Q-learning algorithm with backward reward propagation for path planning in cruise ship environments","name":"articletitle","label":"Article Title"},{"value":"Applied Soft Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.asoc.2026.115393","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115393"}}