{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T21:05:23Z","timestamp":1776891923735,"version":"3.51.2"},"reference-count":37,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100006606","name":"Natural Science Foundation of Tianjin Municipality","doi-asserted-by":"publisher","award":["21JCJQJC00130"],"award-info":[{"award-number":["21JCJQJC00130"]}],"id":[{"id":"10.13039\/501100006606","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008118","name":"Shandong Academy of Sciences","doi-asserted-by":"publisher","award":["2023JBZ031"],"award-info":[{"award-number":["2023JBZ031"]}],"id":[{"id":"10.13039\/501100008118","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014890","name":"Qilu University of Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100014890","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62373278"],"award-info":[{"award-number":["62373278"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neucom.2026.133308","type":"journal-article","created":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T07:48:05Z","timestamp":1773474485000},"page":"133308","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["UUV autonomous control for terrain tracking problem through distributional reinforcement learning"],"prefix":"10.1016","volume":"682","author":[{"given":"Rongshun","family":"Juan","sequence":"first","affiliation":[]},{"given":"Yang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Shoufu","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Tianshu","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9551-202X","authenticated-orcid":false,"given":"Zhongke","family":"Gao","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133308_bib0005","series-title":"2023 IEEE Conference on Games (CoG)","first-page":"1","article-title":"Swimm deeper: a simulated underwater environment for tracking marine mammals using deep reinforcement learning and bluerov2","author":"Appleby","year":"2023"},{"key":"10.1016\/j.neucom.2026.133308_bib0010","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1016\/j.eswa.2016.10.003","article-title":"Ensemble and fuzzy kalman filter for position estimation of an autonomous underwater vehicle based on dynamical system of AUV motion","volume":"68","author":"Apriliani","year":"2017","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.neucom.2026.133308_bib0015","doi-asserted-by":"crossref","first-page":"4643","DOI":"10.3390\/s23104643","article-title":"Survey on the developments of unmanned marine vehicles: intelligence and cooperation","volume":"23","author":"Bae","year":"2023","journal-title":"Sensors"},{"key":"10.1016\/j.neucom.2026.133308_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.marpol.2023.105672","article-title":"Sustainable use of ocean resources","volume":"154","author":"Bailey","year":"2023","journal-title":"Mar. Policy"},{"key":"10.1016\/j.neucom.2026.133308_bib0025","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.128388","article-title":"AUV path planning and obstacle avoidances in marine environment based on enhanced ecuo technique","volume":"290","author":"Barik","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.neucom.2026.133308_bib0030","first-page":"282","article-title":"Advances in unmanned underwater vehicles technologies: modeling, control and guidance perspectives","volume":"38","author":"Budiyono","year":"2009","journal-title":"Indian J. Mar. Sci."},{"key":"10.1016\/j.neucom.2026.133308_bib0035","article-title":"Artificial intelligence, deep learning, UUV, hull design, energy consumption, computer aided design, data-driven engineering, geometric reasoning, machine learning for engineering applications","volume":"24","author":"Chen","year":"2024","journal-title":"J. Comput. Inf. Sci. Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0040","doi-asserted-by":"crossref","DOI":"10.1016\/j.oceaneng.2024.117468","article-title":"A novel cascaded trajectory tracking control for uuvs under time-varying disturbances","volume":"300","author":"Chen","year":"2024","journal-title":"Ocean Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0045","series-title":"2020 39th Chinese Control Conference (CCC)","first-page":"1727","article-title":"Online receding horizon trajectory planning method for UUV near bottom terrain tracking","author":"Chen","year":"2020"},{"key":"10.1016\/j.neucom.2026.133308_bib0050","author":"Cini"},{"key":"10.1016\/j.neucom.2026.133308_bib0055","doi-asserted-by":"crossref","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","article-title":"Challenges of real-world reinforcement learning: definitions, benchmarks and analysis","volume":"110","author":"Dulac-Arnold","year":"2021","journal-title":"Mach. Learn."},{"key":"10.1016\/j.neucom.2026.133308_bib0060","series-title":"Model Predictive Control for the Bluerov2","author":"Einarsson","year":"2020"},{"key":"10.1016\/j.neucom.2026.133308_bib0065","doi-asserted-by":"crossref","DOI":"10.1109\/TCST.2024.3377876","article-title":"Path-following control of unmanned underwater vehicle based on an improved td3 deep reinforcement learning","author":"Fan","year":"2024","journal-title":"IEEE Trans. Control Syst. Technol."},{"key":"10.1016\/j.neucom.2026.133308_bib0070","series-title":"2014 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"6606","article-title":"Coordinating UAVS and auvs for oceanographic field experiments: challenges and lessons learned","author":"Faria","year":"2014"},{"key":"10.1016\/j.neucom.2026.133308_bib0075","series-title":"International Conference on Machine Learning","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"10.1016\/j.neucom.2026.133308_bib0080","doi-asserted-by":"crossref","DOI":"10.1016\/j.apor.2022.103326","article-title":"Deep reinforcement learning for adaptive path planning and control of an autonomous underwater vehicle","volume":"129","author":"Hadi","year":"2022","journal-title":"Appl. Ocean Res."},{"key":"10.1016\/j.neucom.2026.133308_bib0085","series-title":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"1281","article-title":"Shaping progressive net of reinforcement learning for policy transfer with human evaluative feedback","author":"Juan","year":"2021"},{"key":"10.1016\/j.neucom.2026.133308_bib0090","series-title":"2023 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"7212","article-title":"Sim-to-real policy and reward transfer with adaptive forward dynamics model","author":"Juan","year":"2023"},{"key":"10.1016\/j.neucom.2026.133308_bib0095","series-title":"International Conference on Machine Learning","first-page":"5556","article-title":"Controlling overestimation bias with truncated mixture of continuous distributional quantile critics","author":"Kuznetsov","year":"2020"},{"key":"10.1016\/j.neucom.2026.133308_bib0100","doi-asserted-by":"crossref","first-page":"89","DOI":"10.3390\/jmse7040089","article-title":"UUV simulation modeling and its control method: simulation and experimental studies","volume":"7","author":"Li","year":"2019","journal-title":"J. Mar. Sci. Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0105","author":"Li"},{"key":"10.1016\/j.neucom.2026.133308_bib0110","doi-asserted-by":"crossref","first-page":"574","DOI":"10.1002\/rob.22148","article-title":"Research on disturbance rejection motion control method of USV for UUV recovery","volume":"40","author":"Liao","year":"2023","journal-title":"J. Field Robot."},{"key":"10.1016\/j.neucom.2026.133308_bib0115","doi-asserted-by":"crossref","first-page":"2319","DOI":"10.1109\/TIV.2023.3245615","article-title":"Transformer-based dual-channel self-attention for UUV autonomous collision avoidance","volume":"8","author":"Lin","year":"2023","journal-title":"IEEE Trans. Intell. Veh."},{"key":"10.1016\/j.neucom.2026.133308_bib0120","doi-asserted-by":"crossref","DOI":"10.1016\/j.oceaneng.2023.116076","article-title":"Reinforcement learning-based path tracking for underactuated UUV under intermittent communication","volume":"288","author":"Liu","year":"2023","journal-title":"Ocean Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0125","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2021.114660","article-title":"Trajectory planning for multi-robot systems: methods and applications","volume":"173","author":"Madridano","year":"2021","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.neucom.2026.133308_bib0130","doi-asserted-by":"crossref","DOI":"10.1016\/j.oceaneng.2024.120036","article-title":"Deep reinforcement learning from human preferences for ROV path tracking","volume":"317","author":"Niu","year":"2025","journal-title":"Ocean Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0135","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1109\/JOE.2013.2278891","article-title":"AUV navigation and localization: a review","volume":"39","author":"Paull","year":"2013","journal-title":"IEEE J. Ocean. Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0140","series-title":"Investigating Overestimation Bias in Reinforcement Learning","author":"Pentaliotis","year":"2020"},{"key":"10.1016\/j.neucom.2026.133308_bib0145","doi-asserted-by":"crossref","first-page":"19211","DOI":"10.1073\/pnas.0908322106","article-title":"Climate, carbon cycling, and deep-ocean ecosystems","volume":"106","author":"Smith Jr","year":"2009","journal-title":"Proc. Natl. Acad. Sci."},{"key":"10.1016\/j.neucom.2026.133308_bib0150","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Deep reinforcement learning with double q-learning","author":"Van Hasselt","year":"2016"},{"key":"10.1016\/j.neucom.2026.133308_bib0155","doi-asserted-by":"crossref","first-page":"1898","DOI":"10.3390\/jmse10121898","article-title":"An open-source benchmark simulator: control of a bluerov2 underwater robot","volume":"10","author":"Von Benzon","year":"2022","journal-title":"J. Mar. Sci. Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0160","series-title":"2005 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"3712","article-title":"Multi-agent quadrotor testbed control design: integral sliding mode VS. Reinforcement learning","author":"Waslander","year":"2005"},{"key":"10.1016\/j.neucom.2026.133308_bib0165","doi-asserted-by":"crossref","first-page":"7321","DOI":"10.3390\/s23177321","article-title":"A survey on unmanned underwater vehicles: challenges, enabling technologies, and future research directions","volume":"23","author":"Wibisono","year":"2023","journal-title":"Sensors"},{"key":"10.1016\/j.neucom.2026.133308_bib0170","doi-asserted-by":"crossref","first-page":"6782","DOI":"10.1109\/TVT.2020.2991983","article-title":"Cooperative path planning for heterogeneous unmanned vehicles in a search-and-track mission aiming at an underwater target","volume":"69","author":"Wu","year":"2020","journal-title":"IEEE Trans. Veh. Technol."},{"key":"10.1016\/j.neucom.2026.133308_bib0175","doi-asserted-by":"crossref","DOI":"10.1155\/2021\/5519033","article-title":"Reinforcement learning-based autonomous navigation and obstacle avoidance for usvs under partially observable conditions","volume":"2021","author":"Yan","year":"2021","journal-title":"Math. Probl. Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0180","doi-asserted-by":"crossref","first-page":"1983","DOI":"10.1109\/TASE.2022.3190901","article-title":"Intelligent path planning of underwater robot based on reinforcement learning","volume":"20","author":"Yang","year":"2022","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"key":"10.1016\/j.neucom.2026.133308_bib0185","series-title":"2016 9th International Symposium on Computational Intelligence and Design (ISCID)","first-page":"211","article-title":"Path planning for UUV in dynamic environment","author":"Zhi-Wen","year":"2016"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007058?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007058?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:32:25Z","timestamp":1776889945000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226007058"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":37,"alternative-id":["S0925231226007058"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133308","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"UUV autonomous control for terrain tracking problem through distributional reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133308","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"133308"}}