{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T07:55:53Z","timestamp":1773820553284,"version":"3.50.1"},"reference-count":54,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)",
"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],
"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["226-2023-00076"],"award-info":[{"award-number":["226-2023-00076"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52172334"],"award-info":[{"award-number":["52172334"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52131202"],"award-info":[{"award-number":["52131202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],
"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Sci. Eng."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tase.2025.3628910","type":"journal-article","created":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T18:38:35Z","timestamp":1762281515000},"page":"23671-23686","source":"Crossref","is-referenced-by-count":1,"title":["A Two-Stage Reinforcement Learning Algorithm for AUV Path Planning Based on Trajectory Exploration and Sequence Modeling"],"prefix":"10.1109","volume":"22",
"author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-7892-212X","authenticated-orcid":false,"given":"Yue","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Laboratory of Ocean Sensing and the Ocean College, Zhejiang University, Zhoushan, China"}]},{"given":"Huan","family":"Tang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Ocean Sensing and the Ocean College, Zhejiang University, Zhoushan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4193-3718","authenticated-orcid":false,"given":"Jiacheng","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Ocean Sensing and the Ocean College, Zhejiang University, Zhoushan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7686-3408","authenticated-orcid":false,"given":"Yun","family":"Xu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Ocean Sensing and the Ocean College, Zhejiang University, Zhoushan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9334-1570","authenticated-orcid":false,"given":"Dongfang","family":"Ma","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Ocean Sensing and the Ocean College, Zhejiang University, Zhoushan, China"}]}],"member":"263",
"reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3162850"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2024.3371963"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.apor.2022.103128"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2007.895057"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3191519"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2021.3121408"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2020.2976560"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3282681"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3190901"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3296415"},{"key":"ref11","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chen"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSSC.1968.300136"},{"key":"ref13","article-title":"Rapidly-exploring random trees: A new tool for path planning","author":"LaValle","year":"1998"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1177\/0278364911406761"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2023.115333"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v21i1.13457"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2024.119269"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2022.111418"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2022.108905"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3421541"},{"key":"ref21","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref22","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref23","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3026638"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107605"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2021.3102589"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2023.110601"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2025.3558945"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i6.25864"},{"key":"ref30","first-page":"5463","article-title":"Learning to explore via meta-policy gradient","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-023-04955-0"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2022.3151651"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3172168"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3509521"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2022.3153352"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3155697"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3429500"},{"key":"ref38","first-page":"1273","article-title":"Offline reinforcement learning as one big sequence modeling problem","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Janner"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3408271"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2023.3240585"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111604"},{"key":"ref42","first-page":"27042","article-title":"Online decision transformer","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zheng"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2020.108268"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2022.3216003"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105548"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2014.09.024"},{"key":"ref47","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref48","volume-title":"National Marine Data Center","year":"2024"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2020.3031169"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2022.112421"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2021.3097084"},{"key":"ref52","first-page":"1","article-title":"CrossQ: Batch normalization in deep reinforcement learning for greater sample efficiency and simplicity","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Bhatt"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1717"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3537087"}],
"container-title":["IEEE Transactions on Automation Science and Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8856\/10839176\/11224878.pdf?arnumber=11224878","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:52:27Z","timestamp":1763146347000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11224878\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/tase.2025.3628910","relation":{},"ISSN":["1545-5955","1558-3783"],"issn-type":[{"value":"1545-5955","type":"print"},{"value":"1558-3783","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}