{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T14:14:09Z","timestamp":1777472049330,"version":"3.51.4"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"3-4","license":[{"start":{"date-parts":[[2019,3,5]],"date-time":"2019-03-05T00:00:00Z","timestamp":1551744000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51779057,51709061"],"award-info":[{"award-number":["51779057,51709061"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1007\/s10846-019-01004-2","type":"journal-article","created":{"date-parts":[[2019,3,5]],"date-time":"2019-03-05T06:02:39Z","timestamp":1551765759000},"page":"591-601","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":57,"title":["Mapless Motion Planning System for an Autonomous Underwater Vehicle Using Policy Gradient-based Deep Reinforcement Learning"],"prefix":"10.1007","volume":"96","author":[{"given":"Yushan","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junhan","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guocheng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,3,5]]},"reference":[{"key":"1004_CR1","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th annual international conference on machine learning, pp. 41\u201348. ACM (2009)","DOI":"10.1145\/1553374.1553380"},{"key":"1004_CR2","unstructured":"Carreras, M., Batlle, J., Ridao, P.: Hybrid coordination of reinforcement learning-based behaviors for auv control. In: 2001 IEEE\/RSJ international conference on intelligent robots and systems, 2001. Proceedings, vol. 3, pp. 1410\u20131415. IEEE (2001)"},{"key":"1004_CR3","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1109\/JOE.2004.835805","volume":"30","author":"M Carreras P\u00e9rez","year":"2005","unstructured":"Carreras P\u00e9rez, M., Yuh, J., Batlle i Grabulosa, J., Ridao Rodr\u00edguez, P.: A behavior-based scheme using reinforcement learning for autonomous underwater vehicles. \u24b8 Oceanic Engineering 30, 416\u2013427 (2005)","journal-title":"\u24b8 Oceanic Engineering"},{"key":"1004_CR4","doi-asserted-by":"crossref","unstructured":"Chen, C., Seff, A., Kornhauser, A., Xiao, J.: Deepdriving: Learning affordance for direct perception in autonomous driving. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2722\u20132730 (2015)","DOI":"10.1109\/ICCV.2015.312"},{"key":"1004_CR5","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/j.neucom.2017.06.066","volume":"272","author":"Y Cheng","year":"2018","unstructured":"Cheng, Y., Zhang, W.: Concise deep reinforcement learning obstacle avoidance for underactuated unmanned marine vessels. Neurocomputing 272, 63\u201373 (2018)","journal-title":"Neurocomputing"},{"issue":"6","key":"1004_CR6","doi-asserted-by":"publisher","first-page":"1019","DOI":"10.1109\/TSMC.2016.2645699","volume":"47","author":"R Cui","year":"2017","unstructured":"Cui, R., Yang, C., Li, Y., Sharma, S.: Adaptive neural network control of auvs with control input nonlinearities using reinforcement learning. IEEE Trans. Syst. Man Cybern. Syst. Hum. 47(6), 1019\u20131029 (2017)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst. Hum."},{"issue":"02","key":"1004_CR7","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1142\/S0219525911002998","volume":"14","author":"S Devlin","year":"2011","unstructured":"Devlin, S., Kudenko, D., Grze\u015b, M.: An empirical study of potential-based reward shaping and advice in complex, multi-agent systems. Adv. Complex Syst. 14(02), 251\u2013278 (2011)","journal-title":"Adv. Complex Syst."},{"key":"1004_CR8","unstructured":"El-Fakdi, A., Carreras, M.: Policy gradient based reinforcement learning for real autonomous underwater cable tracking. In: IEEE\/RSJ international conference on intelligent robots and systems, 2008, IROS 2008. pp. 3635\u20133640. IEEE (2008)"},{"issue":"3","key":"1004_CR9","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1016\/j.robot.2012.11.009","volume":"61","author":"A El-Fakdi","year":"2013","unstructured":"El-Fakdi, A., Carreras, M.: Two-step gradient-based reinforcement learning for underwater robotics behavior learning. Robot. Auton. Syst. 61(3), 271\u2013282 (2013)","journal-title":"Robot. Auton. Syst."},{"key":"1004_CR10","doi-asserted-by":"crossref","unstructured":"Fossen, T.I.: Handbook of marine craft hydrodynamics and motion control. John Wiley & Sons (2011)","DOI":"10.1002\/9781119994138"},{"issue":"7626","key":"1004_CR11","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1038\/nature20101","volume":"538","author":"A Graves","year":"2016","unstructured":"Graves, A., Wayne, G., Reynolds, M., Harley, T., Danihelka, I., Grabska-Barwi\u0144ska, A., Colmenarejo, S.G., Grefenstette, E., Ramalho, T., Agapiou, J., et al.: Hybrid computing using a neural network with dynamic external memory. Nature 538(7626), 471 (2016)","journal-title":"Nature"},{"key":"1004_CR12","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE international conference on robotics and automation (ICRA), pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"1004_CR13","unstructured":"Heess, N., Hunt, J.J., Lillicrap, T.P., Silver, D.: Memory-based control with recurrent neural networks. arXiv: 1512.04455 (2015)"},{"key":"1004_CR14","unstructured":"Heess, N., Sriram, S., Lemmon, J., Merel, J., Wayne, G., Tassa, Y., Erez, T., Wang, Z., Eslami, A., Riedmiller, M., et al.: Emergence of locomotion behaviours in rich environments. arXiv: 1707.02286 (2017)"},{"key":"1004_CR15","unstructured":"Kawano, H., Ura, T.: Motion planning algorithm for nonholonomic autonomous underwater vehicle in disturbance using reinforcement learning and teaching method. In: IEEE international conference on robotics and automation, 2002. Proceedings. ICRA\u201902, vol. 4, pp. 4032\u20134038. IEEE (2002)"},{"key":"1004_CR16","unstructured":"Kormushev, P., Caldwell, D.G.: Towards improved auv control through learning of periodic signals. In: Oceans-San Diego, 2013, pp. 1\u20134. IEEE (2013)"},{"key":"1004_CR17","unstructured":"Lei, T., Ming, L.: A robot exploration strategy based on q-learning network. In: IEEE international conference on real-time computing and robotics (RCAR), pp. 57\u201362. IEEE (2016)"},{"key":"1004_CR18","doi-asserted-by":"crossref","unstructured":"Li, Y., Cui, R., Li, Z., Xu, D.: Neural network approximation-based near-optimal motion planning with kinodynamic constraints using rrt. IEEE Transactions on Industrial Electronics (2018)","DOI":"10.1109\/TIE.2018.2816000"},{"key":"1004_CR19","unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., Wierstra, D. arXiv: 1509.02971 (2015)"},{"key":"1004_CR20","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, pp. 1928\u20131937 (2016)"},{"key":"1004_CR21","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing atari with deep reinforcement learning. arXiv: 1312.5602 (2013)"},{"issue":"7540","key":"1004_CR22","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"1004_CR23","unstructured":"Muller, U., Ben, J., Cosatto, E., Flepp, B., Cun, Y.L.: Off-road obstacle avoidance through end-to-end learning. In: Advances in neural information processing systems, pp. 739\u2013746 (2006)"},{"key":"1004_CR24","doi-asserted-by":"crossref","unstructured":"Ng, A.Y., Coates, A., Diel, M., Ganapathi, V., Schulte, J., Tse, B., Berger, E., Liang, E.: Autonomous inverted helicopter flight via reinforcement learning. In: Experimental Robotics IX, pp. 363\u2013372. Springer (2006)","DOI":"10.1007\/11552246_35"},{"key":"1004_CR25","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: Theory and application to reward shaping. In: ICML, vol. 99, pp. 278\u2013287 (1999)"},{"key":"1004_CR26","doi-asserted-by":"crossref","unstructured":"Pfeiffer, M., Schaeuble, M., Nieto, J., Siegwart, R., Cadena, C.: From perception to decision: A data-driven approach to end-to-end motion planning for autonomous ground robots. In: 2017 IEEE international conference on robotics and automation (icra), pp. 1527\u20131533. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989182"},{"key":"1004_CR27","unstructured":"Ramachandran, P., Zoph, B., Le, Q.V.: Searching for activation functions. arXiv: 1710.05941 (2018)"},{"key":"1004_CR28","unstructured":"Schaul, T., Quan, J., Antonoglou, I., Silver, D.: Prioritized experience replay. arXiv: 1511.05952 (2015)"},{"key":"1004_CR29","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International conference on machine learning, pp. 1889\u20131897 (2015)"},{"key":"1004_CR30","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv: 1707.06347 (2017)"},{"issue":"7587","key":"1004_CR31","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., Van Den Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., Lanctot, M., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484 (2016)","journal-title":"Nature"},{"key":"1004_CR32","unstructured":"Tai, L., Liu, M. arXiv: 1610.01733 (2016)"},{"key":"1004_CR33","doi-asserted-by":"crossref","unstructured":"Tai, L., Paolo, G., Liu, M.: Virtual-to-real deep reinforcement learning: Continuous control of mobile robots for mapless navigation. In: 2017 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp. 31\u201336. IEEE (2017)","DOI":"10.1109\/IROS.2017.8202134"},{"key":"1004_CR34","unstructured":"Tambet, M., Avital, O., Taco, C., John, S.: Teacher-student curriculum learning. arXiv: 1707.00183 (2017)"},{"key":"1004_CR35","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: AAAI, vol. 2, pp. 5. Phoenix, AZ (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"1004_CR36","unstructured":"Wang, Z., Schaul, T., Hessel, M., Van Hasselt, H., Lanctot, M., De Freitas, N.: Dueling network architectures for deep reinforcement learning. arXiv: 1511.06581 (2015)"},{"key":"1004_CR37","unstructured":"Wu, Y., Mansimov, E., Grosse, R.B., Liao, S., Ba, J.: Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation. In: Advances in neural information processing systems, pp. 5279\u20135288 (2017)"},{"issue":"6","key":"1004_CR38","doi-asserted-by":"publisher","first-page":"1773","DOI":"10.1109\/TCYB.2017.2715228","volume":"48","author":"H Xiao","year":"2018","unstructured":"Xiao, H., Cui, R., Xu, D.: A sampling-based bayesian approach for cooperative multiagent online search with resource constraints. IEEE Trans Cybern 48(6), 1773\u20131785 (2018)","journal-title":"IEEE Trans Cybern"},{"key":"1004_CR39","doi-asserted-by":"crossref","unstructured":"Xie, C., Patil, S., Moldovan, T., Levine, S., Abbeel, P.: Model-based reinforcement learning with parametrized physical models and optimism-driven exploration. In: 2016 IEEE international conference on robotics and automation (ICRA), pp. 504\u2013511. IEEE (2016)","DOI":"10.1109\/ICRA.2016.7487172"},{"key":"1004_CR40","unstructured":"Zaremba, W., Sutskever, I.: Learning to execute. arXiv: 1410.4615 (2014)"},{"key":"1004_CR41","unstructured":"Zhang, F., Leitner, J., Milford, M., Upcroft, B., Corke, P.: Towards vision-based deep reinforcement learning for robotic motion control (2015). arXiv: 1511.03791"}],"container-title":["Journal of Intelligent &amp; Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-019-01004-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10846-019-01004-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-019-01004-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,13]],"date-time":"2022-09-13T00:21:52Z","timestamp":1663028512000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10846-019-01004-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3,5]]},"references-count":41,"journal-issue":{"issue":"3-4","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["1004"],"URL":"https:\/\/doi.org\/10.1007\/s10846-019-01004-2","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"value":"0921-0296","type":"print"},{"value":"1573-0409","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,3,5]]},"assertion":[{"value":"29 August 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 March 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}