{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T16:54:42Z","timestamp":1778345682920,"version":"3.51.4"},"reference-count":136,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["3142019055"],"award-info":[{"award-number":["3142019055"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["3142015038"],"award-info":[{"award-number":["3142015038"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Project of Hebei Education Department","award":["QN2021312"],"award-info":[{"award-number":["QN2021312"]}]},{"name":"Langfang Science and Technology Research Development Program","award":["2019011058"],"award-info":[{"award-number":["2019011058"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3076530","type":"journal-article","created":{"date-parts":[[2021,4,29]],"date-time":"2021-04-29T19:36:40Z","timestamp":1619725000000},"page":"69061-69081","source":"Crossref","is-referenced-by-count":123,"title":["Motion Planning for Mobile Robots\u2014Focusing on Deep Reinforcement Learning: A Systematic Review"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5373-3879","authenticated-orcid":false,"given":"Huihui","family":"Sun","sequence":"first","affiliation":[]},{"given":"Weijie","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Runxiang","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9494-0009","authenticated-orcid":false,"given":"Yujie","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-50936-1_82"},{"key":"ref38","first-page":"1","article-title":"Local path planning using artificial potential field for waypoint tracking with collision avoidance","author":"lin","year":"2020","journal-title":"Proc IEEE 23rd Int Conf Intell Transp Syst (ITSC)"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2020.113425"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2975428"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICARSC.2019.8733623"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2019.12.308"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"163","DOI":"10.3901\/JME.2020.02.163","article-title":"Local path planning for autonomous vehicles based on sparse representation of point cloud in unstructured environments","volume":"56","author":"liu","year":"2020","journal-title":"Jixie Gongcheng Xuebao\/J Mech Eng"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63784-2_114"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2019.05.012"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.3390\/s20205873"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ETFA.2019.8868950"},{"key":"ref27","first-page":"1","article-title":"Comparing path planning algorithms for multiple mobile robots","author":"okumu","year":"2018","journal-title":"Proc Int Conf Artif Intell Data Process (IDAP)"},{"key":"ref29","first-page":"5142","article-title":"The method based on Dijkstra of multi-directional ship&#x2019;s path planning","author":"cheng","year":"2020","journal-title":"Proc Chin Control Decis Conf (CCDC)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/LWC.2020.3004687"},{"key":"ref22","first-page":"523","article-title":"Integrate multi-agent simulation environment and multi-agent reinforcement learning (MARL) for real-world scenario","author":"yeo","year":"2020","journal-title":"Proc Int Conf Inf Commun Technol Converg (ICTC)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1080\/00207179.2018.1526414"},{"key":"ref24","first-page":"621","article-title":"An improved DDPG reinforcement learning control of underwater gliders for energy optimization","author":"jing","year":"2020","journal-title":"Proc 3rd Int Conf Unmanned Syst (ICUS)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2988287"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2018.2884725"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CCDC49329.2020.9164775"},{"key":"ref100","article-title":"Towards cognitive exploration through deep reinforcement learning for mobile robots","author":"tai","year":"2016","journal-title":"arXiv 1610 01733"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2977924"},{"key":"ref50","first-page":"203","article-title":"Method for detecting obstacles in reservoir culverts based on circular structured light","volume":"46","author":"chen","year":"2020","journal-title":"Lasers Eng"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2020.105523"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-14347-3_34"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2020.00044"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCWorkshops49005.2020.9145456"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794179"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593871"},{"key":"ref54","first-page":"2491","article-title":"Learning to navigate in cities without a map","author":"mirowski","year":"2018","journal-title":"Proc 32nd Conf Neural Inf Process Syst Found (NeurIPS)"},{"key":"ref53","first-page":"1198","article-title":"Object detection-based semantic map building for a semantic visual SLAM system","author":"truong","year":"2020","journal-title":"Proc 20th Int Conf Control Autom Syst (ICCAS)"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/JPHOT.2020.2981485"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.23919\/CCC50068.2020.9189250"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1080\/02564602.2019.1566031"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3368961"},{"key":"ref6","first-page":"178","article-title":"Optimal robot path planning using enhanced particle swarm optimization algorithm","volume":"61","author":"ghathwan","year":"2020","journal-title":"Iraqi Journal of Science"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3035729"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2020.12.012"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-019-09859-y"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1155\/2020\/1849240"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1080\/0305215X.2020.1858074"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2019.10.033"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.001.1900232"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/j.sna.2019.111731"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18494\/SAM.2020.2540"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60372-4_22"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2019.01.112"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-8462-6_133"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/DCABES50732.2020.00026"},{"key":"ref127","first-page":"1423","article-title":"A fully distributed motion coordination strategy for multi-robot systems with local information","author":"yu","year":"2020","journal-title":"Proc Amer Control Conf (ACC)"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01214"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00691"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1016\/j.actaastro.2020.01.007"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1017\/S026357471800111X"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.4028\/www.scientific.net\/AMM.347-350.3208"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974648"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICC40277.2020.9148608"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2953326"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.3233\/JIFS-182560"},{"key":"ref76","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"arXiv 1511 05952"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917741532"},{"key":"ref77","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref75","first-page":"1","article-title":"Deep reinforcement learning with double Q-learning","author":"van","year":"2016","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461113"},{"key":"ref134","first-page":"1","article-title":"Towards efficient connected and automated driving system via multi-agent graph reinforcement learning","author":"shi","year":"2020","journal-title":"Mach Learn"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989037"},{"key":"ref78","first-page":"1","article-title":"Noisy networks for exploration","author":"fortunato","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1016\/j.cirp.2020.04.001"},{"key":"ref79","first-page":"449","article-title":"A distributional perspective on reinforcement learning","author":"bellemare","year":"2017","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-019-01110-1"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974695"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/IRC.2019.00120"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1142\/S0217595919400098"},{"key":"ref61","first-page":"219","article-title":"Model-based or model-free, a review of approaches in reinforcement learning","author":"huang","year":"2020","journal-title":"Proc Int Conf Comput Data Sci (CDS)"},{"key":"ref63","first-page":"330","article-title":"Off-policy TD($\\lambda$\n) with a true online equivalence","author":"van hasselt","year":"2014","journal-title":"Proc 13th Conf Uncertainty Artif Intell"},{"key":"ref64","first-page":"40","article-title":"&#x2018;An improved Q-learning algorithm for path-planning of a mobile robot","volume":"59","author":"das","year":"2012","journal-title":"Int J Comput Appl"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICISC44355.2019.9036354"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2010.06.019"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/s12555-012-0119-9"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-17298-4_40"},{"key":"ref2","first-page":"14","article-title":"Path planning methods for mobile robots: A systematic and bibliometric review","volume":"19","author":"muhammad","year":"2020","journal-title":"Elektrika-Journal of Electrical Engineering"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/COMROB.2016.7955160"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.3004163"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/SSRR.2018.8468643"},{"key":"ref95","article-title":"Reinforcement learning with unsupervised auxiliary tasks","author":"jaderberg","year":"2016","journal-title":"arXiv 1611 05397"},{"key":"ref108","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume":"48","author":"mnih","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.23919\/ACC45564.2020.9147960"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196739"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2018.8665177"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593702"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/ECMR.2019.8870964"},{"key":"ref105","first-page":"1","article-title":"Proximal policy optimization algorithm","author":"schulman","year":"2017","journal-title":"Mach Learn"},{"key":"ref91","article-title":"Shaping and policy search in reinforcement learning","author":"ng","year":"2003"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1145\/3301273"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2931199"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1049\/trit.2018.0008"},{"key":"ref102","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2891991"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.3390\/s19183837"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref98","first-page":"1","article-title":"Memory-based control with recurrent neural networks","author":"heess","year":"2015","journal-title":"Comput Sci"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-018-0891-8"},{"key":"ref96","first-page":"1","article-title":"Teaching a machine to read maps with deep reinforcement learning","author":"brunner","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref97","first-page":"187a","article-title":"Continuous control with deep reinforcement learning","volume":"8","author":"lillicrap","year":"2015","journal-title":"Comput Sci"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3048361"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3020322"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2975312"},{"key":"ref13","first-page":"1","article-title":"Temporal video scene segmentation using deep-learning","author":"trojahn","year":"2021","journal-title":"Multimedia Tools Appl"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.2196\/21383"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/ICARCV.2016.7838739"},{"key":"ref16","article-title":"Dota 2 with large scale deep reinforcement learning","author":"berner","year":"2019","journal-title":"arXiv 1912 06680"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202134"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7798980"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref81","first-page":"1","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2970433"},{"key":"ref84","first-page":"1","article-title":"Deep reinforcement learning in a 3-D blockworld environment","author":"barron","year":"2016","journal-title":"Proc Int Joint Conf Artif Intell (IJCAI)"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-020-09951-8"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2018.2884725"},{"key":"ref114","first-page":"1","article-title":"Learning to walk via deep reinforcement learning","author":"haarnoja","year":"2018","journal-title":"Proc Robot Sci Syst"},{"key":"ref113","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2019.1911567"},{"key":"ref80","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau5872"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-017-3241-z"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2019.10.032"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3036597"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2997304"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1007\/s42405-020-00254-x"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2927869"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2017.7965896"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793735"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401148"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09419029.pdf?arnumber=9419029","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,17]],"date-time":"2021-12-17T19:55:53Z","timestamp":1639770953000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9419029\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":136,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3076530","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}