{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T16:08:18Z","timestamp":1780589298283,"version":"3.54.1"},"reference-count":60,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020AAA0109700"],"award-info":[{"award-number":["2020AAA0109700"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1811463"],"award-info":[{"award-number":["U1811463"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61836012"],"award-info":[{"award-number":["61836012"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61876224"],"award-info":[{"award-number":["61876224"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62006253"],"award-info":[{"award-number":["62006253"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U181146"],"award-info":[{"award-number":["U181146"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61836012"],"award-info":[{"award-number":["61836012"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976233"],"award-info":[{"award-number":["61976233"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1109\/tnnls.2021.3109284","type":"journal-article","created":{"date-parts":[[2021,9,14]],"date-time":"2021-09-14T20:11:35Z","timestamp":1631650295000},"page":"5379-5391","source":"Crossref","is-referenced-by-count":47,"title":["Deductive Reinforcement Learning for Visual Autonomous Urban Driving Navigation"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9588-064X","authenticated-orcid":false,"given":"Changxin","family":"Huang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ronghui","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Meizi","family":"Ouyang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2190-0767","authenticated-orcid":false,"given":"Pengxu","family":"Wei","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Junfan","family":"Lin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiang","family":"Su","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2248-3755","authenticated-orcid":false,"given":"Liang","family":"Lin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","article-title":"ChauffeurNet: Learning to drive by imitating the best and synthesizing the worst","author":"bansal","year":"2018","journal-title":"arXiv 1812 03079"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00942"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.16"},{"key":"ref32","first-page":"1988","article-title":"Deep learning face representation by joint identification-verification","author":"sun","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1155\/2018\/7068349"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.257"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593758"},{"key":"ref36","first-page":"3812","article-title":"Infogail: Interpretable imitation learning from visual demonstrations","author":"li","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref35","article-title":"Query-efficient imitation learning for end-to-end autonomous driving","author":"zhang","year":"2016","journal-title":"arXiv 1605 06450 [cs]"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2015.2506664"},{"key":"ref60","article-title":"Meta reinforcement learning-based lane change strategy for autonomous vehicles","author":"ye","year":"2020","journal-title":"arXiv 2008 12451"},{"key":"ref28","first-page":"1","article-title":"CARLA: An open urban driving simulator","author":"dosovitskiy","year":"2017","journal-title":"Proc Annu Conf Robot Learn"},{"key":"ref27","first-page":"2450","article-title":"Recurrent world models facilitate policy evolution","author":"ha","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref29","first-page":"305","article-title":"ALVINN: An autonomous land vehicle in a neural network","author":"pomerleau","year":"1989","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref2","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"ref20","first-page":"64","article-title":"Unsupervised learning for physical interaction through video prediction","author":"finn","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref22","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garc\u00eda","year":"2015","journal-title":"J Mach Learn Res"},{"key":"ref21","first-page":"908","article-title":"Safe model-based reinforcement learning with stability guarantees","author":"berkenkamp","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref24","article-title":"Model-based reinforcement learning for Atari","author":"kaiser","year":"2019","journal-title":"arXiv 1903 00374"},{"key":"ref23","author":"wymann","year":"2000","journal-title":"TORCS The Open Racing Car Simulator"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.cell.2017.05.011"},{"key":"ref25","first-page":"1","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.008"},{"key":"ref59","article-title":"Learning via Hilbert space embedding of distributions","author":"song","year":"2008"},{"key":"ref58","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"ref56","first-page":"801","article-title":"Encoder-decoder with atrous separable convolution for semantic image segmentation","author":"chen","year":"2018","journal-title":"Proc Eur Conf Comput Vis (ECCV)"},{"key":"ref55","first-page":"1","article-title":"Auto-encoding variational Bayes","author":"kingma","year":"2014","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/682"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593691"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_19"},{"key":"ref10","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref11","article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","author":"shalev-shwartz","year":"2016","journal-title":"arXiv 1610 03295"},{"key":"ref40","article-title":"An auto-tuning framework for autonomous vehicles","author":"fan","year":"2018","journal-title":"arXiv 1808 04913"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_36"},{"key":"ref13","article-title":"Strengths, weaknesses, and combinations of model-based and model-free reinforcement learning","author":"asadi","year":"2015"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967834"},{"key":"ref15","first-page":"4754","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref16","first-page":"465","article-title":"PILCO: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref17","article-title":"Model-ensemble trust-region policy optimization","author":"kurutach","year":"2018","journal-title":"arXiv 1802 10592"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref19","article-title":"Recurrent environment simulators","author":"chiappa","year":"2017","journal-title":"arXiv 1704 02254"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref3","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2014.2306552"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.376"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.312"},{"key":"ref49","first-page":"6118","article-title":"Value prediction network","author":"oh","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/0377-2217(89)90348-2"},{"key":"ref46","article-title":"Temporal difference models: Model-free deep RL for model-based control","author":"pong","year":"2018","journal-title":"arXiv 1802 09081"},{"key":"ref45","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_3"},{"key":"ref47","first-page":"5690","article-title":"Imagination-augmented agents for deep reinforcement learning","author":"racani\u00e8re","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref42","article-title":"End-to-end deep reinforcement learning for lane keeping assist","author":"el sallab","year":"2016","journal-title":"arXiv 1612 04340"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/25.845116"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref43","article-title":"Virtual to real reinforcement learning for autonomous driving","author":"pan","year":"2017","journal-title":"arXiv 1704 03952"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/9629429\/09537641.pdf?arnumber=9537641","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:53:14Z","timestamp":1652194394000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9537641\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12]]},"references-count":60,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2021.3109284","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,12]]}}}