{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,7]],"date-time":"2026-07-07T15:18:20Z","timestamp":1783437500270,"version":"3.54.6"},"reference-count":29,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"International Science and Technology Cooperation Program of China","award":["2019YFE0100200"],"award-info":[{"award-number":["2019YFE0100200"]}]},{"name":"Tsinghua University-Toyota Joint Research Center for AI Technology of Automated Vehicle"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Veh. Technol."],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1109\/tvt.2020.3026111","type":"journal-article","created":{"date-parts":[[2020,9,23]],"date-time":"2020-09-23T20:42:34Z","timestamp":1600893754000},"page":"12597-12608","source":"Crossref","is-referenced-by-count":166,"title":["Centralized Cooperation for Connected and Automated Vehicles at Intersections by Proximal Policy Optimization"],"prefix":"10.1109","volume":"69","author":[{"given":"Yang","family":"Guan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yangang","family":"Ren","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4923-3633","authenticated-orcid":false,"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2664-2509","authenticated-orcid":false,"given":"Qi","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Laiquan","family":"Luo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9333-7416","authenticated-orcid":false,"given":"Keqiang","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2011.2178836"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2014.2354380"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2016.2514271"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2017.7995727"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2018.8665334"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460934"},{"key":"ref16","article-title":"Carla: An open urban driving simulator","author":"dosovitskiy","year":"0","journal-title":"Proc Conf Robot Learn"},{"key":"ref17","first-page":"297","article-title":"Hierarchical reinforcement learning for self-driving decision-making without reliance on labeled driving data","volume":"14","author":"jingliang","year":"2019","journal-title":"IET Intell Transport Syst"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"ref19","article-title":"Combining deep reinforcement learning and safety based control for autonomous driving","author":"xiong","year":"2016","journal-title":"arXiv 1612 00147"},{"key":"ref28","article-title":"Model-ensemble trust-region policy optimization","author":"kurutach","year":"0","journal-title":"Int Conf Learn Represent (ICLR)"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2019.2908146"},{"key":"ref27","first-page":"2944","article-title":"Learning continuous control policies by stochastic value gradients","author":"heess","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2926733"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2017.2647987"},{"key":"ref29","article-title":"Model-based value estimation for efficient model-free reinforcement learning","author":"feinberg","year":"2018","journal-title":"CoRR"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2017.2787574"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3141\/2381-08"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2973977"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2017.2769084"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2015.01.007"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2015.2483779"},{"key":"ref20","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"0","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref22","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref21","first-page":"441","article-title":"Bias in natural actor-critic algorithms","author":"thomas","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref24","first-page":"1531","article-title":"A natural policy gradient","author":"kakade","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref23","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"0","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref26","article-title":"Online model learning algorithms for actor-critic control","author":"grondman","year":"2015"},{"key":"ref25","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"0","journal-title":"Proc 28th Int Conf Mach Learn"}],"container-title":["IEEE Transactions on Vehicular Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/25\/9258483\/09204585.pdf?arnumber=9204585","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T13:45:03Z","timestamp":1651067103000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9204585\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11]]},"references-count":29,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tvt.2020.3026111","relation":{},"ISSN":["0018-9545","1939-9359"],"issn-type":[{"value":"0018-9545","type":"print"},{"value":"1939-9359","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,11]]}}}