{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T02:57:02Z","timestamp":1774061822132,"version":"3.50.1"},"reference-count":58,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Contr. Syst. Technol."],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1109\/tcst.2021.3116796","type":"journal-article","created":{"date-parts":[[2021,10,13]],"date-time":"2021-10-13T01:02:35Z","timestamp":1634086955000},"page":"1595-1611","source":"Crossref","is-referenced-by-count":37,"title":["Reinforcement Learning Versus PDE Backstepping and PI Control for Congested Freeway Traffic"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9324-0200","authenticated-orcid":false,"given":"Huan","family":"Yu","sequence":"first","affiliation":[{"name":"Systems Hub, Intelligent Transportation Thrust, The Hong Kong University of Science and Technology, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0547-6345","authenticated-orcid":false,"given":"Saehong","family":"Park","sequence":"additional","affiliation":[{"name":"Department of Civil and Environmental Engineering, University of California at Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6697-222X","authenticated-orcid":false,"given":"Alexandre","family":"Bayen","sequence":"additional","affiliation":[{"name":"Department of Civil and Environmental Engineering, University of California at Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6393-4375","authenticated-orcid":false,"given":"Scott","family":"Moura","sequence":"additional","affiliation":[{"name":"Department of Civil and Environmental Engineering, University of California at Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5523-941X","authenticated-orcid":false,"given":"Miroslav","family":"Krstic","sequence":"additional","affiliation":[{"name":"Department of Mechanical and Aerospace Engineering, University of California at San Diego, La Jolla, CA, USA"}]}],"member":"263","reference":[{"key":"ref1","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Achiam"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2009.0070"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1137\/S0036139997332099"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2020.108896"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2018.02.006"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2005.03.010"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.physleta.2015.05.019"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2017.2725912"},{"key":"ref9","volume-title":"Dynamic Programming and Optimal Control","author":"Bertsekas","year":"2005"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2009.05.014"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.trb.2011.02.001"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2020.12.1343"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2013.2255286"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3141\/2391-04"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.79.056113"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2017.7963427"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejcon.2021.03.007"},{"key":"ref18","article-title":"Reinforcement learning with function-valued action spaces for partial differential equation control","author":"Pan","year":"2018","journal-title":"arXiv:1806.06931"},{"key":"ref19","volume-title":"FHWA, U.S. Department of Transportation. Next Generation Simulation (NGSIM)","year":"2018"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejcon.2013.05.015"},{"issue":"1","key":"ref21","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u00eda","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2006.08.001"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1718942115"},{"key":"ref24","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Kakade"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2017.2699125"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2887141"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2019.03.021"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718607"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2015.12.019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2006.1580155"},{"key":"ref31","volume-title":"PyTorch Implementations of Reinforcement Learning Algorithms","author":"Kostrikov","year":"2021"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1002\/cpa.3160130205"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2016.7508798"},{"key":"ref34","first-page":"3053","article-title":"RLlib: Abstractions for distributed reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Liang"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2021.109964"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.2307\/99769"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/0191-2607(90)90047-A"},{"issue":"1","key":"ref38","first-page":"58","article-title":"ALINEA: A local feedback control law for on-ramp metering","volume":"1320","author":"Papageorgiou","year":"1991","journal-title":"Transp. Res. Rec."},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2607280"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1287\/opre.4.1.42"},{"key":"ref41","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Schulman"},{"key":"ref42","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref43","article-title":"Constructing set-valued fundamental diagrams from jamiton solutions in second order traffic models","author":"Seibold","year":"2012","journal-title":"arXiv:1204.5510"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2021.109655"},{"key":"ref45","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sutton"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2014.6858635"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-32460-4"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569615"},{"key":"ref49","article-title":"Learning to discretize: Solving 1D scalar conservation laws via deep reinforcement learning","author":"Wang","year":"2019","journal-title":"arXiv:1905.11079"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2014.2307884"},{"key":"ref51","first-page":"398","article-title":"Emergent behaviors in mixed-autonomy traffic","volume-title":"Proc. Conf. Robot Learn.","author":"Wu"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2018.10.040"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569997"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2020.2989101"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.2994031"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1115\/1.4048781"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/S0191-2615(00)00050-3"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2018.11.005"}],"container-title":["IEEE Transactions on Control Systems Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/87\/9806193\/09568241.pdf?arnumber=9568241","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T22:27:33Z","timestamp":1705012053000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9568241\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7]]},"references-count":58,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcst.2021.3116796","relation":{},"ISSN":["1063-6536","1558-0865","2374-0159"],"issn-type":[{"value":"1063-6536","type":"print"},{"value":"1558-0865","type":"electronic"},{"value":"2374-0159","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,7]]}}}