{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T20:29:57Z","timestamp":1771705797297,"version":"3.50.1"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2018,11,1]],"date-time":"2018-11-01T00:00:00Z","timestamp":1541030400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2018,11,1]],"date-time":"2018-11-01T00:00:00Z","timestamp":1541030400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.1109\/itsc.2018.8569615","type":"proceedings-article","created":{"date-parts":[[2018,12,12]],"date-time":"2018-12-12T20:21:46Z","timestamp":1544646106000},"page":"759-765","source":"Crossref","is-referenced-by-count":68,"title":["Lagrangian Control through Deep-RL: Applications to Bottleneck Decongestion"],"prefix":"10.1109","author":[{"given":"Eugene","family":"Vinitsky","sequence":"first","affiliation":[{"name":"UC Berkeley, Department of Mechanical Engineering"}]},{"given":"Kanaad","family":"Parvate","sequence":"additional","affiliation":[{"name":"UC Berkeley, Electrical Engineering and Computer Science"}]},{"given":"Aboudy","family":"Kreidieh","sequence":"additional","affiliation":[{"name":"UC Berkeley, Department of Civil and Environmental Engineering"}]},{"given":"Cathy","family":"Wu","sequence":"additional","affiliation":[{"name":"UC Berkeley, Electrical Engineering and Computer Science"}]},{"given":"Alexandre","family":"Bayen","sequence":"additional","affiliation":[{"name":"UC Berkeley, Electrical Engineering and Computer Science"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref32","first-page":"1476","article-title":"Increasing the action gap: New operators for reinforcement learning","author":"bellemare","year":"2016","journal-title":"AAAI"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)0733-947X(2010)136:1(67)"},{"key":"ref30","first-page":"1071","article-title":"Learning neural network policies with guided policy search under unknown dynamics","author":"levine","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref34","first-page":"1823","article-title":"The numerics of gans","author":"mescheder","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1007\/978-3-319-08422-0_25","article-title":"Multi-agent reinforcement learning control for ramp metering","author":"fares","year":"2015","journal-title":"Progress in Systems Engineering"},{"key":"ref12","author":"etherington","year":"2018","journal-title":"Waymo orders thousands of pacificas for 2018 self-driving fleet rollout"},{"key":"ref13","author":"stern","year":"2017","journal-title":"Dissipation of stop-and-go waves via control of autonomous vehicles Field experiments"},{"key":"ref14","author":"liu","year":"2018","journal-title":"Impact of cooperative adaptive cruise control (cacc) on multilane freeway merge capacity"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2010.5625245"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2007.12.004"},{"key":"ref17","first-page":"398","article-title":"Emergent behaviors in mixed-autonomy traffic","author":"wu","year":"2017","journal-title":"Conference on Robot Learning"},{"key":"ref18","author":"wu","year":"2017","journal-title":"Flow Architecture and benchmarking for reinforcement learning in traffic control"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1088\/1367-2630\/10\/3\/033001"},{"key":"ref28","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref4","author":"nagabandi","year":"2017","journal-title":"Neural network dynamics models for control of under-actuated legged millirobots"},{"key":"ref27","first-page":"128","article-title":"Recent development and applications of SUMO - Simulation of Urban MObility","volume":"5","author":"krajzewicz","year":"2012","journal-title":"International Journal on Advances in Systems and Measurements"},{"key":"ref3","author":"mnih","year":"2013","journal-title":"Playing atari with deep reinforcement learning"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2014.01.014"},{"key":"ref29","author":"liang","year":"2017","journal-title":"Ray rllib A composable and scalable reinforcement learning library"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2017.2687620"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1007\/978-3-642-11688-9_18","article-title":"Traffic light control by multiagent reinforcement learning systems","author":"bakker","year":"2010","journal-title":"Interactive Collaborative Information Systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3068287"},{"key":"ref2","first-page":"2944","article-title":"Learning continuous control policies by stochastic value gradients","author":"heess","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref9","article-title":"Expert level control of ramp metering based on multi-task deep reinforcement learning","author":"belletti","year":"2017","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"ref1","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"ICML"},{"key":"ref20","article-title":"Freeway capacity drop and the definition of capacity","author":"hall","year":"1991","journal-title":"Transportation Research Record"},{"key":"ref22","first-page":"58","article-title":"Alinea: A local feedback control law for on-ramp metering","volume":"1320","author":"papageorgiou","year":"1991","journal-title":"Transportation Research Record"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.trb.2006.02.011"},{"key":"ref24","author":"chung","year":"2014","journal-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1512\/iumj.1957.6.56038"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.62.1805"},{"key":"ref25","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"}],"event":{"name":"2018 IEEE International Conference on Intelligent Transportation Systems (ITSC)","location":"Maui, HI, USA","start":{"date-parts":[[2018,11,4]]},"end":{"date-parts":[[2018,11,7]]}},"container-title":["2018 21st International Conference on Intelligent Transportation Systems (ITSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8543039\/8569013\/08569615.pdf?arnumber=8569615","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,25]],"date-time":"2025-08-25T20:27:21Z","timestamp":1756153641000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8569615\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/itsc.2018.8569615","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}