{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T18:10:04Z","timestamp":1746727804387,"version":"3.40.5"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100014717","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52272420"],"award-info":[{"award-number":["52272420"]}],"id":[{"id":"10.13039\/100014717","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Intell. Transport. Syst. Mag."],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1109\/mits.2024.3440069","type":"journal-article","created":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T15:42:33Z","timestamp":1724168553000},"page":"58-66","source":"Crossref","is-referenced-by-count":1,"title":["Boosting the Training of Deep Reinforcement Learning Traffic Control by Using the World Model"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8453-3992","authenticated-orcid":false,"given":"Pengbo","family":"Wang","sequence":"first","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7565-4979","authenticated-orcid":false,"given":"Yisheng","family":"Lv","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2309-9739","authenticated-orcid":false,"given":"Jingwei","family":"Ge","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9428-1960","authenticated-orcid":false,"given":"Li","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Automation, Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"41,414","article-title":"The wisdom of hindsight makes language models better instruction followers","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2023"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref3","first-page":"214","article-title":"Coordinated deep reinforcement learners for traffic light control","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Van der Pol","year":"2016"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.2307\/3006800"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/tsmca.2010.2052606"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.trpro.2017.05.175"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1287\/moor.27.4.819.297"},{"issue":"3","key":"ref8","first-page":"1","article-title":"Recent development and applications of sumo-simulation of urban mobility","volume":"5","author":"Krajzewicz","year":"2012","journal-title":"Int. J. Adv. Syst. Meas."},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.12720\/joace.4.1.40-46"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/itsc.2016.7795937"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220096"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICICAS48597.2019.00164"},{"key":"ref13","article-title":"Deep reinforcement learning for traffic signal control along arterials","volume-title":"Proc. DRL4KDD","author":"Wei","year":"2019"},{"key":"ref14","first-page":"2450","article-title":"Recurrent world models facilitate policy evolution","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Ha","year":"2018"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/mits.2022.3144797"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/smartcity.2015.63"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/5.58337"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357902"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/tcyb.2020.3015811"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330949"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/tits.2022.3229477"},{"volume-title":"Reinforcement learning for traffic signal control","year":"2023","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref24"},{"key":"ref25","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref26","article-title":"Multi-agent reinforcement learning for networked system control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chu","year":"2020"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/tits.2019.2901791"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/tmc.2020.3033782"},{"key":"ref29","first-page":"4079","article-title":"AttendLight: Universal attention-based reinforcement learning model for traffic signal control","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Oroojlooy","year":"2020"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/access.2019.2907618"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/mits.2021.3119869"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/mits.2022.3158631"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/mits.2023.3284059"}],"container-title":["IEEE Intelligent Transportation Systems Magazine"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5117645\/10990054\/10639829.pdf?arnumber=10639829","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T17:39:11Z","timestamp":1746725951000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10639829\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5]]},"references-count":33,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/mits.2024.3440069","relation":{},"ISSN":["1939-1390","1941-1197"],"issn-type":[{"type":"print","value":"1939-1390"},{"type":"electronic","value":"1941-1197"}],"subject":[],"published":{"date-parts":[[2025,5]]}}}