{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T12:24:48Z","timestamp":1774268688082,"version":"3.50.1"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61772377,61672257,91746206,U20A20177"],"award-info":[{"award-number":["61772377,61672257,91746206,U20A20177"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9892538","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-8","source":"Crossref","is-referenced-by-count":6,"title":["Urban Traffic Signal Control with Reinforcement Learning from Demonstration Data"],"prefix":"10.1109","author":[{"given":"Min","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science, Wuhan University,Wuhan,China"}]},{"given":"Libing","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University,Wuhan,China"}]},{"given":"Jianxin","family":"Li","sequence":"additional","affiliation":[{"name":"School of Information Technology, Deakin University,Burwood,VIC,Australia"}]},{"given":"Dan","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Computer Science, University of Windsor,Windsor,Canada,N9B 3P4"}]},{"given":"Chao","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Engineering, Wuhan University,Wuhan,China"}]}],"member":"263","reference":[{"key":"ref30","first-page":"1928","article-title":"Asynchronous methods for deep rein-forcement learning","author":"mnih","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref10","article-title":"Deep q-learning from demonstrations","author":"hester","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref11","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"vecerik","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5113-5_3"},{"key":"ref13","article-title":"Distributed prioritized experience replay","author":"horgan","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357902"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5744"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2017.0153"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2017.09.020"},{"key":"ref19","article-title":"Playing atari games with deep reinforcement learning and human checkpoint replay","author":"hosu","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref28","article-title":"Network in network","author":"lin","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220096"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3314139"},{"key":"ref3","article-title":"Coordinated deep reinforcement learners for traffic light control","author":"van","year":"0","journal-title":"Proceedings of Learning Inference and Control of Multi-Agent Systems (at NIPS 2016)"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2901791"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44851-9_35"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357900"},{"key":"ref8","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref7","first-page":"447","article-title":"Exploration from demonstration for interactive reinforcement learning","author":"subramanian","year":"2016","journal-title":"AAMAS"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2012.6338853"},{"key":"ref9","first-page":"617","article-title":"Integrating reinforcement learning with human demonstrations of varying ability","author":"taylor","year":"0","journal-title":"The 10th International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref1","article-title":"A survey on traffic signal control methods","author":"wei","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref20","article-title":"Re-inforcement learning from imperfect demonstrations","author":"gao","year":"2018","journal-title":"ar Xiv preprint"},{"key":"ref22","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref21","article-title":"Reinforcement learning from demonstration through shaping","author":"brys","year":"0","journal-title":"Twenty-Fourth International Joint Conference on Artificial Intelligence"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358079"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref26","article-title":"Observe and look further: Achieving consistent performance on atari","volume":"abs 1805 11593","author":"pohlen","year":"2018","journal-title":"CoRR"},{"key":"ref25","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","location":"Padua, Italy","start":{"date-parts":[[2022,7,18]]},"end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09892538.pdf?arnumber=9892538","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T01:26:47Z","timestamp":1667525207000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9892538\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9892538","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}