{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T16:33:48Z","timestamp":1775838828563,"version":"3.50.1"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9562006","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:28:35Z","timestamp":1634689715000},"page":"6064-6071","source":"Crossref","is-referenced-by-count":48,"title":["Reinforcement Learning for Autonomous Driving with Latent State Inference and Spatial-Temporal Relationships"],"prefix":"10.1109","author":[{"given":"Xiaobai","family":"Ma","sequence":"first","affiliation":[{"name":"Honda Research Institute,US"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiachen","family":"Li","sequence":"additional","affiliation":[{"name":"Honda Research Institute,US"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mykel J.","family":"Kochenderfer","sequence":"additional","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Isele","sequence":"additional","affiliation":[{"name":"Honda Research Institute,US"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kikuo","family":"Fujimura","sequence":"additional","affiliation":[{"name":"Honda Research Institute,US"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"williams","year":"1992","journal-title":"Machine Learning"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/10187.001.0001"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00637"},{"key":"ref32","article-title":"Spectral temporal graph neural network for trajectory prediction","author":"cao","year":"2021","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01154"},{"key":"ref30","article-title":"Relational inductive biases, deep learning, and graph networks","author":"battaglia","year":"2018","journal-title":"arXiv preprint arXiv 1806 01261"},{"key":"ref37","article-title":"Nervenet: Learning structured policy with graph neural networks","author":"wang","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref36","article-title":"Evolvegraph: Multi-agent trajectory prediction with dynamic relational reasoning","author":"li","year":"2020","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01443"},{"key":"ref34","article-title":"Social-wagdat: Interaction-aware trajectory prediction via wasserstein graph double-attention network","author":"li","year":"2020","journal-title":"arXiv preprint arXiv 2002 05155"},{"key":"ref10","first-page":"1","article-title":"Reinforcement learning with iterative reasoning for merging in dense traffic","author":"bouton","year":"2020","journal-title":"IEEE International Conference on Intelligent Transportation Systems (ITSC)"},{"key":"ref40","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197132"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2016.XII.029"},{"key":"ref14","first-page":"1","article-title":"Comparison of deep reinforcement learning and model predictive control for adaptive cruise control","author":"lin","year":"2020","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"key":"ref15","article-title":"How powerful are graph neural networks&#x0192;","author":"xu","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref16","first-page":"779","article-title":"You only look once: Unified, real-time object detection","author":"redmon","year":"2016","journal-title":"Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00752"},{"key":"ref18","article-title":"Reinforcement learning with unsupervised auxiliary tasks","author":"jaderberg","year":"2017","journal-title":"International Conference on Learning Representations"},{"key":"ref19","article-title":"Do deep reinforcement learning agents model intentions&#x0192;","author":"matiisen","year":"2018","journal-title":"arXiv preprint arXiv 1805 06020"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2020.102615"},{"key":"ref4","first-page":"3370","article-title":"Interactionaware multi-agent reinforcement learning for mobile agents with individual goals","author":"mohseni-kabir","year":"2019","journal-title":"IEEE International Conference on Robotics and Automation (ICRA)"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2013.6629564"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461233"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3141\/1999-10"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2017.8317738"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1098\/rsta.2010.0084"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ECC.2015.7330991"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2017.2709782"},{"key":"ref2","article-title":"Playing fps games with deep reinforcement learning","volume":"31","author":"lample","year":"2017","journal-title":"AAAI Conference on Artificial Intelligence (AAAI)"},{"key":"ref9","article-title":"Cm3: Cooperative multi-goal multi-stage multi-agent reinforcement learning","author":"yang","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref20","first-page":"1024","article-title":"Inductive representation learning on large graphs","author":"hamilton","year":"2017","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8917306"},{"key":"ref21","article-title":"Graph attention networks","author":"veli?kovi?","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref42","article-title":"Semi-supervised classification with graph convolutional networks","author":"kipf","year":"0"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139219"},{"key":"ref41","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref23","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv preprint arXiv 1312 5602"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2017.7995935"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1155\/2016\/1025349"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","location":"Xi'an, China","start":{"date-parts":[[2021,5,30]]},"end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09562006.pdf?arnumber=9562006","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T23:22:37Z","timestamp":1659482557000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9562006\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9562006","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}