{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T21:10:11Z","timestamp":1742591411162,"version":"3.40.2"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T00:00:00Z","timestamp":1727136000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T00:00:00Z","timestamp":1727136000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2061"],"award-info":[{"award-number":["U23B2061"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003399","name":"Shanghai Municipal Science and Technology Major","doi-asserted-by":"publisher","award":["2021SHZDZX0100"],"award-info":[{"award-number":["2021SHZDZX0100"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,24]]},"DOI":"10.1109\/itsc58415.2024.10919663","type":"proceedings-article","created":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T19:00:11Z","timestamp":1742583611000},"page":"3005-3011","source":"Crossref","is-referenced-by-count":0,"title":["From Unsupervised Reinforcement Learning to Continual Reinforcement Learning: Leading Learning from the Relevance to the Whole of Autonomous Driving Decision-Making"],"prefix":"10.1109","author":[{"given":"Zhenyu","family":"Ma","sequence":"first","affiliation":[{"name":"School of Automotive Studies, Tongji University,Shanghai,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yixin","family":"Cui","sequence":"additional","affiliation":[{"name":"School of Transportation, Jilin University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanjun","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Automotive Studies, Tongji University,Shanghai,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC57777.2023.10422557"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC55140.2022.9922208"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3402221"},{"key":"ref4","first-page":"93","article-title":"An approach to lifelong reinforcement learning through multiple environments","volume-title":"6th European Workshop on Learning Robots","author":"Tanaka"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9091363"},{"key":"ref6","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Conference on robot learning","author":"Yu"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3185549"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-020-01758-5"},{"key":"ref9","article-title":"Deep reinforcement learning amidst lifelong non-stationarity","author":"Xie","year":"2020","journal-title":"arXiv preprint"},{"key":"ref10","first-page":"838","article-title":"Lifelong robotic reinforcement learning by retaining experiences","volume-title":"Conference on Lifelong Learning Agents","author":"Xie"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/0921-8890(95)00004-y"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3191513"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2018.00078"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793982"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.13673"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/780"},{"key":"ref17","article-title":"Cic: Contrastive intrinsic control for unsupervised skill discovery","author":"Laskin","year":"2022","journal-title":"arXiv preprint"},{"key":"ref18","article-title":"Diversity is all you need: Learning skills without a reward function","author":"Eysenbach","year":"2018","journal-title":"ar Xiv preprint"},{"key":"ref19","article-title":"Variational intrinsic control","author":"Gregor","year":"2016","journal-title":"arXiv preprint"},{"key":"ref20","article-title":"Urlb: Unsupervised reinforcement learning benchmark","author":"Laskin","year":"2021","journal-title":"arXiv preprint"},{"key":"ref21","first-page":"28598","article-title":"Mastering the unsupervised reinforcement learning benchmark from pixels","volume-title":"International Conference on Machine Learning","author":"Rajeswar"},{"key":"ref22","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv preprint"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/978-3-642-27645-3_1","article-title":"Reinforcement learning and markov decision processes","volume-title":"Reinforcement learning: State-of-the-art","author":"Van Otterlo","year":"2012"},{"issue":"1","key":"ref24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1037\/0022-3514.40.1.1","article-title":"Characteristics of the rewarder and intrinsic motivation of the rewardee","volume":"40","author":"Deci","year":"1981","journal-title":"Journal of per-sonality and social psychology"},{"issue":"1","key":"ref25","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1109\/TPAMI.2023.3322426","article-title":"Fear-neuro-inspired reinforcement learning for safe au-tonomous driving","volume":"46","author":"He","year":"2024","journal-title":"IEEE Transactions on Pattern Analysis and Ma-chine Intelligence"},{"key":"ref26","article-title":"Polter: Policy trajectory ensemble regularization for unsupervised reinforcement learning","author":"Schubert","year":"2022","journal-title":"arXiv preprint"},{"key":"ref27","article-title":"Exploration by random network distillation","author":"Burda","year":"2018","journal-title":"arXiv preprint"},{"key":"ref28","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"International conference on machine learning","author":"Pathak"},{"key":"ref29","first-page":"6736","article-title":"Aps: Active pretraining with successor features","volume-title":"International Conference on Machine Learning","author":"Liu"}],"event":{"name":"2024 IEEE 27th International Conference on Intelligent Transportation Systems (ITSC)","location":"Edmonton, AB, Canada","start":{"date-parts":[[2024,9,24]]},"end":{"date-parts":[[2024,9,27]]}},"container-title":["2024 IEEE 27th International Conference on Intelligent Transportation Systems (ITSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10919469\/10919190\/10919663.pdf?arnumber=10919663","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T20:40:43Z","timestamp":1742589643000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10919663\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,24]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/itsc58415.2024.10919663","relation":{},"subject":[],"published":{"date-parts":[[2024,9,24]]}}}