{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T20:26:37Z","timestamp":1758399997898,"version":"3.37.3"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T00:00:00Z","timestamp":1632009600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T00:00:00Z","timestamp":1632009600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T00:00:00Z","timestamp":1632009600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51975310"],"award-info":[{"award-number":["51975310"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program","doi-asserted-by":"publisher","award":["2016YFB0100905"],"award-info":[{"award-number":["2016YFB0100905"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,19]]},"DOI":"10.1109\/itsc48978.2021.9564464","type":"proceedings-article","created":{"date-parts":[[2021,10,25]],"date-time":"2021-10-25T19:52:26Z","timestamp":1635191546000},"page":"473-478","source":"Crossref","is-referenced-by-count":4,"title":["Multi-Objective End-to-End Self-Driving Based on Pareto-Optimal Actor-Critic Approach"],"prefix":"10.1109","author":[{"given":"Tinghan","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yugong","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinxin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keqiang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"197","article-title":"Multi-criteria reinforcement learning","volume":"98","author":"g\u00e1bor","year":"0","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref11","first-page":"325","article-title":"A geometric approach to multi-criterion reinforcement learning","volume":"5","author":"mannor","year":"2004","journal-title":"Journal of Machine Learning Research"},{"journal-title":"A practical guide to multi-objective reinforcement learning and planning","year":"2021","author":"hayes","key":"ref12"},{"key":"ref13","first-page":"969","article-title":"On finding compromise solutions in multiobjective markov decision processes","author":"perny","year":"0","journal-title":"Proceedings of the 2010 Conference on ECAI 2010 19th European Conference on Artificial Intelligence"},{"key":"ref14","first-page":"3483","article-title":"Multi-objective reinforcement learning using sets of pareto dominating policies","volume":"15","author":"van moffaert","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"key":"ref15","article-title":"Pareto-dqn: Approximating the pareto front in complex multi-objective decision problems","author":"reymond","year":"0","journal-title":"Proceedings of the Adaptive And Learning Agents Workshop (ALA-19) at AAMAS"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.11.094"},{"key":"ref17","first-page":"1","article-title":"Decomposition based multiobjective evolutionary algorithm in xcs for multi-objective reinforcement learning","author":"cheng","year":"0","journal-title":"2018 IEEE Congress on Evolutionary Computation (CEC)"},{"key":"ref18","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"0","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-70928-2_64"},{"key":"ref4","first-page":"435","article-title":"End-to-end learning of driving models with surround-view cameras and route planners","author":"hecker","year":"0","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3185066.3185084"},{"key":"ref6","first-page":"1445","article-title":"Multiple-goal reinforcement learning with modular sarsa (o)","author":"sprague","year":"0","journal-title":"Proceedings of the 18th International Joint Conference on Artificial Intelligence"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.056"},{"key":"ref8","first-page":"135","article-title":"Action selection methods using reinforcement learning","author":"humphrys","year":"0","journal-title":"Proceedings of the 4th International Conference on Simulation of Adaptive Behavior"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-013-5369-0"},{"journal-title":"End to End Learning for Self-Driving Cars","year":"2016","author":"bojarski","key":"ref2"},{"key":"ref1","first-page":"305","article-title":"Alvinn: An autonomous land vehicle in a neural network","author":"pomerleau","year":"1989","journal-title":"Advances in neural information processing systems"},{"key":"ref9","first-page":"3190","article-title":"Multi-objective reinforcement learning algorithm for mosdmp in unknown environment","author":"zhao","year":"0","journal-title":"2010 8th World Congress on Intelligent Control and Automation"},{"key":"ref20","article-title":"A generalized algorithm for multi-objective reinforcement learning and policy adaptation","volume":"32","author":"yang","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref21","first-page":"3253","article-title":"Linear fitted-q iteration with multiple reward functions","volume":"13","author":"lizotte","year":"2012","journal-title":"The Journal of Machine Learning Research"}],"event":{"name":"2021 IEEE International Intelligent Transportation Systems Conference (ITSC)","start":{"date-parts":[[2021,9,19]]},"location":"Indianapolis, IN, USA","end":{"date-parts":[[2021,9,22]]}},"container-title":["2021 IEEE International Intelligent Transportation Systems Conference (ITSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9564393\/9564395\/09564464.pdf?arnumber=9564464","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:47:36Z","timestamp":1652197656000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9564464\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,19]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/itsc48978.2021.9564464","relation":{},"subject":[],"published":{"date-parts":[[2021,9,19]]}}}