{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T09:49:31Z","timestamp":1778233771619,"version":"3.51.4"},"reference-count":32,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62273256"],"award-info":[{"award-number":["62273256"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["FRF-OT-23-02"],"award-info":[{"award-number":["FRF-OT-23-02"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013804","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013804","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L257002"],"award-info":[{"award-number":["L257002"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.eswa.2026.132112","type":"journal-article","created":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T17:48:35Z","timestamp":1773942515000},"page":"132112","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["A smooth reinforcement learning method for trajectory tracking and collision avoidance of wheeled vehicle"],"prefix":"10.1016","volume":"319","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8442-3205","authenticated-orcid":false,"given":"Liangfa","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9155-3035","authenticated-orcid":false,"given":"Xujie","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0380-0423","authenticated-orcid":false,"given":"Junjie","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0193-816X","authenticated-orcid":false,"given":"Wenxuan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6252-0618","authenticated-orcid":false,"given":"Yinuo","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4566-1103","authenticated-orcid":false,"given":"Liming","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4923-3633","authenticated-orcid":false,"given":"Shengbo","family":"Eben Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3184-9520","authenticated-orcid":false,"given":"Fei","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3697-1576","authenticated-orcid":false,"given":"Jingliang","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.132112_bib0001","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"7020","article-title":"Addressing action oscillations through learning policy inertia","volume":"vol. 35","author":"Chen","year":"2021"},{"issue":"2","key":"10.1016\/j.eswa.2026.132112_bib0002","doi-asserted-by":"crossref","first-page":"920","DOI":"10.1109\/TAC.2023.3275732","article-title":"On the optimization landscape of dynamic output feedback linear quadratic control","volume":"69","author":"Duan","year":"2024","journal-title":"IEEE Trans. Automatic Control"},{"issue":"11","key":"10.1016\/j.eswa.2026.132112_bib0003","doi-asserted-by":"crossref","first-page":"6584","DOI":"10.1109\/TNNLS.2021.3082568","article-title":"Distributional soft actor-critic: Off-policy reinforcement learning for addressing value estimation errors","volume":"33","author":"Duan","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"2","key":"10.1016\/j.eswa.2026.132112_bib0004","doi-asserted-by":"crossref","first-page":"96","DOI":"10.1109\/MCI.2024.3364428","article-title":"Encoding distributional soft actor-critic for autonomous driving in multi-lane scenarios [research frontier]","volume":"19","author":"Duan","year":"2024","journal-title":"IEEE Comput. Intell. Magaz."},{"issue":"5","key":"10.1016\/j.eswa.2026.132112_bib0005","doi-asserted-by":"crossref","first-page":"3935","DOI":"10.1109\/TPAMI.2025.3537087","article-title":"Distributional soft actor-critic with three refinements","volume":"47","author":"Duan","year":"2025","journal-title":"IEEE Trans. Pattern Analy. Mach. Intell."},{"key":"10.1016\/j.eswa.2026.132112_sbref0006","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.116714","article-title":"Reinforcement learning-based saturated adaptive robust neural-network control of underactuated autonomous underwater vehicles","volume":"197","author":"Elhaki","year":"2022","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.eswa.2026.132112_bib0007","series-title":"International conference on machine learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.eswa.2026.132112_bib0008","series-title":"2021 IEEE intelligent vehicles symposium workshops (IV workshops)","first-page":"128","article-title":"Numerically stable dynamic bicycle model for discrete-time control","author":"Ge","year":"2021"},{"key":"10.1016\/j.eswa.2026.132112_bib0009","series-title":"Nonlinear Model Predictive Control","author":"Gr\u00fcne","year":"2017"},{"key":"10.1016\/j.eswa.2026.132112_bib0010","series-title":"International conference on machine learning","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"issue":"1","key":"10.1016\/j.eswa.2026.132112_bib0011","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1109\/TIV.2022.3165178","article-title":"Robust lane change decision making for autonomous vehicles: an observation adversarial reinforcement learning approach","volume":"8","author":"He","year":"2022","journal-title":"IEEE Trans. Intell. Veh."},{"issue":"5","key":"10.1016\/j.eswa.2026.132112_bib0012","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","article-title":"Multilayer feedforward networks are universal approximators","volume":"2","author":"Hornik","year":"1989","journal-title":"Neural Networks"},{"key":"10.1016\/j.eswa.2026.132112_bib0013","series-title":"2022 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"4032","article-title":"L2c2: locally lipschitz continuous constraint towards stable and smooth reinforcement learning","author":"Kobayashi","year":"2022"},{"key":"10.1016\/j.eswa.2026.132112_sbref0014","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.126493","article-title":"Serial distributed reinforcement learning for enhanced multi-objective platoon control in curved road coordinates","volume":"269","author":"Li","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.eswa.2026.132112_bib0015","series-title":"Reinforcement Learning for Sequential Decision and Optimal Control","author":"Li","year":"2023"},{"key":"10.1016\/j.eswa.2026.132112_bib0016","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2015). Continuous control with deep reinforcement learning. arXiv preprint arXiv: 1509.02971."},{"issue":"3","key":"10.1016\/j.eswa.2026.132112_bib0017","doi-asserted-by":"crossref","first-page":"4470","DOI":"10.1109\/TIV.2024.3372522","article-title":"Event-triggered parallel control using deep reinforcement learning with application to comfortable autonomous driving","volume":"9","author":"Lu","year":"2024","journal-title":"IEEE Trans. Intell. Veh."},{"key":"10.1016\/j.eswa.2026.132112_bib0018","series-title":"2022 13th Asian control conference (ASCC)","first-page":"1893","article-title":"Deep reinforcement learning based tracking control of unmanned vehicle with safety guarantee","author":"Luo","year":"2022"},{"issue":"12","key":"10.1016\/j.eswa.2026.132112_bib0019","doi-asserted-by":"crossref","first-page":"23031","DOI":"10.1109\/TITS.2022.3194571","article-title":"Alternating direction method of multipliers for constrained iterative LQR in autonomous driving","volume":"23","author":"Ma","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.eswa.2026.132112_bib0020","series-title":"2021 IEEE international conference on robotics and automation (ICRA)","first-page":"1810","article-title":"Regularizing action policies for smooth control with reinforcement learning","author":"Mysore","year":"2021"},{"issue":"1","key":"10.1016\/j.eswa.2026.132112_bib0021","doi-asserted-by":"crossref","first-page":"466","DOI":"10.1109\/TNNLS.2022.3175595","article-title":"Model-based chance-constrained reinforcement learning via separated proportional-integral lagrangian","volume":"35","author":"Peng","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.eswa.2026.132112_bib0022","series-title":"International conference on machine learning","first-page":"1889","article-title":"Trust region policy optimization","author":"Schulman","year":"2015"},{"key":"10.1016\/j.eswa.2026.132112_bib0023","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv preprint arXiv: 1707.06347."},{"key":"10.1016\/j.eswa.2026.132112_bib0024","series-title":"International conference on machine learning","first-page":"8707","article-title":"Deep reinforcement learning with robust and smooth policy","author":"Shen","year":"2020"},{"key":"10.1016\/j.eswa.2026.132112_bib0025","series-title":"Proceedings of the 40th international conference on machine learning","first-page":"32253","article-title":"LipsNet: a smooth and robust neural network with adaptive lipschitz constant for high accuracy optimal control","volume":"vol. 202","author":"Song","year":"2023"},{"key":"10.1016\/j.eswa.2026.132112_bib0026","article-title":"Stability-certified reinforcement learning control via spectral normalization","volume":"10","author":"Takase","year":"2022","journal-title":"Mach. Learn. Appl."},{"key":"10.1016\/j.eswa.2026.132112_sbref0027","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.123055","article-title":"Comparing actor-critic deep reinforcement learning controllers for enhanced performance on a ball-and-plate system","volume":"245","author":"Udekwe","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.eswa.2026.132112_bib0028","doi-asserted-by":"crossref","DOI":"10.1016\/j.commtr.2023.100096","article-title":"Gops: a general optimal control problem solver for autonomous driving and industrial control applications","volume":"3","author":"Wang","year":"2023","journal-title":"Commun. Transp. Res."},{"key":"10.1016\/j.eswa.2026.132112_sbref0029","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.122158","article-title":"A deep reinforcement learning-based approach for autonomous lane-changing velocity control in mixed flow of vehicle group level","volume":"238","author":"Wang","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.eswa.2026.132112_sbref0030","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2026.114345","article-title":"Transformer-based explicit model predictive control with variable prediction horizon","volume":"172","author":"Wu","year":"2026","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.eswa.2026.132112_bib0031","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"6373","article-title":"Gradient normalization for generative adversarial networks","author":"Wu","year":"2021"},{"key":"10.1016\/j.eswa.2026.132112_bib0032","first-page":"29021","article-title":"Taac: temporally abstract actor-critic for continuous control","volume":"34","author":"Yu","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426010250?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426010250?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T08:43:58Z","timestamp":1776156238000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426010250"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":32,"alternative-id":["S0957417426010250"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132112","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A smooth reinforcement learning method for trajectory tracking and collision avoidance of wheeled vehicle","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132112","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"132112"}}