{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T23:11:23Z","timestamp":1769209883913,"version":"3.49.0"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,13]],"date-time":"2023-06-13T00:00:00Z","timestamp":1686614400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,13]],"date-time":"2023-06-13T00:00:00Z","timestamp":1686614400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,13]]},"DOI":"10.23919\/ecc57647.2023.10178119","type":"proceedings-article","created":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T18:00:50Z","timestamp":1690912850000},"page":"1-7","source":"Crossref","is-referenced-by-count":3,"title":["A Painless Deterministic Policy Gradient Method for Learning-based MPC"],"prefix":"10.23919","author":[{"given":"Akhil S","family":"Anand","sequence":"first","affiliation":[{"name":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway"}]},{"given":"Dirk","family":"Reinhardt","sequence":"additional","affiliation":[{"name":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway"}]},{"given":"Shambhuraj","family":"Sawant","sequence":"additional","affiliation":[{"name":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway"}]},{"given":"Jan Tommy","family":"Gravdahl","sequence":"additional","affiliation":[{"name":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway"}]},{"given":"Sebastien","family":"Gros","sequence":"additional","affiliation":[{"name":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Deep reinforcement learning for autonomous driving","author":"wang","year":"2018","journal-title":"arXiv preprint arXiv 1811 11329"},{"key":"ref12","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.23919\/ECC54610.2021.9654852"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683750"},{"key":"ref20","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"The Journal of Machine Learning Research"},{"key":"ref11","article-title":"Actor-critic algorithms","volume":"12","author":"konda","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.2307\/2998564"},{"key":"ref10","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"In International Conference on Machine Learning"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-04331-8_1"},{"key":"ref2","volume":"2","author":"rawlings","year":"2017","journal-title":"Model Predictive Control Theory Computation and Design"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683333"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CCTA48906.2021.9659202"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/OJCSYS.2022.3221063"},{"key":"ref18","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"In International Conference on Machine Learning"},{"key":"ref8","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3024161"},{"key":"ref9","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"sutton","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2913768"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2018.11.038"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110598"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-090419-075625"}],"event":{"name":"2023 European Control Conference (ECC)","location":"Bucharest, Romania","start":{"date-parts":[[2023,6,13]]},"end":{"date-parts":[[2023,6,16]]}},"container-title":["2023 European Control Conference (ECC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10178092\/10178115\/10178119.pdf?arnumber=10178119","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T17:38:04Z","timestamp":1691429884000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10178119\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,13]]},"references-count":22,"URL":"https:\/\/doi.org\/10.23919\/ecc57647.2023.10178119","relation":{},"subject":[],"published":{"date-parts":[[2023,6,13]]}}}