{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T12:30:06Z","timestamp":1730205006931,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T00:00:00Z","timestamp":1702425600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T00:00:00Z","timestamp":1702425600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,13]]},"DOI":"10.1109\/cdc49753.2023.10383562","type":"proceedings-article","created":{"date-parts":[[2024,1,19]],"date-time":"2024-01-19T18:38:36Z","timestamp":1705689516000},"page":"610-617","source":"Crossref","is-referenced-by-count":0,"title":["Combining Q-learning and Deterministic Policy Gradient for Learning-Based MPC"],"prefix":"10.1109","author":[{"given":"Katrine","family":"Seel","sequence":"first","affiliation":[]},{"given":"S\u00e9bastien","family":"Gros","sequence":"additional","affiliation":[]},{"given":"Jan Tommy","family":"Gravdahl","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7503.003.0006"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.5361"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2019.2949757"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2013.02.003"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2913768"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3024161"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110598"},{"journal-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2021.08.562"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/OJCSYS.2022.3221063"},{"key":"ref12","article-title":"Combining policy gradient and Q-Iearning","volume-title":"International Conference on Learning Representations","author":"ODonoghue","year":"2017"},{"key":"ref13","article-title":"A natural policy gradient","volume":"14","author":"Kakade","year":"2001","journal-title":"Advances in neural information processing systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2011.10.011"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9483016"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105343"},{"key":"ref17","first-page":"49","article-title":"Least-squares temporal difference learning","volume-title":"International Conference on Machine Learning","author":"Boyan","year":"1999"},{"key":"ref18","article-title":"Policy gradi-ent methods for reinforcement learning with function approximation","volume":"12","author":"Sutton","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref19","first-page":"605","article-title":"Deterministic policy gradient algorithms","volume-title":"31st International Conference on Machine Learning, ICML 2014","volume":"1","author":"Silver","year":"2014"},{"issue":"6","key":"ref20","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"Lagoudakis","year":"2004","journal-title":"Journal of Machine Learning Research"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.23919\/ACC53348.2022.9867217"},{"key":"ref22","article-title":"Approximate Newton methods for policy search in Markov decision processes","volume":"17","author":"Furmston","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s00158-003-0368-6"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1093\/oso\/9780198505143.003.0023"}],"event":{"name":"2023 62nd IEEE Conference on Decision and Control (CDC)","start":{"date-parts":[[2023,12,13]]},"location":"Singapore, Singapore","end":{"date-parts":[[2023,12,15]]}},"container-title":["2023 62nd IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10383192\/10383193\/10383562.pdf?arnumber=10383562","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T16:40:11Z","timestamp":1706028011000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10383562\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,13]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/cdc49753.2023.10383562","relation":{},"subject":[],"published":{"date-parts":[[2023,12,13]]}}}