{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T02:33:01Z","timestamp":1730341981296,"version":"3.28.0"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.23919\/acc45564.2020.9147818","type":"proceedings-article","created":{"date-parts":[[2020,7,27]],"date-time":"2020-07-27T22:02:33Z","timestamp":1595887353000},"page":"4003-4010","source":"Crossref","is-referenced-by-count":2,"title":["Optimal Control Inspired Q-Learning for Switched Linear Systems"],"prefix":"10.23919","author":[{"given":"Hua","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Linfang","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv preprint arXiv 1312 5602"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref12","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref13","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"ref15","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv preprint arXiv 1412 6980"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2012.2214134"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2008.08.017"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2317301"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2014.02.015"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2009.5457695"},{"journal-title":"Dynamic Optimization The Calculus of Variations and Optimal Control in Economics and Management","year":"2012","author":"kamien","key":"ref4"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2018.8635985"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1201\/9781420011418"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.1007\/s10208-019-09426-y","article-title":"On the sample complexity of the linear quadratic regulator","author":"dean","year":"2019","journal-title":"Foundations of Computational Mathematics"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1109\/37.126844","article-title":"Reinforcement learning is direct adaptive optimal control","volume":"12","author":"sutton","year":"1992","journal-title":"IEEE Control Systems Magazine"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1995.478953"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139173551"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref1","volume":"1","author":"bertsekas","year":"2005","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2010.02.018"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2015.2401334"},{"key":"ref21","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2758374"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2015.2509421"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2011.2178649"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2009.5160616"}],"event":{"name":"2020 American Control Conference (ACC)","start":{"date-parts":[[2020,7,1]]},"location":"Denver, CO, USA","end":{"date-parts":[[2020,7,3]]}},"container-title":["2020 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9140048\/9147203\/09147818.pdf?arnumber=9147818","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,31]],"date-time":"2020-08-31T21:46:24Z","timestamp":1598910384000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9147818\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":29,"URL":"https:\/\/doi.org\/10.23919\/acc45564.2020.9147818","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}