{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T15:09:30Z","timestamp":1779376170048,"version":"3.53.1"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Swiss State Secretariat for Education, Research and Innovation","award":["16.0106"],"award-info":[{"award-number":["16.0106"]}]},{"name":"European Union Research and Innovation Programme","award":["723562"],"award-info":[{"award-number":["723562"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1109\/tnnls.2020.3016906","type":"journal-article","created":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T20:25:37Z","timestamp":1598991937000},"page":"4096-4110","source":"Crossref","is-referenced-by-count":15,"title":["A Hybrid Learning Method for System Identification and Optimal Control"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5029-6450","authenticated-orcid":false,"given":"Baptiste","family":"Schubnel","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7149-6304","authenticated-orcid":false,"given":"Rafael E.","family":"Carrillo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0604-9043","authenticated-orcid":false,"given":"Pierre-Jean","family":"Alet","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andreas","family":"Hutter","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref38","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1561\/2300000053"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(99)01327-3"},{"key":"ref31","year":"2019","journal-title":"Alphastar Mastering the real-time strategy game StarCraft II"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.buildenv.2012.01.023"},{"key":"ref35","article-title":"When to trust your model: Model-based policy optimization","author":"janner","year":"2019","journal-title":"arXiv 1906 08253"},{"key":"ref34","article-title":"Learning latent state representation for speeding up exploration","author":"vezzani","year":"2019","journal-title":"arXiv 1905 12621"},{"key":"ref28","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2015.04.029"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3182\/20120823-5-NL-3013.00040"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-85729-398-5"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1906995116"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2018.02.156"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2015.XI.012"},{"key":"ref24","article-title":"Deep model predictive control with online learning for complex physical systems","author":"bieker","year":"2019","journal-title":"arXiv 1905 10094"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1800923115"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2834219"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3390\/en11030631"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref40","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref12","article-title":"SBEED: Convergent reinforcement learning with nonlinear function approximation","author":"dai","year":"2017","journal-title":"arXiv 1712 10285"},{"key":"ref13","article-title":"Neural proximal\/trust region policy optimization attains globally optimal policy","author":"liu","year":"2019","journal-title":"arXiv 1906 10306"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1002\/047134608X.W1046"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2016.2535918"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ISIC.1994.367791"},{"key":"ref17","article-title":"Nonlinear systems identification using deep dynamic neural networks","author":"ogunmolu","year":"2016","journal-title":"arXiv 1610 01439"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2018.07.326"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1364\/JOSAB.35.000617"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1017\/9781139061759"},{"key":"ref3","author":"van overschee","year":"2012","journal-title":"Subspace identification for linear systems Theory&#x2014;Implementation&#x2014;Applications"},{"key":"ref6","article-title":"Nonlinear system identification: A user-oriented roadmap","author":"schoukens","year":"2019","journal-title":"arXiv 1902 00683"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2014.10.128"},{"key":"ref8","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.4324\/9780080498942"},{"key":"ref49","author":"abadi","year":"2015","journal-title":"TensorFlow Large-Scale Machine Learning on Heterogeneous Systems"},{"key":"ref9","article-title":"Learning dexterous in-hand manipulation","year":"2018","journal-title":"arXiv 1808 00177"},{"key":"ref46","article-title":"Emergence of locomotion behaviours in rich environments","author":"heess","year":"2017","journal-title":"arXiv 1707 02286"},{"key":"ref45","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc ICML"},{"key":"ref48","year":"2019"},{"key":"ref47","first-page":"1","article-title":"Thermal building modeling adapted to district energy simulation","author":"perez","year":"2015","journal-title":"Proc BS"},{"key":"ref42","article-title":"Why gradient clipping accelerates training: A theoretical justification for adaptivity","author":"zhang","year":"2020","journal-title":"Proc ICLR"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.3390\/app9050832"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref43","first-page":"154","article-title":"Causal inference on time series using restricted structural equation models","author":"peters","year":"2013","journal-title":"Proc NIPS"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/9525619\/09184248.pdf?arnumber=9184248","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:53:05Z","timestamp":1652194385000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9184248\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9]]},"references-count":49,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2020.3016906","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,9]]}}}