{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T16:02:25Z","timestamp":1770739345251,"version":"3.49.0"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2009,4,1]],"date-time":"2009-04-01T00:00:00Z","timestamp":1238544000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Syst., Man, Cybern. B"],"published-print":{"date-parts":[[2009,4]]},"DOI":"10.1109\/tsmcb.2008.2007630","type":"journal-article","created":{"date-parts":[[2008,12,18]],"date-time":"2008-12-18T17:52:54Z","timestamp":1229622774000},"page":"517-529","source":"Crossref","is-referenced-by-count":171,"title":["Reinforcement Learning Versus Model Predictive Control: A Comparison on a Power System Problem"],"prefix":"10.1109","volume":"39","author":[{"given":"D.","family":"Ernst","sequence":"first","affiliation":[]},{"given":"M.","family":"Glavic","sequence":"additional","affiliation":[]},{"given":"F.","family":"Capitanescu","sequence":"additional","affiliation":[]},{"given":"L.","family":"Wehenkel","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1137\/0802028"},{"key":"ref38","author":"fiacco","year":"1968","journal-title":"Nonlinear Programming Sequential Unconstrained Minimization Techniques"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.2202\/1553-779X.1066"},{"key":"ref32","author":"ghandhari","year":"2000","journal-title":"Control Lyapunov functions A control strategy for damping of power oscillations in large power systems"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1109\/CDC.2005.1582107","article-title":"dynamic programming and suboptimal control: from adp to mpc","author":"bertsekas","year":"2005","journal-title":"Proc 44th IEEE Conf Decision Control Eur Control Conf"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-0729-0"},{"key":"ref37","author":"pavella","year":"1994","journal-title":"Transient Stability of Power Systems Theory and Practice"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-1635-0"},{"key":"ref35","author":"kundur","year":"1994","journal-title":"Power System Stability and Control"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-4561-3"},{"key":"ref28","first-page":"446","article-title":"reinforcement learning with raw pixels as state input","volume":"4153","author":"ernst","year":"2006","journal-title":"Proc Int Workshop Intell Comput Pattern Anal \/Synthesis"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2006.377527"},{"key":"ref29","volume":"i","author":"bertsekas","year":"2000","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref2","author":"maciejowski","year":"2001","journal-title":"Predictive Control with Constraints"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0098-1354(98)00301-9"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-006-6226-1"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"ref21","author":"bellman","year":"1957","journal-title":"Dynamic Programming"},{"key":"ref24","first-page":"503","article-title":"tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref23","first-page":"317","article-title":"neural fitted q iterationfirst experiences with a data efficient neural reinforcement learning method","author":"riedmiller","year":"2005","journal-title":"Proc 16th Eur Conf Mach Learn"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.2005.1571637"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-39857-8_11"},{"key":"ref50","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1016\/S0927-0507(03)10008-4","volume":"10","author":"romisch","year":"2003","journal-title":"Stochastic Programming Handbooks in Operations Research and Management Science"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1137\/050632865"},{"key":"ref56","first-page":"8","volume":"16","author":"fern","year":"2004","journal-title":"Advances in neural information processing systems"},{"key":"ref55","first-page":"424","article-title":"reinforcement learning as classification: leveraging modern classifiers","author":"lagoudakis","year":"2003","journal-title":"Proc ICML"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/BF02705417"},{"key":"ref52","first-page":"6","article-title":"learning-based model predictive control for markov decision processes","author":"negenborn","year":"2005","journal-title":"Proc 16th IFAC World Congr"},{"key":"ref10","first-page":"1","article-title":"epicardial ecg mapping of human ventricular fibrillation","author":"mourad","year":"2006","journal-title":"2006 IET 3rd International Conference On Advances in Medical Signal and Information Processing - MEDSIP 2006 MEDSIP"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.2.215"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/S0098-1354(98)00292-0"},{"key":"ref12","first-page":"974","volume":"9","author":"singh","year":"1997","journal-title":"Advances in neural information processing systems"},{"key":"ref13","first-page":"1615","article-title":"autonomous helicopter control using reinforcement learning policy search methods","author":"bagnell","year":"2001","journal-title":"Proc Int Conf Robot Autom"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2003.821457"},{"key":"ref15","first-page":"232","article-title":"an overview of industrial model predictive control technology","volume":"93","author":"qin","year":"1997","journal-title":"Proc Chem Process Control"},{"key":"ref16","author":"hassoun","year":"1995","journal-title":"Fundamentals of Artificial Neural Networks"},{"key":"ref17","author":"schlkopf","year":"1999","journal-title":"Advances in Kernel Methods Support Vector Learning"},{"key":"ref18","author":"cristianini","year":"2000","journal-title":"An Introduction to Support Vector Machines"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1023\/A:1010933404324"},{"key":"ref4","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(99)00214-9"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"reinforcement learning: a survey","volume":"4","author":"kaelbling","year":"1996","journal-title":"J Artif Intell Res"},{"key":"ref5","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref7","author":"watkins","year":"1989","journal-title":"Learning from delayed rewards"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(02)00002-X"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993306"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(96)00063-5"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/0098-1354(90)87012-E"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/S0098-1354(00)00398-7"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1007\/BFb0109870","volume":"245","author":"bemporad","year":"1999","journal-title":"Robustness in Identification and Control"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2000.877018"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1023\/B:COAP.0000018880.63497.eb"},{"key":"ref44","first-page":"295","volume":"5","author":"bradtke","year":"1993","journal-title":"Advances in neural information processing systems"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/S0098-1354(00)00302-1"}],"container-title":["IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/3477\/4802395\/04717266.pdf?arnumber=4717266","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,11]],"date-time":"2021-10-11T00:00:41Z","timestamp":1633910441000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4717266\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,4]]},"references-count":56,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tsmcb.2008.2007630","relation":{},"ISSN":["1083-4419"],"issn-type":[{"value":"1083-4419","type":"print"}],"subject":[],"published":{"date-parts":[[2009,4]]}}}