{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T17:03:01Z","timestamp":1780765381532,"version":"3.54.1"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2016,11,1]],"date-time":"2016-11-01T00:00:00Z","timestamp":1477958400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100000780","name":"European Commission project Unifying Control and Verification of Cyber-Physical Systems","doi-asserted-by":"publisher","award":["643921"],"award-info":[{"award-number":["643921"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2016,11]]},"DOI":"10.1109\/tcyb.2015.2483780","type":"journal-article","created":{"date-parts":[[2015,10,23]],"date-time":"2015-10-23T16:05:54Z","timestamp":1445616354000},"page":"2643-2655","source":"Crossref","is-referenced-by-count":16,"title":["Policy Search for the Optimal Control of Markov Decision Processes: A Novel Particle-Based Iterative Scheme"],"prefix":"10.1109","volume":"46","author":[{"given":"Giorgio","family":"Manganini","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matteo","family":"Pirotta","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Marcello","family":"Restelli","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6063-8060","authenticated-orcid":false,"given":"Luigi","family":"Piroddi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Maria","family":"Prandini","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-71493-4_4"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24743-2_22"},{"key":"ref33","author":"okabe","year":"1992","journal-title":"Spatial Tessellations Concepts and Applications of Voronoi Diagrams"},{"key":"ref32","author":"bertsekas","year":"1996","journal-title":"Stochastic Optimal Control The Discrete-Time Case"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3182\/20140824-6-ZA-1003.01987"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/BF02573985"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1112\/S0025579300002850"},{"key":"ref35","first-page":"441","article-title":"Bias in natural actor-critic algorithms","author":"thomas","year":"2014","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref34","first-page":"288","article-title":"Reinforcement learning for continuous action using stochastic gradient ascent","author":"kimura","year":"1998","journal-title":"Proc Intell Auton Syst (IAS-5)"},{"key":"ref10","first-page":"9","article-title":"Fitted Q-iteration in continuous action-space MDPs","author":"antos","year":"2008","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2012.6425809"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s11768-011-1005-3"},{"key":"ref12","first-page":"441","article-title":"Regularized policy iteration","author":"farahmand","year":"2008","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref13","first-page":"1143","article-title":"On actor-critic algorithms","volume":"42","author":"konda","year":"2003","journal-title":"Soc Ind Appl Math J Control Optim"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2013.2276571"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2009.2027233"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/MCAS.2009.933854"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2015.7280673"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2001.932842"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2009.12.004"},{"key":"ref4","volume":"2","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2011.09.005"},{"key":"ref5","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1201\/9781439821091"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-4757-2"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3166\/ejc.16.583-594"},{"key":"ref9","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"J Mach Learn Res"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2008.03.027"},{"key":"ref20","first-page":"29","article-title":"Reinforcement learning for humanoid robotics","author":"peters","year":"2003","journal-title":"Proc IEEE-RAS Int Conf Humanoid Robots (Humanoids)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2004.1307456"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2313655"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2010.2050586"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2313915"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01551-9","author":"szepesv\u00e1ri","year":"2010","journal-title":"Algorithms for Reinforcement Learning"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2012.2218595"},{"key":"ref25","author":"rubinstein","year":"2004","journal-title":"The Cross-Entropy Method A Unified Approach to Combinatorial Optimization Monte-Carlo Simulation and Machine Learning"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/7586170\/07303937.pdf?arnumber=7303937","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T03:31:26Z","timestamp":1748662286000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7303937\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,11]]},"references-count":42,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2015.2483780","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,11]]}}}