{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,7]],"date-time":"2025-12-07T03:36:30Z","timestamp":1765078590441,"version":"3.37.3"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1109\/tnnls.2022.3190246","type":"journal-article","created":{"date-parts":[[2022,7,21]],"date-time":"2022-07-21T19:28:20Z","timestamp":1658431700000},"page":"5268-5282","source":"Crossref","is-referenced-by-count":1,"title":["Stochastic Optimal Control for Multivariable Dynamical Systems Using Expectation Maximization"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3535-5464","authenticated-orcid":false,"given":"Prakash","family":"Mallick","sequence":"first","affiliation":[{"name":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2033-4249","authenticated-orcid":false,"given":"Zhiyong","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref15","first-page":"703","article-title":"Combining model-based and model-free updates for trajectory-centric reinforcement learning","volume":"70","author":"chebotar","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487175"},{"key":"ref11","first-page":"465","article-title":"PILCO: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn (ICML)"},{"article-title":"Motor skill learning with local trajectory methods","year":"2014","author":"levine","key":"ref10"},{"key":"ref17","first-page":"222","article-title":"Iterative linear quadratic regulator design for nonlinear biological movement systems","volume":"1","author":"li","year":"2004","journal-title":"Proc ICINCO"},{"key":"ref16","first-page":"2816","article-title":"Bregman alternating direction method of multipliers","author":"wang","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1155\/S1110865702000872"},{"key":"ref18","first-page":"1071","article-title":"Learning neural network policies with guided policy search under unknown dynamics","author":"levine","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1201\/b14539"},{"journal-title":"Linear Estimation","year":"2000","author":"kailath","key":"ref45"},{"journal-title":"Nonlinear Time Series Theory Methods and Applications with R Examples","year":"2014","author":"douc","key":"ref48"},{"journal-title":"Expectation-maximization as lower bound maximization","year":"1998","author":"minka","key":"ref47"},{"journal-title":"Stochastic Processes and Filtering Theory","year":"2007","author":"jazwinski","key":"ref42"},{"key":"ref41","first-page":"1723","article-title":"Reward augmented maximum likelihood for neural structured prediction","author":"norouzi","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref44","volume":"382","author":"mclachlan","year":"2007","journal-title":"The EM Algorithm and Extensions"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s11222-015-9561-x"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176346060"},{"journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming","year":"2014","author":"puterman","key":"ref8"},{"key":"ref7","volume":"1","author":"bertsekas","year":"1995","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/9123.003.0009"},{"key":"ref4","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5278-7"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3174051"},{"key":"ref5","article-title":"Guided policy search as approximate mirror descent","author":"montgomery","year":"2016","journal-title":"arXiv 1607 04614"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1002\/9781119432036"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.01.142"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2010.VI.037"},{"key":"ref37","first-page":"238","article-title":"Memoryless policies: Theoretical limitations and practical results","volume":"3","author":"littman","year":"0","journal-title":"From Animals to Animats 3 Proceedings of the Third International Conference on Simulation of Adaptive Behavior"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1287\/moor.12.3.441"},{"key":"ref31","first-page":"849","article-title":"Policy search for motor primitives in robotics","author":"kober","year":"2009","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-009-9132-0"},{"key":"ref33","first-page":"2329","article-title":"Point-based value iteration for continuous POMDPs","volume":"7","author":"porta","year":"2006","journal-title":"J Mach Learn Res"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553508"},{"journal-title":"Optimal Control and Estimation","year":"1994","author":"stengel","key":"ref2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/9816.003.0050"},{"journal-title":"Pattern Recognition and Machine Learning","year":"2006","author":"bishop","key":"ref39"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1613\/jair.714"},{"key":"ref24","first-page":"817","article-title":"Variational inference for policy search in changing situations","author":"neumann","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn (ICML)"},{"key":"ref23","article-title":"A method for using belief networks as influence diagrams","author":"cooper","year":"2013","journal-title":"arXiv 1304 2346"},{"article-title":"Modeling purposeful adaptive behavior with the principle of maximum causal entropy","year":"2010","author":"ziebart","key":"ref26"},{"key":"ref25","first-page":"1","article-title":"Modeling interaction via the principle of maximum causal entropy","author":"ziebart","year":"2010","journal-title":"Proc ICML"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3261-0"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2010.10.013"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2005.05.008"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143963"},{"key":"ref27","first-page":"232","article-title":"An expectation maximization algorithm for continuous Markov decision processes with arbitrary reward","author":"hoffman","year":"2009","journal-title":"Proc Artif Intell Statist"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.2.271"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10237282\/09836999.pdf?arnumber=9836999","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,25]],"date-time":"2023-09-25T18:25:09Z","timestamp":1695666309000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9836999\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9]]},"references-count":49,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2022.3190246","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2023,9]]}}}