{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T20:32:36Z","timestamp":1761597156987,"version":"3.37.0"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2009,8,1]],"date-time":"2009-08-01T00:00:00Z","timestamp":1249084800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Robot"],"published-print":{"date-parts":[[2009,8]]},"DOI":"10.1007\/s10514-009-9132-0","type":"journal-article","created":{"date-parts":[[2009,8,10]],"date-time":"2009-08-10T17:42:04Z","timestamp":1249926124000},"page":"123-130","source":"Crossref","is-referenced-by-count":39,"title":["Learning model-free robot control by a Monte Carlo EM algorithm"],"prefix":"10.1007","volume":"27","author":[{"given":"Nikos","family":"Vlassis","sequence":"first","affiliation":[]},{"given":"Marc","family":"Toussaint","sequence":"additional","affiliation":[]},{"given":"Georgios","family":"Kontes","sequence":"additional","affiliation":[]},{"given":"Savas","family":"Piperidis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,8,11]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Abbeel, P., Coates, A., Quigley, M., & Ng, A. Y. (2007). An application of reinforcement learning to aerobatic helicopter flight. In Proc. neural information processing systems.","key":"9132_CR1","DOI":"10.7551\/mitpress\/7503.003.0006"},{"key":"9132_CR2","volume-title":"Neuro-dynamic programming","author":"D. P. Bertsekas","year":"1996","unstructured":"Bertsekas, D. P., & Tsitsiklis, J. N. (1996). Neuro-dynamic programming. Nashua: Athena Scientific."},{"unstructured":"Cooper, G. F. (1988). A method for using belief networks as influence diagrams. In Proc. 4th workshop on uncertainty in artificial intelligence (pp. 55\u201363), Minneapolis, Minnesota.","key":"9132_CR3"},{"issue":"2","key":"9132_CR4","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1162\/neco.1997.9.2.271","volume":"9","author":"P. Dayan","year":"1997","unstructured":"Dayan, P., & Hinton, G. E. (1997). Using expectation-maximization for reinforcement learning. Neural Computation, 9(2), 271\u2013278.","journal-title":"Neural Computation"},{"key":"9132_CR5","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"A. P. Dempster","year":"1977","unstructured":"Dempster, A. P., Laird, N. M., & Rubin, D. B. (1977). Maximum likelihood from incomplete data via the EM algorithm. Journal of the Royal Statistical Society Series B, 39, 1\u201338.","journal-title":"Journal of the Royal Statistical Society Series B"},{"unstructured":"Hoffman, M., Doucet, A., de Freitas, N., & Jasra, A. (2008). Bayesian policy learning with trans-dimensional MCMC. In Proc. neural information processing systems.","key":"9132_CR6"},{"issue":"1","key":"9132_CR7","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1007\/s10846-005-9022-4","volume":"44","author":"Y. Kim","year":"2005","unstructured":"Kim, Y., Kim, S. H., & Kwak, Y. K. (2005). Dynamic analysis of a nonholonomic two-wheeled inverted pendulum robot. Journal of Intelligent and Robotic Systems, 44(1), 25\u201346.","journal-title":"Journal of Intelligent and Robotic Systems"},{"unstructured":"Kober, J., & Peters, J. (2009). Policy search for motor primitives in robotics. In Proc. neural information processing systems.","key":"9132_CR8"},{"doi-asserted-by":"crossref","unstructured":"Martinez-Cantin, R., de Freitas, N., Castellanos, J. A., & Doucet, A. (2009). A Bayesian exploration-exploitation approach for optimal online sensing and planning with a visually guided mobile robot. Autonomous Robots. doi: 10.1007\/s10514-009-9130-2 .","key":"9132_CR9","DOI":"10.1007\/s10514-009-9130-2"},{"key":"9132_CR10","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1007\/978-94-011-5014-9_12","volume-title":"Learning in graphical models","author":"R. M. Neal","year":"1998","unstructured":"Neal, R. M., & Hinton, G. E. (1998). A view of the EM algorithm that justifies incremental, sparse, and other variants. In M. I. Jordan (Ed.), Learning in graphical models (pp. 355\u2013368). Dordrecht: Kluwer Academic."},{"unstructured":"Ng, A. Y., & Jordan, M. I. (2000). PEGASUS: a policy search method for large MDPs and POMDPs. In Proc. uncertainty in artificial intelligence.","key":"9132_CR11"},{"issue":"7\u20139","key":"9132_CR12","doi-asserted-by":"crossref","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008a). Natural actor critic. Neurocomputing, 71(7\u20139), 1180\u20131190.","journal-title":"Neurocomputing"},{"issue":"4","key":"9132_CR13","doi-asserted-by":"crossref","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008b). Reinforcement learning of motor skills with policy gradients. Neural Networks, 21(4), 682\u2013697.","journal-title":"Neural Networks"},{"doi-asserted-by":"crossref","unstructured":"Peters, J., & Kober, J. (2009). Using reward-weighted imitation for robot reinforcement learning. In Proc. 2009 IEEE int. symp. on approximate dynamic programming and reinforcement learning.","key":"9132_CR14","DOI":"10.1109\/ADPRL.2009.4927549"},{"issue":"1","key":"9132_CR15","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M. Riedmiller","year":"2009","unstructured":"Riedmiller, M., Gabel, T., Hafner, R., & Lange, S. (2009). Reinforcement learning for robot soccer. Autonomous Robots, 27(1), 55\u201373. This issue, part A.","journal-title":"Autonomous Robots"},{"doi-asserted-by":"crossref","unstructured":"R\u00fcckstie\u00df, T., Felder, M., & Schmidhuber, J. (2008). State-dependent exploration for policy gradient methods. In Proc. European conf. on machine learning.","key":"9132_CR16","DOI":"10.1007\/978-3-540-87481-2_16"},{"key":"9132_CR17","volume-title":"Reinforcement learning: an introduction","author":"R. S. Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: an introduction. Cambridge: MIT Press."},{"unstructured":"Tedrake, R., Zhang, T. W., & Seung, H. S. (2005). Learning to walk in 20 minutes. In Proc. 14th Yale workshop on adaptive and learning systems.","key":"9132_CR18"},{"doi-asserted-by":"crossref","unstructured":"Toussaint, M., & Storkey, A. (2006). Probabilistic inference for solving discrete and continuous state Markov decision processes. In Proc. int. conf. on machine learning.","key":"9132_CR19","DOI":"10.1145\/1143844.1143963"},{"doi-asserted-by":"crossref","unstructured":"Vlassis, N., & Toussaint, M. (2009). Model-free reinforcement learning as mixture learning. In Proc. int. conf. on machine learning, Montreal, Canada.","key":"9132_CR20","DOI":"10.1145\/1553374.1553512"},{"key":"9132_CR21","doi-asserted-by":"crossref","first-page":"699","DOI":"10.1080\/01621459.1990.10474930","volume":"85","author":"G. Wei","year":"1990","unstructured":"Wei, G., & Tanner, M. (1990). A Monte Carlo implementation of the EM algorithm and the poor man\u2019s data augmentation algorithm. Journal of the American Statistical Association, 85, 699\u2013704.","journal-title":"Journal of the American Statistical Association"}],"container-title":["Autonomous Robots"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9132-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10514-009-9132-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9132-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T18:18:43Z","timestamp":1739297923000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10514-009-9132-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,8]]},"references-count":21,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2009,8]]}},"alternative-id":["9132"],"URL":"https:\/\/doi.org\/10.1007\/s10514-009-9132-0","relation":{},"ISSN":["0929-5593","1573-7527"],"issn-type":[{"type":"print","value":"0929-5593"},{"type":"electronic","value":"1573-7527"}],"subject":[],"published":{"date-parts":[[2009,8]]}}}