{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T10:28:00Z","timestamp":1743157680025,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":18,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642041792"},{"type":"electronic","value":"9783642041808"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-04180-8_48","type":"book-chapter","created":{"date-parts":[[2009,8,27]],"date-time":"2009-08-27T08:11:20Z","timestamp":1251360680000},"page":"469-484","source":"Crossref","is-referenced-by-count":3,"title":["Efficient Sample Reuse in EM-Based Policy Search"],"prefix":"10.1007","author":[{"given":"Hirotaka","family":"Hachiya","sequence":"first","affiliation":[]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[]},{"given":"Masashi","family":"Sugiyama","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"48_CR1","unstructured":"Bagnell, J.A., Kakade, S., Ng, A.Y., Schneider, J.: Policy search by dynamic programming. In: Neural Information Processing Systems, vol.\u00a016 (2003)"},{"issue":"2","key":"48_CR2","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1162\/neco.1997.9.2.271","volume":"9","author":"P. Dayan","year":"1997","unstructured":"Dayan, P., Hinton, G.E.: Using expectation-maximization for reinforcement learning. Neural Computation\u00a09(2), 271\u2013278 (1997)","journal-title":"Neural Computation"},{"key":"48_CR3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"A.P. 
Dempster","year":"1977","unstructured":"Dempster, A.P., Laird, N.M., Rubin, D.B.: Maximum likelihood from incomplete data via the em algorithm. Journal of the Royal Statistical Society B\u00a039, 1\u201338 (1977)","journal-title":"Journal of the Royal Statistical Society B"},{"key":"48_CR4","unstructured":"Hachiya, H., Akiyama, T., Sugiyama, M., Peters, J.: Adaptive importance sampling with automatic model selection in value function approximation. In: Proceedings of the Twenty-Third National Conference on Artificial Intelligence (2008)"},{"key":"48_CR5","unstructured":"Kakade, S.: A natural policy gradient. In: Neural Information Processing Systems, vol.\u00a014, pp. 1531\u20131538 (2002)"},{"key":"48_CR6","doi-asserted-by":"crossref","unstructured":"Kober, J., Peters, J.: Policy search for motor primitives in robotics. In: Neural Information Processing Systems, vol.\u00a021 (2008)","DOI":"10.1109\/ROBOT.2009.5152577"},{"key":"48_CR7","unstructured":"Peshkin, L., Shelton, C.R.: Learning from scarce experience. In: Proceedings of International Conference on Machine Learning, pp. 498\u2013505 (2002)"},{"key":"48_CR8","doi-asserted-by":"crossref","unstructured":"Peters, J., Schaal, S.: Reinforcement learning by reward-weighted regression for operational space control. In: Proceedings of the International Conference on Machine Learning (2007)","DOI":"10.1145\/1273496.1273590"},{"key":"48_CR9","doi-asserted-by":"crossref","first-page":"280","DOI":"10.1007\/11564096_29","volume-title":"Machine Learning: ECML 2005","author":"Jan Peters","year":"2005","unstructured":"Peters, J., Vijayakumar, S., Schaal, S.: Natural actor-critic. In: Proceedings of the 16th European Conference on Machine Learning, pp. 280\u2013291 (2005)"},{"key":"48_CR10","unstructured":"Precup, D., Sutton, R.S., Singh, S.: Eligibility traces for off-policy policy evaluation. In: Proceedings of International Conference on Machine Learning, pp. 
759\u2013766 (2000)"},{"key":"48_CR11","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316436","volume-title":"Linear Statistical Inference and Its Applications","author":"C.R. Rao","year":"1973","unstructured":"Rao, C.R.: Linear Statistical Inference and Its Applications. Wiley, Chichester (1973)"},{"key":"48_CR12","unstructured":"Schaal, S.: The SL Simulation and Real-Time Control Software Package. University of Southern California (2007)"},{"key":"48_CR13","unstructured":"Shelton, C.R.: Policy improvement for POMDPs using normalized importance sampling. In: Proceedings of Uncertainty in Artificial Intelligence, pp. 496\u2013503 (2001)"},{"issue":"2","key":"48_CR14","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/S0378-3758(00)00115-4","volume":"90","author":"H. Shimodaira","year":"2000","unstructured":"Shimodaira, H.: Improving predictive inference under covariate shift by weighting the log-likelihood function. Journal of Statistical Planning and Inference\u00a090(2), 227\u2013244 (2000)","journal-title":"Journal of Statistical Planning and Inference"},{"key":"48_CR15","first-page":"985","volume":"8","author":"M. Sugiyama","year":"2007","unstructured":"Sugiyama, M., Krauledat, M., M\u00fcller, K.-R.: Covariate shift adaptation by importance weighted cross validation. Journal of Machine Learning Research\u00a08, 985\u20131005 (2007)","journal-title":"Journal of Machine Learning Research"},{"key":"48_CR16","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"48_CR17","first-page":"1057","volume-title":"Advances in Neural Information Processing Systems","author":"R.S. Sutton","year":"2000","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. 
In: Advances in Neural Information Processing Systems, vol.\u00a012, pp. 1057\u20131063. MIT Press, Cambridge (2000)"},{"key":"48_CR18","first-page":"229","volume":"8","author":"R.J. Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-04180-8_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T21:52:44Z","timestamp":1739310764000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-04180-8_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642041792","9783642041808"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-04180-8_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}