{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T07:46:55Z","timestamp":1773733615409,"version":"3.50.1"},"reference-count":13,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,1,25]],"date-time":"2016-01-25T00:00:00Z","timestamp":1453680000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Life Robotics"],"published-print":{"date-parts":[[2016,3]]},"DOI":"10.1007\/s10015-015-0260-7","type":"journal-article","created":{"date-parts":[[2016,1,25]],"date-time":"2016-01-25T12:24:45Z","timestamp":1453724685000},"page":"125-131","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["EM-based policy hyper parameter exploration: application to standing and balancing of a two-wheeled smartphone robot"],"prefix":"10.1007","volume":"21","author":[{"given":"Jiexin","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eiji","family":"Uchibe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kenji","family":"Doya","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,1,25]]},"reference":[{"issue":"1\u20132","key":"260_CR1","first-page":"1","volume":"2","author":"MP Deisenroth","year":"2013","unstructured":"Deisenroth MP, Neumann G, Peters J (2013) A survey on policy search for robotics. Found Trends Robot 2(1\u20132):1\u2013142","journal-title":"Found Trends Robot"},{"key":"260_CR2","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8:229\u2013256","journal-title":"Mach Learn"},{"key":"260_CR3","doi-asserted-by":"crossref","unstructured":"Peters J, Schaal S (2006) Policy gradient methods for robotics. In: Proceedings of the IEEE\/RSJ International Conference on Intelligent Robotics and Systems. Beijing, China","DOI":"10.1109\/IROS.2006.282564"},{"issue":"4","key":"260_CR4","doi-asserted-by":"crossref","first-page":"551","DOI":"10.1016\/j.neunet.2009.12.004","volume":"21","author":"F Sehnke","year":"2010","unstructured":"Sehnke F, Osendorfer C, R\u00fcckstie\u00df T, Graves A, Peters J, Schmidhuber J (2010) Parameter-exploring policy gradients. Neural Netw 21(4):551\u2013559","journal-title":"Neural Netw"},{"key":"260_CR5","doi-asserted-by":"crossref","unstructured":"Peters J, Schaal S (2007) Reinforcement learning by reward-weighted regression for operational space control. In: IEEE Proceedings of the IEEE International Conference on Intelligent Robots and Systems. Piscataway, NJ","DOI":"10.1145\/1273496.1273590"},{"key":"260_CR6","unstructured":"Yoshida N, Yoshimoto J, Uchibe E, Doya K (2012) Development of robot platform with smart phone. In: Proceedings of the annual conference on Robotics Society of Japan. (in Japanese)"},{"key":"260_CR7","unstructured":"Wang J, Uchibe E, Doya K (2013) Standing-up and balancing behaviors of android phone robot, Technical committee on Nonlinear Problems, IEICE, Hong Kong, China"},{"key":"260_CR8","unstructured":"Wang J, Uchibe E, Doya K (2014) Control of two-wheel balancing and standing-up behaviors by an android phone robot. In: Proceedings of the annual conference on Robotics Society of Japan"},{"key":"260_CR9","doi-asserted-by":"crossref","first-page":"2798","DOI":"10.1162\/NECO_a_00199","volume":"23","author":"H Hachiya","year":"2011","unstructured":"Hachiya H, Peters J, Sugiyama M (2011) Reward-weighted regression with sample reuse for direct policy search in reinforcement learning. Neural Comput 23:2798\u20132832","journal-title":"Neural Comput"},{"issue":"1","key":"260_CR10","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1162\/089976600300015961","volume":"12","author":"K Doya","year":"2000","unstructured":"Doya K (2000) Reinforcement learning in continuous time and space. Neural Comput 12(1):219\u2013245","journal-title":"Neural Comput"},{"key":"260_CR11","doi-asserted-by":"crossref","unstructured":"Riedmiller M, Peters J, Schaal S (2007) Evaluation of Policy Gradient Methods and Variants on the Cart-Pole Benchmark. In: Proceedings of the IEEE Symposium on Approximate Dynamic Programming and Reinforcement Learning","DOI":"10.1109\/ADPRL.2007.368196"},{"issue":"2","key":"260_CR12","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1162\/106365601750190398","volume":"9","author":"N Hansen","year":"2001","unstructured":"Hansen N, Ostermeier A (2001) Completely derandomized self-adaptation in evolution strategies. Evolut Comput 9(2):159\u2013195","journal-title":"Evolut Comput"},{"key":"260_CR13","first-page":"3137","volume":"11","author":"E Theodorou","year":"2010","unstructured":"Theodorou E, Buchli J, Schaal S (2010) A generalized path integral control approach to reinforcement learning. J Mach Learn Res 11:3137\u20133181","journal-title":"J Mach Learn Res"}],"container-title":["Artificial Life and Robotics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-015-0260-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10015-015-0260-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-015-0260-7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,26]],"date-time":"2019-03-26T19:07:56Z","timestamp":1553627276000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10015-015-0260-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,1,25]]},"references-count":13,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2016,3]]}},"alternative-id":["260"],"URL":"https:\/\/doi.org\/10.1007\/s10015-015-0260-7","relation":{},"ISSN":["1433-5298","1614-7456"],"issn-type":[{"value":"1433-5298","type":"print"},{"value":"1614-7456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,1,25]]}}}