{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T06:01:13Z","timestamp":1725516073407},"publisher-location":"Berlin, Heidelberg","reference-count":10,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540691594"},{"type":"electronic","value":"9783540691624"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-69162-4_18","type":"book-chapter","created":{"date-parts":[[2008,7,31]],"date-time":"2008-07-31T02:38:20Z","timestamp":1217471900000},"page":"167-176","source":"Crossref","is-referenced-by-count":0,"title":["Finding Exploratory Rewards by Embodied Evolution and Constrained Reinforcement Learning in the Cyber Rodents"],"prefix":"10.1007","author":[{"given":"Eiji","family":"Uchibe","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kenji","family":"Doya","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"18_CR1","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1177\/105971230501300206","volume":"13","author":"K. Doya","year":"2005","unstructured":"Doya, K., Uchibe, E.: The Cyber Rodent Project: Exploration of adaptive mechanisms for self-preservation and self-reproduction. Adaptive Behavior\u00a013, 149\u2013160 (2005)","journal-title":"Adaptive Behavior"},{"key":"18_CR2","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning. MIT Press\/Bradford Books (1998)"},{"key":"18_CR3","unstructured":"Morimura, T., Uchibe, E., Doya, K.: Utilizing the natural gradient in temporal difference reinforcement learning with eligibility traces. In: Proc. of the 2nd International Symposium on Information Geometry and its Application, pp. 256\u2013263 (2005)"},{"issue":"2","key":"18_CR4","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1109\/TEVC.2006.890270","volume":"11","author":"S. Elfwing","year":"2007","unstructured":"Elfwing, S., Uchibe, E., Doya, K., Christensen, H.I.: Evolutionary development of hierarchical learning structures. IEEE Transactions on Evolutionary Computation\u00a011(2), 249\u2013264 (2007)","journal-title":"IEEE Transactions on Evolutionary Computation"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Sato, T., Uchibe, E., Doya, K.: Learning how, what, and whether to communicate: emergence of protocommunication in reinforcement learning agents. Journal of Artificial Life and Robotics 12 (to appear, 2007)","DOI":"10.1007\/s10015-007-0444-x"},{"key":"18_CR6","first-page":"1281","volume-title":"Advances in Neural Information Processing Systems 17","author":"S. Singh","year":"2005","unstructured":"Singh, S., Barto, A.G., Chentanez, N.: Intrinsically motivated reinforcement learning. In: Saul, L.K., Weiss, Y., Bottou, L. (eds.) Advances in Neural Information Processing Systems 17, pp. 1281\u20131288. MIT Press, Cambridge (2005)"},{"key":"18_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0921-8890(02)00170-7","volume":"39","author":"R.A. Watson","year":"2002","unstructured":"Watson, R.A., Ficici, S.G., Pollack, J.B.: Embodied evolution: Distributing an evolutionary algorithm in a population of robots. Robotics and Autonomous Systems\u00a039, 1\u201318 (2002)","journal-title":"Robotics and Autonomous Systems"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Uchibe, E., Doya, K.: Constrained reinforcement learning from intrinsic and extrinsic rewards. In: Proc. of the International Conference of Development and Learning (2007)","DOI":"10.1109\/DEVLRN.2007.4354030"},{"key":"18_CR9","first-page":"187","volume-title":"Foundations of Genetic Algorithms 2","author":"L.J. Eshelman","year":"1993","unstructured":"Eshelman, L.J., Schaffer, J.D.: Real-coded genetic algorithms and interval-schemata. In: Foundations of Genetic Algorithms 2, pp. 187\u2013202. Morgan Kaufmann, San Francisco (1993)"},{"issue":"4","key":"18_CR10","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V.R. Konda","year":"2003","unstructured":"Konda, V.R., Tsitsiklis, J.N.: Actor-critic algorithms. SIAM Journal on Control and Optimization\u00a042(4), 1143\u20131166 (2003)","journal-title":"SIAM Journal on Control and Optimization"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-69162-4_18.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,3]],"date-time":"2021-05-03T00:29:17Z","timestamp":1620001757000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-69162-4_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540691594","9783540691624"],"references-count":10,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-69162-4_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[]}}