{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T23:43:52Z","timestamp":1743032632280,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642407048"},{"type":"electronic","value":"9783642407055"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-40705-5_7","type":"book-chapter","created":{"date-parts":[[2013,10,21]],"date-time":"2013-10-21T11:26:54Z","timestamp":1382354814000},"page":"68-79","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Meta-Learning of Exploration and Exploitation Parameters with Replacing Eligibility Traces"],"prefix":"10.1007","author":[{"given":"Michel","family":"Tokic","sequence":"first","affiliation":[]},{"given":"Friedhelm","family":"Schwenker","sequence":"additional","affiliation":[]},{"given":"G\u00fcnther","family":"Palm","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,10,22]]},"reference":[{"key":"7_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"7_CR2","unstructured":"Thrun, S.B.: Efficient exploration in reinforcement learning. Technical Report CMU-CS-92-102, Carnegie Mellon University, Pittsburgh (1992)"},{"key":"7_CR3","first-page":"397","volume":"3","author":"P Auer","year":"2002","unstructured":"Auer, P.: Using confidence bounds for exploitation-exploration trade-offs. J. Mach. Learn. Res. 3, 397\u2013422 (2002)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR4","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/11871842_29","volume-title":"ECML 2006. LNCS (LNAI)","author":"L Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C.: Bandit based monte-carlo planning. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol. 4212, pp. 282\u2013293. Springer, Heidelberg (2006)"},{"issue":"1","key":"7_CR5","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/S0893-6080(02)00228-9","volume":"16","author":"N Schweighofer","year":"2003","unstructured":"Schweighofer, N., Doya, K.: Meta-learning in reinforcement learning. Neural Netw. 16(1), 5\u20139 (2003)","journal-title":"Neural Netw."},{"key":"7_CR6","first-page":"530","volume-title":"ICONIP 2009, Part I. LNCS, vol. 5863","author":"K Kobayashi","year":"2009","unstructured":"Kobayashi, K., Mizoue, H., Kuremoto, T., Obayashi, M.: A meta-learning method based on temporal difference error. In: Leung, C.S., Lee, M., Chan, J.H. (eds.) ICONIP 2009, Part I. LNCS, vol. 5863, pp. 530\u2013537. Springer, Heidelberg (2009)"},{"key":"7_CR7","first-page":"335","volume-title":"KI 2011. LNCS, vol. 7006","author":"M Tokic","year":"2011","unstructured":"Tokic, M., Palm, G.: Value-difference based exploration: Adaptive control between epsilon-greedy and softmax. In: Bach, J., Edelkamp, S. (eds.) KI 2011. LNCS, vol. 7006, pp. 335\u2013346. Springer, Heidelberg (2011)"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Tokic, M., Ertle, P., Palm, G., S\u00f6ffker, D., Voos, H.: Robust exploration\/exploitation trade-offs in safety-critical applications. In: Proceedings of the 8th International Symposium on Fault Detection, Supervision and Safety of Technical Processes, Mexico City, Mexico, IFAC, pp. 660\u2013665 (2012)","DOI":"10.3182\/20120829-3-MX-2028.00160"},{"key":"7_CR9","first-page":"42","volume-title":"ICANN 2012, Part II. LNCS, vol. 7553","author":"M Tokic","year":"2012","unstructured":"Tokic, M., Palm, G.: Adaptive exploration using stochastic neurons. In: Villa, A.E., Duch, W., \u00c9rdi, P., Palm, G. (eds.) ICANN 2012, Part II. LNCS, vol. 7553, pp. 42\u201349. Springer, Heidelberg (2012)"},{"key":"7_CR10","first-page":"60","volume-title":"ANNPR 2012. LNCS, vol. 7477","author":"M Tokic","year":"2012","unstructured":"Tokic, M., Palm, G.: Gradient algorithms for exploration\/exploitation trade-offs: Global and local variants. In: Mana, N., Schwenker, F., Trentin, E. (eds.) ANNPR 2012. LNCS, vol. 7477, pp. 60\u201371. Springer, Heidelberg (2012)"},{"key":"7_CR11","first-page":"123","volume":"22","author":"S Singh","year":"1996","unstructured":"Singh, S., Sutton, R.S.: Reinforcement learning with replacing eligibility traces. Mach. Learn. 22, 123\u2013158 (1996)","journal-title":"Mach. Learn."},{"issue":"8","key":"7_CR12","doi-asserted-by":"publisher","first-page":"987","DOI":"10.1038\/nn0806-987","volume":"9","author":"Y Niv","year":"2006","unstructured":"Niv, Y., Daw, N.D., Dayan, P.: Choice values. Nat. Neurosci. 9(8), 987\u2013988 (2006)","journal-title":"Nat. Neurosci."},{"issue":"3","key":"7_CR13","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1016\/j.jmp.2008.12.005","volume":"53","author":"Y Niv","year":"2009","unstructured":"Niv, Y.: Reinforcement learning in the brain. J. Math. Psychol. 53(3), 139\u2013154 (2009)","journal-title":"J. Math. Psychol."},{"key":"7_CR14","unstructured":"Watkins, C.: Learning from delayed rewards. Ph.D. thesis, University of Cambridge, England (1989)"},{"issue":"1","key":"7_CR15","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1007\/s10994-006-8365-9","volume":"65","author":"AP George","year":"2006","unstructured":"George, A.P., Powell, W.B.: Adaptive stepsizes for recursive estimation with applications in approximate dynamic programming. Mach. Learn. 65(1), 167\u2013198 (2006)","journal-title":"Mach. Learn."},{"key":"7_CR16","first-page":"437","volume-title":"ECML 2005. LNCS (LNAI), vol. 3720","author":"J Vermorel","year":"2005","unstructured":"Vermorel, J., Mohri, M.: Multi-armed bandit algorithms and empirical evaluation. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol. 3720, pp. 437\u2013448. Springer, Heidelberg (2005)"},{"key":"7_CR17","unstructured":"Wiering, M.: Explorations in efficient reinforcement learning. PhD thesis, University of Amsterdam, Amsterdam (1999)"},{"key":"7_CR18","first-page":"203","volume-title":"KI 2010. LNCS, vol. 6359","author":"M Tokic","year":"2010","unstructured":"Tokic, M.: Adaptive $$\\epsilon $$-greedy exploration in reinforcement learning based on value differences. In: Dillmann, R., Beyerer, J., Hanebeck, U.D., Schultz, T. (eds.) KI 2010. LNCS, vol. 6359, pp. 203\u2013210. Springer, Heidelberg (2010)"},{"key":"7_CR19","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8, 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"7_CR20","unstructured":"Tokic, M., Bou Ammar, H.: Teaching reinforcement learning using a physical robot. In: Proceedings of the Workshop on Teaching Machine Learning at the 29th International Conference on Machine Learning, Edinburgh, UK, pp. 1\u20134 (2012)"},{"key":"7_CR21","unstructured":"Kimura, H., Miyazaki, K., Kobayashi, S.: Reinforcement learning in POMDPs with function approximation. In: Proceedings of the 14th International Conference on Machine Learning, San Francisco, CA, USA, pp. 152\u2013160. Morgan Kaufmann Publishers Inc. (1997)"},{"key":"7_CR22","first-page":"317","volume-title":"ECML 2005. LNCS (LNAI), vol. 3720","author":"M Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural fitted Q iteration - first experiences with a data efficient neural reinforcement learning method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol. 3720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Montemerlo, M., Dahlkamp, H.: Learning to drive a real car in 20 minutes. In: Proceedings of the FBIT 2007 Conference, Jeju, Korea. Special Track on, autonomous robots (2007)","DOI":"10.1109\/FBIT.2007.37"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Fau\u00dfer, S., Schwenker, F.: Learning a strategy with neural approximated temporal-difference methods in english draughts. In: Proceedings of the 20th International Conference on Pattern Recognition, pp. 2925\u20132928. IEEE Computer Society (2010)","DOI":"10.1109\/ICPR.2010.717"},{"key":"7_CR25","first-page":"90","volume-title":"ANNPR 2008. LNCS (LNAI), vol. 5064","author":"S Fau\u00dfer","year":"2008","unstructured":"Fau\u00dfer, S., Schwenker, F.: Neural approximation of monte carlo policy evaluation deployed in connect four. In: Prevost, L., Marinai, S., Schwenker, F. (eds.) ANNPR 2008. LNCS (LNAI), vol. 5064, pp. 90\u2013100. Springer, Heidelberg (2008)"},{"issue":"7\u20138","key":"7_CR26","doi-asserted-by":"publisher","first-page":"961","DOI":"10.1016\/S0893-6080(99)00046-5","volume":"12","author":"K Doya","year":"1999","unstructured":"Doya, K.: What are the computations of the cerebellum, the basal ganglia and the cerebral cortex? Neural Netw. 12(7\u20138), 961\u2013974 (1999)","journal-title":"Neural Netw."},{"issue":"18","key":"7_CR27","doi-asserted-by":"publisher","first-page":"8452","DOI":"10.1073\/pnas.1000496107","volume":"107","author":"AC Bostan","year":"2010","unstructured":"Bostan, A.C., Dum, R.P., Strick, P.L.: The basal ganglia communicate with the cerebellum. Proc. Nat. Acad. Sci. 107(18), 8452\u20138456 (2010)","journal-title":"Proc. Nat. Acad. Sci."}],"container-title":["Lecture Notes in Computer Science","Partially Supervised Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-40705-5_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,19]],"date-time":"2023-02-19T18:26:12Z","timestamp":1676831172000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-40705-5_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642407048","9783642407055"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-40705-5_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2013]]},"assertion":[{"value":"22 October 2013","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}