{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T09:50:06Z","timestamp":1725702606747},"publisher-location":"Berlin, Heidelberg","reference-count":12,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642332654"},{"type":"electronic","value":"9783642332661"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-33266-1_6","type":"book-chapter","created":{"date-parts":[[2012,9,18]],"date-time":"2012-09-18T19:42:31Z","timestamp":1347997351000},"page":"42-49","source":"Crossref","is-referenced-by-count":2,"title":["Adaptive Exploration Using Stochastic Neurons"],"prefix":"10.1007","author":[{"given":"Michel","family":"Tokic","sequence":"first","affiliation":[]},{"given":"G\u00fcnther","family":"Palm","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"6_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"6_CR2","unstructured":"Wiering, M.: Explorations in Efficient Reinforcement Learning. PhD thesis, University of Amsterdam, Amsterdam (1999)"},{"key":"6_CR3","unstructured":"Thrun, S.B.: Efficient exploration in reinforcement learning. Technical Report CMU-CS-92-102, Carnegie Mellon University, Pittsburgh, USA (1992)"},{"key":"6_CR4","first-page":"397","volume":"3","author":"P. Auer","year":"2002","unstructured":"Auer, P.: Using confidence bounds for exploitation-exploration trade-offs. The Journal of Machine Learning Research\u00a03, 397\u2013422 (2002)","journal-title":"The Journal of Machine Learning Research"},{"key":"6_CR5","doi-asserted-by":"publisher","first-page":"1999","DOI":"10.1016\/j.cor.2006.10.004","volume":"35","author":"N.J. Eck van","year":"2008","unstructured":"van Eck, N.J., van Wezel, M.: Application of reinforcement learning to the game of Othello. Computers and Operations Research\u00a035, 1999\u20132017 (2008)","journal-title":"Computers and Operations Research"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Fau\u00dfer, S., Schwenker, F.: Learning a strategy with neural approximated temporal-difference methods in english draughts. In: Proceedings of the 20th International Conference on Pattern Recognition, ICPR 2010, pp. 2925\u20132928. IEEE Computer Society (2010)","DOI":"10.1109\/ICPR.2010.717"},{"key":"6_CR7","unstructured":"Rummery, G.A., Niranjan, M.: On-line Q-learning using connectionist systems. Technical Report CUED\/F-INFENG\/TR 166, Cambridge University (1994)"},{"issue":"7095","key":"6_CR8","doi-asserted-by":"publisher","first-page":"876","DOI":"10.1038\/nature04766","volume":"441","author":"N.D. Daw","year":"2006","unstructured":"Daw, N.D., O\u2019Doherty, J.P., Dayan, P., Seymour, B., Dolan, R.J.: Cortical substrates for exploratory decisions in humans. Nature\u00a0441(7095), 876\u2013879 (2006)","journal-title":"Nature"},{"key":"6_CR9","first-page":"229","volume":"8","author":"R.J. Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical Gradient-Following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"},{"key":"6_CR10","unstructured":"Watkins, C.: Learning from Delayed Rewards. PhD thesis, University of Cambridge, England (1989)"},{"issue":"4","key":"6_CR11","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1016\/j.neunet.2010.01.001","volume":"23","author":"M. Grzes","year":"2010","unstructured":"Grzes, M., Kudenko, D.: Online learning of shaping rewards in reinforcement learning. Neural Networks\u00a023(4), 541\u2013550 (2010)","journal-title":"Neural Networks"},{"key":"6_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1007\/978-3-642-24455-1_33","volume-title":"KI 2011: Advances in Artificial Intelligence","author":"M. Tokic","year":"2011","unstructured":"Tokic, M., Palm, G.: Value-Difference Based Exploration: Adaptive Control between Epsilon-Greedy and Softmax. In: Bach, J., Edelkamp, S. (eds.) KI 2011. LNCS, vol.\u00a07006, pp. 335\u2013346. Springer, Heidelberg (2011)"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2012"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-33266-1_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,7]],"date-time":"2019-05-07T07:44:38Z","timestamp":1557215078000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-33266-1_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642332654","9783642332661"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-33266-1_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}