{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T22:43:39Z","timestamp":1725489819227},"publisher-location":"Berlin, Heidelberg","reference-count":11,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540425366"},{"type":"electronic","value":"9783540447955"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2003]]},"DOI":"10.1007\/3-540-44795-4_7","type":"book-chapter","created":{"date-parts":[[2007,8,15]],"date-time":"2007-08-15T06:35:10Z","timestamp":1187159710000},"page":"73-84","source":"Crossref","is-referenced-by-count":0,"title":["Learning While Exploring: Bridging the Gaps in the Eligibility Traces"],"prefix":"10.1007","author":[{"given":"Fredrik A.","family":"Dahl","sequence":"first","affiliation":[]},{"given":"Ole Martin","family":"Halck","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2001,8,30]]},"reference":[{"key":"7_CR1","first-page":"9","volume":"3","author":"R.S. Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the methods of temporal differences. Machine Learning 3 (1988) 9\u201344.","journal-title":"Machine Learning"},{"key":"7_CR2","first-page":"257","volume":"8","author":"G.J. Tesauro","year":"1992","unstructured":"Tesauro, G.J.: Practical issues in temporal difference learning. Machine Learning 8 (1992) 257\u2013277.","journal-title":"Machine Learning"},{"unstructured":"Boyan, J.A., Moore, A.W.: Learning evaluation functions for large acyclic domains. In: Saitta, L. (ed.): Proceedings of the Thirteenth International Conference on Machine Learning, Morgan Kaufmann (1996) 63\u201370.","key":"7_CR3"},{"unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: an Introduction. MIT Press (1998).URL: \n                    http:\/\/www-anw.cs.umass.edu\/~rich\/book\/the-book.html","key":"7_CR4"},{"key":"7_CR5","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. Journal of Artificial Intelligence Research 4 (1996) 237\u2013285.","journal-title":"Journal of Artificial Intelligence Research"},{"unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific (1996).","key":"7_CR6"},{"key":"7_CR7","doi-asserted-by":"publisher","first-page":"2017","DOI":"10.1162\/089976699300016070","volume":"11","author":"C. Szepesvari","year":"1999","unstructured":"Szepesvari, C., Littman, M.L.: A unified analysis of value-function-based reinforcement learning algorithms. Neural Computation 11 (1999) 2017\u20132060.","journal-title":"Neural Computation"},{"unstructured":"Thrun, S.B.: The role of exploration in learning control. In: White, D.A., Sofge, D.A. (eds.): Handbook of Intelligent Control: Neural, Fuzzy and Adaptive Approaches, Van Nostrand Reinhold, New York (1992).","key":"7_CR8"},{"unstructured":"John, G.H.: When the best move isn\u2019t optimal: Q-learning with exploration. In: Proceedings, 10\n                    th\n                   National Conference on Artificial Intelligence, AAAI Press (1994) 1464.","key":"7_CR9"},{"key":"7_CR10","volume-title":"Learning from Delayed Rewards","author":"C.J.C.H. Watkins","year":"1989","unstructured":"Watkins, C.J.C.H.: Learning from Delayed Rewards. PhD thesis, University of Cambridge, UK (1989)."},{"key":"7_CR11","volume-title":"Efficient Dynamic Programming-Based Learning for Control","author":"J. Peng","year":"1993","unstructured":"Peng, J.: Efficient Dynamic Programming-Based Learning for Control. PhD thesis, Northeastern University, Boston (1993)."}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML 2001"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-44795-4_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,2,21]],"date-time":"2019-02-21T21:16:21Z","timestamp":1550783781000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-44795-4_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001,8,30]]},"ISBN":["9783540425366","9783540447955"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/3-540-44795-4_7","relation":{},"ISSN":["0302-9743"],"issn-type":[{"type":"print","value":"0302-9743"}],"subject":[],"published":{"date-parts":[[2001,8,30]]}}}