{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T23:13:27Z","timestamp":1725664407165},"publisher-location":"Berlin, Heidelberg","reference-count":14,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540628583"},{"type":"electronic","value":"9783540687085"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[1997]]},"DOI":"10.1007\/3-540-62858-4_72","type":"book-chapter","created":{"date-parts":[[2012,2,26]],"date-time":"2012-02-26T22:52:24Z","timestamp":1330296744000},"page":"57-72","source":"Crossref","is-referenced-by-count":0,"title":["Integrated learning and planning based on truncating temporal differences"],"prefix":"10.1007","author":[{"given":"Pawe\u0142","family":"Cichosz","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2005,6,2]]},"reference":[{"key":"6_CR1","first-page":"835","volume":"13","author":"A. G. Barto","year":"1983","unstructured":"A. G. Barto, R. S. Sutton, and C. W. Anderson. Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Transactions on Systems, Man, and Cybernetics, 13:835\u2013846, 1983.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"6_CR2","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1613\/jair.135","volume":"2","author":"P. Cichosz","year":"1995","unstructured":"P. Cichosz. Truncating temporal differences: On the efficient implementation of TD(\u03bb) for reinforcement learning. Journal of Artificial Intelligence Research, 2:287\u2013318, 1995.","journal-title":"Journal of Artificial Intelligence Research"},{"unstructured":"P. Cichosz. Truncated temporal differences with function approximation: Successful examples using CMAC. In Proceedings of the Thirteenth European Symposium on Cybernetics and Systems Research (EMCSR-96), 1996.","key":"6_CR3"},{"doi-asserted-by":"crossref","unstructured":"P. Cichosz and J. J. Mulawka. Fast and efficient reinforcement learning with truncated temporal differences. In Proceedings of the Twelfth International Conference on Machine Learning (ML-95). Morgan Kaufmann, 1995.","key":"6_CR4","DOI":"10.1016\/B978-1-55860-377-6.50021-9"},{"unstructured":"P. Cichosz and J. J. Mulawka. Integrated architectures for learning, planning, and reacting based on approximating TD(\u03bb). In Proceedings of the First International Workshop on Intelligent Adaptive Systems (IAS-95), 1995.","key":"6_CR5"},{"unstructured":"Long-Ji Lin. Reinforcement Learning for Robots Using Neural Networks. PhD thesis, School of Computer Science, Carnegie-Mellon University, January 1993.","key":"6_CR6"},{"key":"6_CR7","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1016\/0004-3702(92)90058-6","volume":"55","author":"S. Mahadevan","year":"1992","unstructured":"S. Mahadevan and J. Connell. Automatic programming of behavior-based robots using reinforcement learning. Artificial Intelligence, 55:311\u2013365, 1992.","journal-title":"Artificial Intelligence"},{"key":"6_CR8","first-page":"103","volume":"13","author":"A. W. Moore","year":"1993","unstructured":"A. W. Moore and C. G. Atkeson. Prioritized sweeping: Reinforcement learning with less memory and less time. Machine Learning, 13:103\u2013130, 1993.","journal-title":"Machine Learning"},{"doi-asserted-by":"crossref","unstructured":"J. Peng and R. J. Williams. Efficient learning and planning within the Dyna framework. In Proceedings of the Second International Conference on Simulation of Adaptive Behavior. The MIT Press, 1993.","key":"6_CR9","DOI":"10.1109\/ICNN.1993.298551"},{"unstructured":"R. S. Sutton. Temporal Credit Assignment in Reinforcement Learning. PhD thesis, Department of Computer and Information Science, University of Massachusetts, 1984.","key":"6_CR10"},{"key":"6_CR11","first-page":"9","volume":"3","author":"R. S. Sutton","year":"1988","unstructured":"R. S. Sutton. Learning to predict by the methods of temporal differences. Machine Learning, 3:9\u201344, 1988.","journal-title":"Machine Learning"},{"doi-asserted-by":"crossref","unstructured":"R. S. Sutton. Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In Proceedings of the Seventh International Conference on Machine Learning (ML-90). Morgan Kaufmann, 1990.","key":"6_CR12","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"unstructured":"R. S. Sutton. Generalization in reinforcement learning: Successful examples using sparse coarse coding. In Advances in Neural Information Processing Systems 8. Morgan Kaufmann, 1996.","key":"6_CR13"},{"key":"6_CR14","volume-title":"PhD thesis","author":"C. J. C. H. Watkins","year":"1989","unstructured":"C. J. C. H. Watkins. Learning from Delayed Rewards. PhD thesis, King's College, Cambridge, 1989."}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML-97"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-62858-4_72.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,28]],"date-time":"2021-04-28T01:39:58Z","timestamp":1619573998000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-62858-4_72"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1997]]},"ISBN":["9783540628583","9783540687085"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/3-540-62858-4_72","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[1997]]}}}