{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T21:35:27Z","timestamp":1743024927759,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540726647"},{"type":"electronic","value":"9783540726654"}],"license":[{"start":{"date-parts":[[2007,1,1]],"date-time":"2007-01-01T00:00:00Z","timestamp":1167609600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2007]]},"DOI":"10.1007\/978-3-540-72665-4_37","type":"book-chapter","created":{"date-parts":[[2007,6,21]],"date-time":"2007-06-21T20:22:09Z","timestamp":1182457329000},"page":"429-440","source":"Crossref","is-referenced-by-count":4,"title":["Reinforcement Learning in Nonstationary Environment Navigation Tasks"],"prefix":"10.1007","author":[{"given":"Terran","family":"Lane","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Ridens","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Scott","family":"Stevens","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"37_CR1","unstructured":"Finney, S., Gardiol, N.H., Kaelbling, L.P., Oates, T.: The thing that we tried didn\u2019t work very well: Deictic representation in reinforcement learning. In: UAI-2002 (2002)"},{"key":"37_CR2","doi-asserted-by":"crossref","unstructured":"McCallum, A.: Overcoming incomplete perception with utile distinction memory. In: ICML-93 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50031-9"},{"key":"37_CR3","volume-title":"Optimization and neural computation series","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. In: Optimization and neural computation series, Athena Scientific, Belmont (1996)"},{"key":"37_CR4","doi-asserted-by":"publisher","first-page":"1107","DOI":"10.1162\/jmlr.2003.4.6.1107","volume":"4","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"37_CR5","doi-asserted-by":"crossref","unstructured":"Mahadevan, S.: Proto-value functions: Developmental reinforcement learning. In: ICML-2005 (2005)","DOI":"10.1145\/1102351.1102421"},{"issue":"1\u20132","key":"37_CR6","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S. D\u017eeroski","year":"2001","unstructured":"D\u017eeroski, S., De Raedt, L., Driessens, K.: Relational reinforcement learning. Machine Learning\u00a043(1\u20132), 7\u201352 (2001)","journal-title":"Machine Learning"},{"key":"37_CR7","unstructured":"van Otterlo, M.: A survey of reinforcement learning in relational domains. Technical Report TR-CTIT-05-31, University of Twente, Centre for Telematics and Information Technology (July 2005)"},{"key":"37_CR8","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1613\/jair.1700","volume":"25","author":"A. Fern","year":"2006","unstructured":"Fern, A., Yoon, S., Givan, R.: Approximate policy iteration with a policy language bias: Solving relational markov decision processes. Journal of Artificial Intelligence Research\u00a025, 75\u2013118 (2006)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"37_CR9","unstructured":"Dean, T., Givan, R.: Model minimization in Markov decision processes. In: AAAI-97, pp. 106\u2013111 (1997)"},{"key":"37_CR10","unstructured":"Ravindran, B., Barto, A.G.: Relativized options: Choosing the right transformation. In: ICML-2003, pp. 608\u2013615 (2003)"},{"key":"37_CR11","unstructured":"Ravindran, B.: An Algebraic Approach to Abstraction in Reinforcement Learning. PhD thesis, Department of Computer Science, University of Massachusetts, Amherst, MA (2004)"},{"key":"37_CR12","unstructured":"Tash, J., Russell, S.: Control strategies for a stochastic planner. In: AAAI-94 (1994)"},{"key":"37_CR13","doi-asserted-by":"crossref","unstructured":"Dean, T., Kaelbling, L.P., Kirman, J., Nicholson, A.: Planning under time constraints in stochastic domains. Artificial Intelligence 76 (1995)","DOI":"10.1016\/0004-3702(94)00086-G"},{"key":"37_CR14","doi-asserted-by":"crossref","unstructured":"Baum, J., Nicholson, A.E.: Dynamic non-uniform abstractions for approximate planning in large structured stochastic domains. Technical Report 1998\/18, School of Computer Science and Software Engineering, Monash University, Melbourne (1998)","DOI":"10.1007\/BFb0095303"},{"key":"37_CR15","unstructured":"Glaubius, R., Smart, W.D.: Manifold representations for value-function approximation in reinforcement learning. Technical Report 05-19, Department of Computer Science and Engineering, Washington University in St. Louis (2005)"},{"key":"37_CR16","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley & Sons, New York (1994)"},{"key":"37_CR17","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"}],"container-title":["Lecture Notes in Computer Science","Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-72665-4_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,19]],"date-time":"2019-05-19T09:44:47Z","timestamp":1558259087000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-72665-4_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007]]},"ISBN":["9783540726647","9783540726654"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-72665-4_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2007]]}}}