{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T18:11:55Z","timestamp":1725559915045},"publisher-location":"Berlin, Heidelberg","reference-count":18,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540278726"},{"type":"electronic","value":"9783540318828"}],"license":[{"start":{"date-parts":[[2005,1,1]],"date-time":"2005-01-01T00:00:00Z","timestamp":1104537600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2005]]},"DOI":"10.1007\/11527862_36","type":"book-chapter","created":{"date-parts":[[2010,7,20]],"date-time":"2010-07-20T19:48:01Z","timestamp":1279655281000},"page":"367-374","source":"Crossref","is-referenced-by-count":3,"title":["Learning Skills in Reinforcement Learning Using Relative Novelty"],"prefix":"10.1007","author":[{"given":"\u00d6zg\u00fcr","family":"\u015eim\u015fek","sequence":"first","affiliation":[]},{"given":"Andrew G.","family":"Barto","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"36_CR1","unstructured":"Barto, A.G., Singh, S., Chentanez, N.: Intrinsically motivated learning of hierarchical collections of skills. In: Proceedings of the Third International Conference on Developmental Learning (2004)"},{"key":"36_CR2","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"T.G. Dietterich","year":"2000","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the MAXQ value function decomposition. Journal of Artificial Intelligence Research\u00a013, 227\u2013303 (2000)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"36_CR3","volume-title":"From Animals to Animats 5: The Fifth Conference on the Simulation of Adaptive Behaviour","author":"B. Digney","year":"1998","unstructured":"Digney, B.: Learning hierarchical control structure for multiple tasks and changing environments. In: From Animals to Animats 5: The Fifth Conference on the Simulation of Adaptive Behaviour. The MIT Press, Cambridge (1998)"},{"key":"36_CR4","volume-title":"Pattern Classification","author":"R.O. Duda","year":"2001","unstructured":"Duda, R.O., Hart, P.E., Stork, D.G.: Pattern Classification. Wiley, New York (2001)"},{"key":"36_CR5","first-page":"243","volume-title":"Proceedings of the Nineteenth International Conference on Machine Learning","author":"B. Hengst","year":"2002","unstructured":"Hengst, B.: Discovering hierarchy in reinforcement learning with HEXQ. In: Proceedings of the Nineteenth International Conference on Machine Learning, pp. 243\u2013250. Morgan Kaufmann, San Francisco (2002)"},{"key":"36_CR6","first-page":"131","volume-title":"Advances in Neural Information Processing Systems","author":"S. Kakade","year":"2001","unstructured":"Kakade, S., Dayan, P.: Dopamine bonuses. In: Advances in Neural Information Processing Systems, vol.\u00a013, pp. 131\u2013137. MIT Press, Cambridge (2001)"},{"key":"36_CR7","first-page":"293","volume":"8","author":"L. Lin","year":"1992","unstructured":"Lin, L.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine Learning\u00a08, 293\u2013321 (1992)","journal-title":"Machine Learning"},{"key":"36_CR8","doi-asserted-by":"crossref","unstructured":"Mannor, S., Menache, I., Hoze, A., Klein, U.: Dynamic abstraction in reinforcement learning via clustering. In: Proceedings of the Twenty-First International Conference on Machine Learning (2004)","DOI":"10.1145\/1015330.1015355"},{"key":"36_CR9","first-page":"361","volume-title":"Proceedings of the Eighteenth International Conference on Machine Learning","author":"A. McGovern","year":"2001","unstructured":"McGovern, A., Barto, A.G.: Automatic discovery of subgoals in reinforcement learning using diverse density. In: Proceedings of the Eighteenth International Conference on Machine Learning, pp. 361\u2013368. Morgan Kaufmann, San Francisco (2001)"},{"key":"36_CR10","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/3-540-36755-1_25","volume-title":"Machine Learning: ECML 2002","author":"I. Menache","year":"2002","unstructured":"Menache, I., Mannor, S., Shimkin, N.: Q-Cut - Dynamic discovery of sub-goals in reinforcement learning. In: Elomaa, T., Mannila, H., Toivonen, H. (eds.) ECML 2002. LNCS (LNAI), vol.\u00a02430, pp. 295\u2013306. Springer, Heidelberg (2002)"},{"key":"36_CR11","first-page":"751","volume-title":"Proceedings of the Twenty-First International Conference on Machine Learning","author":"\u00d6. \u015eim\u015fek","year":"2004","unstructured":"\u015eim\u015fek, \u00d6., Barto, A.G.: Using relative novelty to identify useful temporal abstractions in reinforcement learning. In: Proceedings of the Twenty-First International Conference on Machine Learning, pp. 751\u2013758. ACM Press, New York (2004)"},{"key":"36_CR12","unstructured":"\u015eim\u015fek, \u00d6., Wolfe, A.P., Barto, A.G.: Identifying useful subgoals in reinforcement learning by local graph partitioning. In: Proceedings of the Twenty-Second International Conference on Machine Learning (to appear)"},{"key":"36_CR13","unstructured":"Parr, B.R.: Hierarchical Control and Learning for Markov Decision Processes. PhD thesis, Computer Science Division, University of California, Berkeley (1998)"},{"key":"36_CR14","first-page":"506","volume-title":"Proceedings of the Nineteenth International Conference on Machine Learning","author":"M. Pickett","year":"2002","unstructured":"Pickett, M., Barto, A.G.: PolicyBlocks: An algorithm for creating useful macro-actions in reinforcement learning. In: Proceedings of the Nineteenth International Conference on Machine Learning, pp. 506\u2013513. Morgan Kaufmann, San Francisco (2002)"},{"key":"36_CR15","unstructured":"Precup, D.: Temporal abstraction in reinforcement learning. PhD thesis, University of Massachusetts Amherst (2000)"},{"issue":"1-2","key":"36_CR16","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.P.: Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence\u00a0112(1-2), 181\u2013211 (1999)","journal-title":"Artificial Intelligence"},{"key":"36_CR17","first-page":"385","volume-title":"Advances in Neural Information Processing Systems","author":"S. Thrun","year":"1995","unstructured":"Thrun, S., Schwartz, A.: Finding structure in reinforcement learning. In: Advances in Neural Information Processing Systems, vol.\u00a07, pp. 385\u2013392. MIT Press, Cambridge (1995)"},{"key":"36_CR18","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1037\/h0040934","volume":"66","author":"R.W. White","year":"1959","unstructured":"White, R.W.: Motivation reconsidered: The concept of competence. Psychological Review\u00a066, 297\u2013333 (1959)","journal-title":"Psychological Review"}],"container-title":["Lecture Notes in Computer Science","Abstraction, Reformulation and Approximation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11527862_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,19]],"date-time":"2019-05-19T20:22:48Z","timestamp":1558297368000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11527862_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005]]},"ISBN":["9783540278726","9783540318828"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/11527862_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2005]]}}}