{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T03:33:26Z","timestamp":1773891206724,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":21,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540439417","type":"print"},{"value":"9783540456223","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2002]]},"DOI":"10.1007\/3-540-45622-8_16","type":"book-chapter","created":{"date-parts":[[2007,5,23]],"date-time":"2007-05-23T18:45:20Z","timestamp":1179945920000},"page":"212-223","source":"Crossref","is-referenced-by-count":95,"title":["Learning Options in Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Martin","family":"Stolle","sequence":"first","affiliation":[]},{"given":"Doina","family":"Precup","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2002,7,9]]},"reference":[{"key":"16_CR1","unstructured":"[1995]Bradtke:SMDPQ Bradtke, S. J., & Duff, M. O. (1995). Reinforcement learning methods for continuous-time Markov Decision Problems. Advances in Neural Information Processing Systems 7 (pp. 393\u2013400). MIT Press."},{"key":"16_CR2","unstructured":"[1998]Dietterich:MAXQ Dietterich, T. G. (1998). The MAXQ method for hierarchical reinforcement learning. Proceedings of the Fifteenth International Conference on Machine Learning. Morgan Kaufmann."},{"key":"16_CR3","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/0004-3702(72)90051-3","volume":"3","author":"P.E. Hart","year":"1972","unstructured":"[1972]Fikes:RobotPlan Fikes, R., P.E. Hart, & Nilsson, N. J. (1972). Learning and executing generalized robot plans. Artificial Intelligence, 3, 251\u2013288.","journal-title":"Artificial Intelligence"},{"key":"16_CR4","first-page":"285","volume":"3","author":"G. A. 
Iba","year":"1989","unstructured":"[1989]Iba:Macro Iba, G. A. (1989). A heuristic approach to the discovery of macro-operators. Machine Learning, 3, 285\u2013317.","journal-title":"Machine Learning"},{"key":"16_CR5","unstructured":"[1985]Korf:Macro Korf, R. E. (1985). Learning to solve problems by searching for macro-operators. Pitman Publishing Ltd."},{"key":"16_CR6","first-page":"11","volume":"1","author":"J. E. Laird","year":"1986","unstructured":"[1986]Laird:ChunkSOAR Laird, J. E., Rosenbloom, P. S., & Newell, A. (1986). Chunking in SOAR: The anatomy of a general learning mechanism. Machine Learning, 1, 11\u201346.","journal-title":"Machine Learning"},{"key":"16_CR7","unstructured":"[1997]Mahadevan:SMDP Mahadevan, S., Marchalleck, N., Das, T. K., & Gosavi, A. (1997). Self-improving factory simulation using continuous-time average-reward reinforcement learning. Proceedings of the Fourteenth International Conference on Machine Learning (pp. 202\u2013210). Morgan Kaufmann."},{"key":"16_CR8","unstructured":"[1997]McGovern:MacroRL McGovern, A., Sutton, R.S., & Fagg, A. H. (1997). Roles of macro-actions in accelerating reinforcement learning. Grace Hopper Celebration of Women in Computing (pp. 13\u201317)."},{"key":"16_CR9","volume-title":"Autonomous discovery of temporal abstractions from interaction with an environment","author":"E. A. McGovern","year":"2002","unstructured":"[2002]McGovern:Thesis McGovern, E. A. (2002). Autonomous discovery of temporal abstractions from interaction with an environment. Doctoral dissertation, University of Massachusetts, Amherst."},{"key":"16_CR10","unstructured":"[2001]McGovern:ICML McGovern, E. A., & Barto, A. G. (2001). Automatic discovery of subgoals in reinforcement learning using diverse density. Proceedings of the Eighteenth International Conference on Machine Learning (pp. 361\u2013368). Morgan Kaufman."},{"key":"16_CR11","unstructured":"[1988]Minton:Book Minton, S. (1988). Learning search control knowledge. 
An explanation-based approach. Kluwer Academic Publishers."},{"key":"16_CR12","unstructured":"[1972]Newell:Simon Newell, A., & Simon, H. A. (1972). Human problem solving. Prentice-Hall."},{"key":"16_CR13","unstructured":"[1998]Parr:Thesis Parr, R. (1998). Hierarchical control and learning for Markov Decision Processes. Doctoral dissertation, Computer Science Division, University of California, Berkeley, USA."},{"key":"16_CR14","unstructured":"[1998]Parr:HAMs Parr, R., & Russell, S. (1998). Reinforcement learning with hierarchies of machines. Advances in Neural Information Processing Systems 10. MIT Press."},{"key":"16_CR15","unstructured":"[2000]Precup:Thesis Precup, D. (2000). Temporal abstraction in reinforcement learning. Doctoral dissertation, Department of Computer Science, University of Massachusetts, Amherst, USA."},{"key":"16_CR16","unstructured":"[1994]Puterman:Book Puterman, M. L. (1994). Markov Decision Processes: Discrete stochastic dynamic programming. Wiley."},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"[1974]Sacerdoti:PlanArt Sacerdoti, E. D. (1974). Planning in a hierarchy of abstraction spaces. Artificial Intelligence, 5, 115\u2013135.","DOI":"10.1016\/0004-3702(74)90026-5"},{"key":"16_CR18","unstructured":"[1992]Singh:HDynaAAAI Singh, S. P. (1992). Reinforcement learning with a hierarchy of abstract models. Proceedings of the Tenth National Conference on Artificial Intelligence (pp. 202\u2013207). MIT\/AAAI Press."},{"key":"16_CR19","unstructured":"[1998]Sutton:Book Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction. MIT Press."},{"key":"16_CR20","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R. S. Sutton","year":"1999","unstructured":"[1999]Precup:Options Sutton, R. S., Precup, D., & Singh, S. (1999). Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. 
Artificial Intelligence, 112, 181\u2013211.","journal-title":"Artificial Intelligence"},{"key":"16_CR21","unstructured":"[1989]Watkins:Qlearn Watkins, C. J. C. H. (1989). Learning from delayed rewards. Doctoral dissertation, Psychology Department, Cambridge University, Cambridge, UK."}],"container-title":["Lecture Notes in Computer Science","Abstraction, Reformulation, and Approximation"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-45622-8_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,2,16]],"date-time":"2019-02-16T22:12:22Z","timestamp":1550355142000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-45622-8_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002]]},"ISBN":["9783540439417","9783540456223"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/3-540-45622-8_16","relation":{},"ISSN":["0302-9743"],"issn-type":[{"value":"0302-9743","type":"print"}],"subject":[],"published":{"date-parts":[[2002]]}}}