{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T22:22:29Z","timestamp":1725488549741},"publisher-location":"Berlin, Heidelberg","reference-count":35,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540415978"},{"type":"electronic","value":"9783540445654"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2000]]},"DOI":"10.1007\/3-540-44565-x_11","type":"book-chapter","created":{"date-parts":[[2007,8,11]],"date-time":"2007-08-11T05:48:14Z","timestamp":1186811294000},"page":"241-263","source":"Crossref","is-referenced-by-count":1,"title":["Automatic Segmentation of Sequences through Hierarchical Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"R.","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"C.","family":"Sessions","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2001,12,7]]},"reference":[{"issue":"1","key":"11_CR1","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1016\/0004-3702(94)90062-0","volume":"71","author":"F. Bacchus","year":"1994","unstructured":"F. Bacchus and Q. Yang, (1994). Downward refinement and the efficiency of hierarchical problem solving. Artificial Intelligence. 71,1, 43\u2013100.","journal-title":"Artificial Intelligence"},{"key":"11_CR2","volume-title":"Neuro-Dynamic Programming","author":"D. Bertsekas","year":"1996","unstructured":"D. Bertsekas and J. Tsitsiklis, (1996). Neuro-Dynamic Programming. Athena Scientific, Belmont, MA."},{"key":"11_CR3","volume-title":"Proc. of 12th National Conference on Artificial Intelligence","author":"A. Cassandra","year":"1994","unstructured":"A. Cassandra, L. Kaelbling, and M. Littman, (1994). Acting optimally in partially observable stochastic domains. Proc. of 12th National Conference on Artificial Intelligence. Morgan Kaufmann, San Mateo, CA."},{"key":"11_CR4","first-page":"183","volume-title":"Proc. of AAAI","author":"L. Chrisman","year":"1993","unstructured":"L. Chrisman, (1993). Reinforcement learning with perceptual aliasing: the perceptual distinction approach. Proc. of AAAI. 183\u2013188. Morgan Kaufmann, San Mateo, CA."},{"key":"11_CR5","volume-title":"Advances in Neural Information Processing Systems","author":"P. Dayan","year":"1993","unstructured":"P. Dayan and G. Hinton, (1993). Feudal reinforcement learning. Advances in Neural Information Processing Systems. MIT Press, Cambridge, MA."},{"key":"11_CR6","unstructured":"T. Dietterich, (1997). Hierarchical reinforcement learning with MAXQ value function decomposition. \n                    \n                      http:\/\/www:engr:orst:edu\/~tgd\/cv\/pubs.html"},{"key":"11_CR7","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/0364-0213(90)90002-E","volume":"14","author":"J. Elman","year":"1990","unstructured":"J. Elman, (1990). Finding structure in time. Cognitive Science. 14, 179\u2013212.","journal-title":"Cognitive Science"},{"issue":"6","key":"11_CR8","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1016\/0950-7051(96)81916-2","volume":"8","author":"P. Frasconi","year":"1995","unstructured":"P. Frasconi, M. Gori, and G. Soda, (1995). Recurrent neural networks and prior knowledge for sequence processing. Knowledge Based Systems. 8,6, 313\u2013332.","journal-title":"Knowledge Based Systems"},{"issue":"9","key":"11_CR9","doi-asserted-by":"publisher","first-page":"1359","DOI":"10.1016\/0893-6080(95)00041-0","volume":"8","author":"C.L. Giles","year":"1995","unstructured":"C.L. Giles, B.G. Horne, and T. Lin, (1995). Learning a class of large finite state machines with a recurrent neural network. Neural Networks, 8(9), 1359\u20131365.","journal-title":"Neural Networks"},{"key":"11_CR10","first-page":"167","volume-title":"Proc. of ICML","author":"L. Kaelbling","year":"1993","unstructured":"L. Kaelbling, (1993). Hierarchical learning in stochastic domains: preliminary results. Proc. of ICML, 167\u2013173. Morgan Kaufmann, San Francisco, CA."},{"key":"11_CR11","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L. Kaelbling","year":"1996","unstructured":"L. Kaelbling, M. Littman, and A. Moore, (1996). Reinforcement learning: A survey. Journal of Artificial Intelligence Research, 4, 237\u2013285.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"11_CR12","first-page":"692","volume-title":"Proc of AAAI\u201994","author":"C. Knoblock","year":"1994","unstructured":"C. Knoblock, J. Tenenberg, and Q. Yang, (1994). Characterizing abstraction hierarchies for planning. Proc of AAAI\u201994. 692\u2013697. Morgan Kaufmann, San Mateo, CA."},{"key":"11_CR13","volume-title":"Reinforcement Learning for Robots Using Neural Networks","author":"L. Lin","year":"1993","unstructured":"L. Lin, (1993). Reinforcement Learning for Robots Using Neural Networks. Ph.D. Thesis, Carnegie Mellon University, Pittsburgh."},{"key":"11_CR14","first-page":"315","volume-title":"Proc. Conference on Simulation of Adaptive Behavior","author":"A. McCallum","year":"1996","unstructured":"A. McCallum, (1996). Learning to use selective attention and short-term memory in sequential tasks. Proc. Conference on Simulation of Adaptive Behavior. 315\u2013324. MIT Press, Cambridge, MA."},{"key":"11_CR15","volume-title":"Reinforcement Learning with Selective Perception and Hidden State","author":"A. McCallum","year":"1996","unstructured":"A. McCallum, (1996b). Reinforcement Learning with Selective Perception and Hidden State. Ph.D Thesis, Department of Computer Science, University of Rochester, Rochester, NY."},{"issue":"1","key":"11_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1287\/mnsc.28.1.1","volume":"28","author":"G. Monohan","year":"1982","unstructured":"G. Monohan, (1982). A survey of partially observable Markov decision processes: theory, models, and algorithms. Management Science, 28(1), 1\u201316.","journal-title":"Management Science"},{"key":"11_CR17","first-page":"1088","volume-title":"Proc. of IJCAI\u201995","author":"R. Parr","year":"1995","unstructured":"R. Parr and S. Russell, (1995). Approximating optimal policies for partially observable stochastic domains. Proc. of IJCAI\u201995. 1088\u20131094. Morgan Kaufmann, San Mateo, CA."},{"key":"11_CR18","volume-title":"Advances in Neural Information Processing Systems","author":"R. Parr","year":"1997","unstructured":"R. Parr and S. Russell, (1997). Reinforcement learning with hierarchies of machines. Advances in Neural Information Processing Systems 9. MIT Press, Cambridge, MA."},{"key":"11_CR19","volume-title":"Advances in Neural Information Processing Systems","author":"D. Precup","year":"1998","unstructured":"D. Precup, R. Sutton, and S. Singh, (1998). Multi-time models for temporary abstract planning. Advances in Neural Information Processing Systems 10. MIT Press, Cambridge, MA."},{"key":"11_CR20","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes","author":"M. Puterman","year":"1994","unstructured":"M. Puterman, (1994). Markov Decision Processes. Wiley-Inter-science. New York."},{"key":"11_CR21","first-page":"343","volume-title":"Proc. of ICML","author":"M. Ring","year":"1991","unstructured":"M. Ring, (1991). Incremental development of complex behaviors through automatic construction of sensory-motor hierarchies. Proc. of ICML. 343\u2013347. Morgan Kaufmann, San Francisco, CA."},{"key":"11_CR22","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1016\/0004-3702(74)90026-5","volume":"5","author":"E. Sacerdoti","year":"1974","unstructured":"E. Sacerdoti, (1974). Planning in a hierarchy of abstraction spaces. Artificial Intelligence. 5, 115\u2013135.","journal-title":"Artificial Intelligence"},{"issue":"2","key":"11_CR23","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1162\/neco.1992.4.2.234","volume":"4","author":"J. Schmidhuber","year":"1992","unstructured":"J. Schmidhuber, (1992). Learning complex, extended sequences using the principle of history compression. Neural Computation, 4(2), 234\u2013242.","journal-title":"Neural Computation"},{"key":"11_CR24","unstructured":"J. Schmidhuber, (1993). Learning unambiguous reduced sequence descriptions. Advances in Neural Information Processing Systems, 291\u2013298."},{"key":"11_CR25","volume-title":"Learning to Solve Markovian Decision Processes","author":"S. Singh","year":"1994","unstructured":"S. Singh, (1994). Learning to Solve Markovian Decision Processes. Ph.D Thesis, University of Massachusetts, Amherst, MA."},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"E. Sondik, (1978). The optimal control of partially observable Markov processes over the infinite horizon: discounted costs. Operations research, 26(2).","DOI":"10.1287\/opre.26.2.282"},{"issue":"4\u20135","key":"11_CR27","first-page":"127","volume":"12","author":"R. Sun","year":"1999","unstructured":"R. Sun and T. Peterson, (1999). Multi-agent reinforcement learning: weighting and partitioning. Neural Networks, Vol.12 No.4\u20135. pp.127\u2013153.","journal-title":"Neural Networks"},{"key":"11_CR28","unstructured":"R. Sun and C. Sessions, (1998). Learning plans without a priori knowledge. Adaptive Behavior, in press. A shortened version appeared in Proceedings of WCCI-IJCNN\u201998, vol.1, 1\u20136. IEEE Press, Piscateway, NJ."},{"key":"11_CR29","volume-title":"Proc. of ICML","author":"R. Sutton","year":"1995","unstructured":"R. Sutton, (1995). TD models: modeling the world at a mixture of time scales. Proc. of ICML. Morgan Kaufmann, San Francisco, CA."},{"key":"11_CR30","first-page":"358","volume-title":"Proc. International Conference on Machine Learning","author":"P. Tadepalli","year":"1997","unstructured":"P. Tadepalli and T. Dietterich, (1997). Hierarchical explanation-based reinforcement learning. Proc. International Conference on Machine Learning. 358\u2013366. Morgan Kaufmann, San Francisco, CA."},{"key":"11_CR31","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1016\/0921-8890(95)00005-Z","volume":"15","author":"C. Tham","year":"1995","unstructured":"C. Tham, (1995). Reinforcement learning of multiple tasks using a hierarchical CMAC architecture. Robotics and Autonomous Systems. 15, 247\u2013274.","journal-title":"Robotics and Autonomous Systems"},{"key":"11_CR32","volume-title":"Neural Information Processing Systems","author":"S. Thrun","year":"1995","unstructured":"S. Thrun and A. Schwartz, (1995). Finding structure in reinforcement learning. Neural Information Processing Systems. MIT Press, Cambridge, MA."},{"key":"11_CR33","volume-title":"Learning with Delayed Rewards","author":"C. Watkins","year":"1989","unstructured":"C. Watkins, (1989). Learning with Delayed Rewards. Ph.D Thesis, Cambridge University, Cambridge, UK."},{"issue":"1\u20132","key":"11_CR34","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1016\/0004-3702(94)00012-P","volume":"73","author":"S. Whitehead","year":"1995","unstructured":"S. Whitehead and L. Lin, (1995). Reinforcement learning of non-Markov decision processes. Artificial Intelligence. 73(1\u20132). 271\u2013306.","journal-title":"Artificial Intelligence"},{"issue":"2","key":"11_CR35","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1177\/105971239700600202","volume":"6","author":"M. Wiering","year":"1998","unstructured":"M. Wiering and J. Schmidhuber, (1998). HQ-learning. Adaptive Behavior, 6(2), 219\u2013246.","journal-title":"Adaptive Behavior"}],"container-title":["Lecture Notes in Computer Science","Sequence Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-44565-X_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,2,21]],"date-time":"2019-02-21T12:43:58Z","timestamp":1550753038000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-44565-X_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2000]]},"ISBN":["9783540415978","9783540445654"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/3-540-44565-x_11","relation":{},"ISSN":["0302-9743"],"issn-type":[{"type":"print","value":"0302-9743"}],"subject":[],"published":{"date-parts":[[2000]]}}}