{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T03:33:44Z","timestamp":1773891224962,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":18,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540440369","type":"print"},{"value":"9783540367550","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2002]]},"DOI":"10.1007\/3-540-36755-1_25","type":"book-chapter","created":{"date-parts":[[2007,11,13]],"date-time":"2007-11-13T21:03:29Z","timestamp":1194987809000},"page":"295-306","source":"Crossref","is-referenced-by-count":73,"title":["Q-Cut\u2014Dynamic Discovery of Sub-goals in Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Ishai","family":"Menache","sequence":"first","affiliation":[]},{"given":"Shie","family":"Mannor","sequence":"additional","affiliation":[]},{"given":"Nahum","family":"Shimkin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2002,9,20]]},"reference":[{"key":"25_CR1","unstructured":"R. K. Ahuja, T. L. Magnati, and J. B. Orlin. Network Flows Theory, Algorithms and Applications. Prentice Hall Press, 1993."},{"key":"25_CR2","unstructured":"D. P. Bertsekas and J. N. Tsitsiklis. Neuro-Dynamic Programming. Athena Scientific, 1995."},{"key":"25_CR3","unstructured":"A. Blum and S. Chawla. Learning from labeled and unlabeled data using graph mincuts. In Proceedings of the 18th International Conference on Machine Learning, pages 19\u201326. Morgan Kaufmann, 2001."},{"key":"25_CR4","unstructured":"P. Dayan and G. E. Hinton. Feudal reinforcement learning. In Advances in Neural Information Processing Systems 5. Morgan Kaufmann, 1993."},{"key":"25_CR5","first-page":"279","volume":"8","author":"P. Dayan","year":"1992","unstructured":"P. Dayan and C. Watkins. Q-learning. Machine Learning, 8:279\u2013292, 1992.","journal-title":"Machine Learning"},{"key":"25_CR6","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"T. G. Dietterich","year":"2000","unstructured":"T. G. Dietterich. Hierarchical reinforcement learning with the MAXQ value function decomposition. Journal of Artificial Intelligence Research, 13:227\u2013303, 2000.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"25_CR7","doi-asserted-by":"crossref","unstructured":"B. Digney. Learning hierarchical control structure for multiple tasks and changing environments. In Proceedings of the Fifth Conference on the Simulation of Adaptive Behavior: SAB 98, 1998.","DOI":"10.7551\/mitpress\/3119.003.0050"},{"issue":"4","key":"25_CR8","doi-asserted-by":"publisher","first-page":"921","DOI":"10.1145\/48014.61051","volume":"35","author":"A. V. Goldberg","year":"1988","unstructured":"A. V. Goldberg and R. E. Tarjan. A new approach to the maximum-flow problem. Journal of ACM, 35(4):921\u2013940, October 1988.","journal-title":"Journal of ACM"},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"D. J. Huang and A. B. Kahng. When clusters meet partitions: A new density based methods for circuit decomposition. In Proceedings of the European Design and Test Conference, pages 60\u201364, 1995.","DOI":"10.1109\/EDTC.1995.470419"},{"issue":"3","key":"25_CR10","first-page":"293","volume":"8","author":"L. G. Lin","year":"1992","unstructured":"L. G. Lin. Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine Learning, 8(3):293\u2013321, 1992.","journal-title":"Machine Learning"},{"key":"25_CR11","unstructured":"A. McGovern and A. G. Barto. Automatic discovery of subgoals in reinforcement learning using diverse density. In Proceedings of the 18th International Conference on Machine Learning, pages 361\u2013368. Morgan Kaufmann, 2001."},{"key":"25_CR12","unstructured":"A. McGovern, R. S. Sutton, and A. H. Fagg. Roles of macro-actions in accelerating reinforcement learning. In Proceedings of the 1997 Grace Hopper Celebration of Women in Computing, pages 13\u201318, 1997."},{"key":"25_CR13","unstructured":"J. Morimoto and K. Doya. Acquisition of stand-up behavior by a real robot using hierarchical reinforcement learning. In Proceedings of the 17th International Conference on Machine Learning, pages 623\u2013630. Morgan Kaufmann, 2000."},{"key":"25_CR14","first-page":"361","volume":"7","author":"S. P. Singh","year":"1995","unstructured":"S. P. Singh, T. Jaakkola, and M. I. Jordan. Reinforcement learning with soft state aggregation. In Advances in Neural Information Processing Systems, volume 7, pages 361\u2013368. The MIT Press, 1995.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"25_CR15","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R. S. Sutton","year":"1999","unstructured":"R. S. Sutton, D. Precup, and S. Singh. Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence, 112:181\u2013211, 1999.","journal-title":"Artificial Intelligence"},{"issue":"5","key":"25_CR16","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"J. N. Tsitsiklis","year":"1997","unstructured":"J. N. Tsitsiklis and B. Van Roy. An analysis of temporal-difference learning with function approximation. IEEE Transactions on Automatic Control, 42(5):674\u2013690, 1997.","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"7","key":"25_CR17","first-page":"911","volume":"10","author":"Y. C. Wei","year":"1991","unstructured":"Y. C. Wei and C. K. Cheng. Ratio cut partitioning for hierarchical designs. IEEE\/ACM Transaction on Networking, 10(7):911\u2013921, 1991.","journal-title":"IEEE\/ACM Transaction on Networking"},{"issue":"2","key":"25_CR18","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1177\/105971239700600202","volume":"6","author":"M. Wiering","year":"1997","unstructured":"M. Wiering and J. Schmidhuber. HQ-learning. Adaptive Behavior, 6(2):219\u2013246, 1997.","journal-title":"Adaptive Behavior"}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML 2002"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-36755-1_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,19]],"date-time":"2024-02-19T00:41:21Z","timestamp":1708303281000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-36755-1_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002]]},"ISBN":["9783540440369","9783540367550"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/3-540-36755-1_25","relation":{},"ISSN":["0302-9743"],"issn-type":[{"value":"0302-9743","type":"print"}],"subject":[],"published":{"date-parts":[[2002]]}}}