{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T17:20:30Z","timestamp":1725038430243},"reference-count":11,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2011,5,28]],"date-time":"2011-05-28T00:00:00Z","timestamp":1306540800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"published-print":{"date-parts":[[2012,8]]},"DOI":"10.1007\/s10462-011-9243-9","type":"journal-article","created":{"date-parts":[[2011,5,27]],"date-time":"2011-05-27T05:57:19Z","timestamp":1306475839000},"page":"119-127","source":"Crossref","is-referenced-by-count":6,"title":["Research on task decomposition and state abstraction in reinforcement learning"],"prefix":"10.1007","volume":"38","author":[{"given":"Yu","family":"Lasheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiang","family":"Zhongbin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liu","family":"Kang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2011,5,28]]},"reference":[{"key":"9243_CR1","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"A Barto","year":"2003","unstructured":"Barto A, Mahadevan S (2003) Recent advances in hierarchical reinforcement learning. Discrete Event Syst (special issue on reinforcement learning) 13: 41\u201377","journal-title":"Discrete Event Syst (special issue on reinforcement learning)"},{"key":"9243_CR2","volume-title":"Neuro-dynamic programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas DP, Tsitsiklis JN (1996) Neuro-dynamic programming. Athena Scientific, Belmont"},{"key":"9243_CR3","first-page":"1104","volume":"14","author":"C Boutilier","year":"1995","unstructured":"Boutilier C, Dearden R, Goldszmidt M (1995) Exploiting structure in policy construction. IJCAI 14: 1104\u20131113","journal-title":"IJCAI"},{"issue":"3","key":"9243_CR4","doi-asserted-by":"crossref","first-page":"142","DOI":"10.1111\/j.1467-8640.1989.tb00324.x","volume":"5","author":"T Dean","year":"1989","unstructured":"Dean T, Kanazawa K (1989) A model for reasoning about persistence and causation. Comput Intell 5(3): 142\u2013150","journal-title":"Comput Intell"},{"key":"9243_CR5","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"T Dietterich","year":"2000","unstructured":"Dietterich T (2000) Hierarchical reinforcement learning with the MAXQ value function decoposition. J Artif Intell Res 13: 227\u2013303","journal-title":"J Artif Intell Res"},{"key":"9243_CR6","first-page":"243","volume":"19","author":"B Hengst","year":"2002","unstructured":"Hengst B (2002) Discovering hierarchy in reinforcement learning with HEXQ. ICML 19: 243\u2013250","journal-title":"ICML"},{"key":"9243_CR7","doi-asserted-by":"crossref","unstructured":"Jonsson A, Barto A (2005) A causal approach to hierarchical decomposition of factored MDPs. In: Proceedings of the 22nd international conference on machine learning, pp 401\u2013408","DOI":"10.1145\/1102351.1102402"},{"key":"9243_CR8","doi-asserted-by":"crossref","unstructured":"Makar R, Mahadevan S, Ghavamzadeh M (2001) Hierarchical multi-agent reinforcement learning. In: Proceedings of the 5th international conference on autonomous agents","DOI":"10.1145\/375735.376302"},{"key":"9243_CR9","first-page":"1043","volume-title":"Reinforcement learning with hierarchies of machines. Advances in neural information processing systems","author":"R Parr","year":"1998","unstructured":"Parr R, Russell S (1998) Reinforcement learning with hierarchies of machines. Advances in neural information processing systems. MIT Press, Oxford, pp 1043\u20131049"},{"key":"9243_CR10","volume-title":"Reinforcement learning","author":"R Sutton","year":"1998","unstructured":"Sutton R, Barto A (1998) Reinforcement learning. MIT Press, Oxford"},{"issue":"1-2","key":"9243_CR11","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R Sutton","year":"1999","unstructured":"Sutton R, Precup D, Singh S (1999) Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif Intell 112(1-2): 181\u2013211","journal-title":"Artif Intell"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-011-9243-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10462-011-9243-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-011-9243-9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,11]],"date-time":"2019-06-11T07:02:13Z","timestamp":1560236533000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10462-011-9243-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,5,28]]},"references-count":11,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2012,8]]}},"alternative-id":["9243"],"URL":"https:\/\/doi.org\/10.1007\/s10462-011-9243-9","relation":{},"ISSN":["0269-2821","1573-7462"],"issn-type":[{"value":"0269-2821","type":"print"},{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,5,28]]}}}