{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:24:26Z","timestamp":1740122666412,"version":"3.37.3"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T00:00:00Z","timestamp":1649289600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T00:00:00Z","timestamp":1649289600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1007\/s10489-021-03093-9","type":"journal-article","created":{"date-parts":[[2022,4,6]],"date-time":"2022-04-06T23:02:33Z","timestamp":1649286153000},"page":"18049-18060","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Screening goals and selecting policies in hierarchical reinforcement learning"],"prefix":"10.1007","volume":"52","author":[{"given":"Junyan","family":"Zhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5775-3423","authenticated-orcid":false,"given":"Jing","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanfeng","family":"Tong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junrui","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,4,7]]},"reference":[{"key":"3093_CR1","unstructured":"Andrychowicz M, Wolski F, Ray A, Schneider J, Fong R, Welinder P, McGrew B, Tobin J, Abbeel P, Zaremba W (2017)\u00a0\u00a0Hindsight experience replay. arXiv preprint \narXiv:1707.01495"},{"key":"3093_CR2","doi-asserted-by":"crossref","unstructured":"Bacon PL, Harb J, Precup\u00a0 D (2017)\u00a0The option-critic architecture. In: Proceedings of the AAAI Conference on Artificial Intelligence 31\u00a0","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"3093_CR3","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare MG, Naddaf Y, Veness J, Bowling M (2013) The arcade learning environment: An evaluation platform for general agents. Journal of Artificial Intelligence Research 47:253\u2013279","journal-title":"Journal of Artificial Intelligence Research"},{"key":"3093_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2020.106335","volume":"93","author":"Y Chen","year":"2020","unstructured":"Chen Y, He F, Li H, Zhang D, Wu Y (2020) A full migration bbo algorithm with enhanced population quality bounds for multimodal biomedical image registration. Applied Soft Computing 93:106335","journal-title":"Applied Soft Computing"},{"key":"3093_CR5","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich TG (2000) Hierarchical reinforcement learning with the maxq value function decomposition. Journal of artificial intelligence research 13:227\u2013303","journal-title":"Journal of artificial intelligence research"},{"issue":"11","key":"3093_CR6","doi-asserted-by":"publisher","first-page":"3409","DOI":"10.1109\/TNNLS.2019.2891792","volume":"30","author":"N Dilokthanakul","year":"2019","unstructured":"Dilokthanakul N, Kaplanis C, Pawlowski N, Shanahan M (2019) Feature control as intrinsic motivation for hierarchical reinforcement learning. IEEE transactions on neural networks and learning systems 30(11):3409\u20133418","journal-title":"IEEE transactions on neural networks and learning systems"},{"key":"3093_CR7","unstructured":"Dulac-Arnold G, Mankowitz D, Hester T (2019)\u00a0Challenges of real-world reinforcement learning. arXiv preprint \narXiv:1904.12901"},{"key":"3093_CR8","doi-asserted-by":"crossref","unstructured":"Johannink T, Bahl S, Nair A, Luo J, Kumar A, Loskyll M, Ojea JA, Solowjow\u00a0 E, Levine\u00a0 S (2019)\u00a0Residual reinforcement learning for robot control. In: 2019 International Conference on Robotics and Automation (ICRA) IEEE 6023\u20136029","DOI":"10.1109\/ICRA.2019.8794127"},{"key":"3093_CR9","unstructured":"Jong NK, Hester T, Stone\u00a0 P (2008)\u00a0The utility of temporal abstraction in reinforcement learning. In: AAMAS\u00a0Citeseer\u00a0 1: 299\u2013306."},{"key":"3093_CR10","unstructured":"Kahn G, Villaflor A, Pong V, Abbeel P, Levine S (2017)\u00a0Uncertainty-aware reinforcement learning for collision avoidance. arXiv preprint arXiv:1702.01182"},{"key":"3093_CR11","first-page":"3675","volume":"29","author":"TD Kulkarni","year":"2016","unstructured":"Kulkarni TD, Narasimhan K, Saeedi A, Tenenbaum J (2016) Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. Advances in neural information processing systems 29:3675\u20133683","journal-title":"Advances in neural information processing systems"},{"key":"3093_CR12","unstructured":"Levy A, Konidaris G, Platt R, Saenko K (2017) Learning multi-level hierarchies with hindsight. arXiv preprint arXiv:1712.00948"},{"key":"3093_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.swevo.2020.100743","volume":"58","author":"H Li","year":"2020","unstructured":"Li H, He F, Chen Y, Luo J (2020) Multi-objective self-organizing optimization for constrained sparse array synthesis. Swarm and Evolutionary Computation 58:100743","journal-title":"Swarm and Evolutionary Computation"},{"key":"3093_CR14","unstructured":"Li S, Wang R, Tang M, Zhang C (2019) Hierarchical reinforcement learning with advantage-based auxiliary rewards. arXiv preprint arXiv:1910.04450"},{"key":"3093_CR15","doi-asserted-by":"crossref","unstructured":"Liang Y, He F, Zeng X (2020) 3d mesh simplification with feature preservation based on whale optimization algorithm and differential evolution. Integrated Computer-Aided Engineering (Preprint), 1\u201319","DOI":"10.3233\/ICA-200641"},{"issue":"3","key":"3093_CR16","doi-asserted-by":"publisher","first-page":"581","DOI":"10.3233\/IDA-194641","volume":"24","author":"J Luo","year":"2020","unstructured":"Luo J, He F, Yong J (2020) An efficient and robust bat algorithm with fusion of opposition-based learning and whale optimization algorithm. Intelligent Data Analysis 24(3):581\u2013606","journal-title":"Intelligent Data Analysis"},{"key":"3093_CR17","unstructured":"McGovern A, Sutton RS (1998) Macro-actions in reinforcement learning: An empirical analysis. Computer Science Department Faculty Publication Series 15"},{"key":"3093_CR18","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning\u00a0PMLR\u00a01928\u20131937"},{"key":"3093_CR19","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Graves A, Antonoglou I, Wierstra D, Riedmiller M (2013) Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602"},{"key":"3093_CR20","unstructured":"Nachum O, Gu S, Lee H, Levine S (2018) Data-efficient hierarchical reinforcement learning. arXiv preprint arXiv:1805.08296"},{"key":"3093_CR21","unstructured":"Parr R, Russell S (1998) Reinforcement learning with hierarchies of machines. Advances in neural information processing systems 1043\u20131049"},{"key":"3093_CR22","unstructured":"Rummery GA, Niranjan M (1994) On-line Q-learning using connectionist systems\u00a0Citeseer 37"},{"issue":"19","key":"3093_CR23","doi-asserted-by":"publisher","first-page":"70","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023","volume":"2017","author":"AE Sallab","year":"2017","unstructured":"Sallab AE, Abdou M, Perot E, Yogamani S (2017) Deep reinforcement learning framework for autonomous driving. Electronic Imaging 2017(19):70\u201376","journal-title":"Electronic Imaging"},{"key":"3093_CR24","unstructured":"Schaul T, Horgan D, Gregor K, Silver D (2015) Universal value function approximators. In: International conference on machine learning PMLR 1312\u20131320\u00a0"},{"key":"3093_CR25","unstructured":"Schulman YWMLR Openai baselines: A2c. [EB\/OL]. https:\/\/openai.com\/blog\/baselines-acktr-a2c\/ Accessed 18 Aug 2017"},{"key":"3093_CR26","unstructured":"Sharma A, Gu S, Levine S, Kumar V, Hausman K (2019) Dynamics-aware unsupervised discovery of skills. arXiv preprint arXiv:1907.01657"},{"key":"3093_CR27","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An introduction. MIT press\u00a0"},{"key":"3093_CR28","unstructured":"Sutton RS, McAllester DA, Singh SP, Mansour Y, et al (1999) Policy gradient methods for reinforcement learning with function approximation. In: NIPs\u00a0Citeseer 99: 1057\u20131063"},{"issue":"1\u20132","key":"3093_CR29","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, Precup D, Singh S (1999) Between mdps and semi-mdps: A framework for temporal abstraction in reinforcement learning. Artificial intelligence 112(1\u20132):181\u2013211","journal-title":"Artificial intelligence"},{"key":"3093_CR30","unstructured":"Vezhnevets AS, Osindero S, Schaul T, Heess N, Jaderberg M, Silver D, Kavukcuoglu K (2017) Feudal networks for hierarchical reinforcement learning. In: International Conference on Machine Learning PMLR 3540\u20133549"},{"issue":"3\u20134","key":"3093_CR31","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins CJ, Dayan P (1992) Q-learning. Machine learning 8(3\u20134):279\u2013292","journal-title":"Q-learning. Machine learning"},{"key":"3093_CR32","unstructured":"Zhang T, Guo S, Tan T, Hu X, Chen F (2020) Generating adjacency-constrained subgoals in hierarchical reinforcement learning. arXiv preprint arXiv:2006.11485"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-03093-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-021-03093-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-03093-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,19]],"date-time":"2022-11-19T10:34:16Z","timestamp":1668854056000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-021-03093-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,7]]},"references-count":32,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2022,12]]}},"alternative-id":["3093"],"URL":"https:\/\/doi.org\/10.1007\/s10489-021-03093-9","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2022,4,7]]},"assertion":[{"value":"10 December 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 April 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}