{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T17:51:38Z","timestamp":1740160298106,"version":"3.37.3"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,8,30]],"date-time":"2021-08-30T00:00:00Z","timestamp":1630281600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,8,30]],"date-time":"2021-08-30T00:00:00Z","timestamp":1630281600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"national natural science foundation of china","doi-asserted-by":"publisher","award":["71701205"],"award-info":[{"award-number":["71701205"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s13042-021-01417-2","type":"journal-article","created":{"date-parts":[[2021,8,30]],"date-time":"2021-08-30T13:03:38Z","timestamp":1630328618000},"page":"447-459","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient hierarchical policy network with fuzzy rules"],"prefix":"10.1007","volume":"13","author":[{"given":"Wei","family":"Shi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1608-8695","authenticated-orcid":false,"given":"Yanghe","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Honglan","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jincai","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangquan","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,8,30]]},"reference":[{"issue":"9","key":"1417_CR1","doi-asserted-by":"publisher","first-page":"2529","DOI":"10.1007\/s13042-021-01336-2","volume":"12","author":"Mohammed Al-taezi","year":"2021","unstructured":"Al-taezi M, Zhu P, Hu Q, Wang Y, Al-badwi A (2021) Self-paced hierarchical metric learning (SPHML). Int J Mach Learn Cybernetics 12(9):2529\u20132541. https:\/\/doi.org\/10.1007\/s13042-021-01336-2","journal-title":"International Journal of Machine Learning and Cybernetics"},{"key":"1417_CR2","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-021-01327-3","author":"S An","year":"2021","unstructured":"An S, Hu Q, Wang C, Guo G, Li P (2021) Data reduction based on NN-kNN measure for NN classification and regression. Int J Mach Learn Cybern. https:\/\/doi.org\/10.1007\/s13042-021-01327-3","journal-title":"Int J Mach Learn Cybern"},{"key":"1417_CR3","unstructured":"Bakker B, Schmidhuber J (2003) Hierarchical reinforcement learning based on automatic discovery of subgoals and specialization of subpolicies. In: EWRL-6\u20192003: European workshop on reinforcement learning"},{"issue":"1\u20132","key":"1417_CR4","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto AG, Mahadevan S (2003) Recent advances in hierarchical reinforcement learning. Discrete Event Dyn Syst 13(1\u20132):341\u2013379","journal-title":"Discrete Event Dyn Syst"},{"key":"1417_CR5","unstructured":"Dietterich TG (1998) The maxq method for hierarchical reinforcement learning. In: Proceedings of the 15th international conference on machine learning"},{"key":"1417_CR6","doi-asserted-by":"crossref","unstructured":"Dietterich TG (2000) An overview of maxq hierarchical reinforcement learning. In: Proceedings of the 4th international symposium on abstraction, reformulation, and approximation","DOI":"10.1007\/3-540-44914-0_2"},{"issue":"9","key":"1417_CR7","doi-asserted-by":"publisher","first-page":"2101","DOI":"10.1007\/s13042-020-01104-8","volume":"11","author":"Changjun Fan","year":"2020","unstructured":"Fan C, Zeng L, Feng Y, Cheng G, Huang J, Liu Z (2020) A novel learning-based approach for efficient dismantling of networks. In J Mach Learn Cybernetics 11(9):2101\u20132111. https:\/\/doi.org\/10.1007\/s13042-020-01104-8","journal-title":"International Journal of Machine Learning and Cybernetics"},{"issue":"4","key":"1417_CR8","doi-asserted-by":"publisher","first-page":"2425","DOI":"10.1007\/s00500-018-03689-3","volume":"24","author":"Yanghe Feng","year":"2020","unstructured":"Feng Y, Dai L, Gao J, Cheng  G (2020) Uncertain pursuit-evasion game. Soft Comput 24(4):2425\u20132429. https:\/\/doi.org\/10.1007\/s00500-018-03689-3","journal-title":"Soft Computing"},{"issue":"4","key":"1417_CR9","doi-asserted-by":"publisher","first-page":"2463","DOI":"10.1007\/s00500-018-03732-3","volume":"24","author":"Yanghe Feng","year":"2020","unstructured":"Feng Y, Shi W, Shi W,  Cheng G, Huang J, Liu Z (2020) Benchmarking framework for command and control mission planning under uncertain environment. Soft Comput 24(4):2463\u20132478. https:\/\/doi.org\/10.1007\/s00500-018-03732-3","journal-title":"Soft Computing"},{"issue":"17","key":"1417_CR10","doi-asserted-by":"publisher","first-page":"5783","DOI":"10.1007\/s00500-017-2659-7","volume":"22","author":"Yanghe Feng","year":"2018","unstructured":"Feng Y, Yang X, Cheng G (2018) Stability in mean for multi-dimensional uncertain differential equation. Soft Comput 22(17):5783\u20135789. https:\/\/doi.org\/10.1007\/s00500-017-2659-7","journal-title":"Soft Computing"},{"key":"1417_CR11","doi-asserted-by":"crossref","unstructured":"Gu S, Holly E, Lillicrap T, Levine S (2017) Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE international conference on robotics and automation (ICRA), pp 3389\u20133396","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"1417_CR12","doi-asserted-by":"crossref","unstructured":"Johnson F, Dana K (2020) Feudal steering: hierarchical learning for steering angle prediction. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition workshops (CVPRW)","DOI":"10.1109\/CVPRW50498.2020.00509"},{"key":"1417_CR13","unstructured":"Konidaris G, Barto A (2007) Building portable options: skill transfer in reinforcement learning. In: International journal conference on artificial intelligence"},{"key":"1417_CR14","unstructured":"Li S, Wang R, Tang M, Zhang C (2019) Hierarchical reinforcement learning with advantage-based auxiliary rewards. arXiv preprint arXiv: 1910.04450"},{"key":"1417_CR15","unstructured":"Lillicrap T, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. Computer arXiv: 1509:02971"},{"key":"1417_CR16","unstructured":"Mcgovern A (2001) Automatic discovery of subgoals in reinforcement learning using diverse density. In: Proceedings of the 18th international conference on machine learning"},{"issue":"7540","key":"1417_CR17","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"1417_CR18","unstructured":"Nachum O, Gu S, Lee H, Levine S (2018) Data-efficient hierarchical reinforcement learning. arXiv preprint arXiv: 180508296"},{"key":"1417_CR19","unstructured":"Parr RE (1999) Hierarchical control and learning for markov decision processes. Thesis, University of California"},{"key":"1417_CR20","first-page":"803","volume":"3","author":"TJ Perkins","year":"2003","unstructured":"Perkins TJ, Barto AG, Brodley CE, Danyluk A (2003) Lyapunov design for safe reinforcement learning. J Mach Learn Res 3:803\u2013832","journal-title":"J Mach Learn Res"},{"key":"1417_CR21","doi-asserted-by":"crossref","unstructured":"Rafati J, Noelle D (2019) Efficient exploration through intrinsic motivation learning for unsupervised subgoal discovery in model-free hierarchical reinforcement learning. arXiv preprint arXiv: 191110164","DOI":"10.1609\/aaai.v33i01.330110009"},{"key":"1417_CR22","unstructured":"Schulman J, Levine S, Abbeel P, Jordan MI, Moritz P (2015) Trust region policy optimization. International conference on machine learning arXiv: 1502:05477"},{"key":"1417_CR23","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv: 170706347"},{"key":"1417_CR24","unstructured":"Stolle M, Precup D (2002) Learning options in reinforcement learning. In: Abstraction, reformulation and approximation, 5th international symposium, SARA 2002, Kananaskis, Alberta, Canada, August 2\u20134, 2002, Proceedings"},{"key":"1417_CR25","doi-asserted-by":"crossref","unstructured":"Tai L, Paolo G, Liu M (2017) Virtual-to-real deep reinforcement learning: continuous control of mobile robots for mapless navigation. In: 2017 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 31\u201336","DOI":"10.1109\/IROS.2017.8202134"},{"key":"1417_CR26","unstructured":"Vezhnevets AS, Osindero S, Schaul T, Heess N, Jaderberg M, Silver D, Kavukcuoglu K (2017) Feudal networks for hierarchical reinforcement learning. In: International conference on machine learning, PMLR, pp 3540\u20133549"},{"key":"1417_CR27","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3100928","author":"Y Wang","year":"2021","unstructured":"Wang Y, Liu R, Lin D, Chen D, Li P, Hu Q, Philip CL (2021) Chen coarse-to-fine: progressive knowledge transfer-based multitask convolutional neural network for intelligent large-scale fault diagnosis. IEEE Trans Neural Netw Learn Syst. https:\/\/doi.org\/10.1109\/TNNLS.2021.3100928","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1417_CR28","doi-asserted-by":"publisher","unstructured":"Wu G, Fan M, Shi J, Feng Y. Reinforcement learning based truck-and-drone Coordinated Delivery. In: IEEE Transactions on Artificial Intelligence, pp 1\u20131. https:\/\/doi.org\/10.1109\/TAI.2021.3087666","DOI":"10.1109\/TAI.2021.3087666"},{"issue":"6","key":"1417_CR29","doi-asserted-by":"publisher","first-page":"1513","DOI":"10.1007\/s13042-018-0830-9","volume":"10","author":"Zeshui Xu","year":"2019","unstructured":"Xu Z, He Y, Wang X (2019) An overview of probabilistic-based expressions for qualitative decision-making: techniques comparisons and developments. Int J Mach Learn Cybernetics 10(6):1513\u20131528. https:\/\/doi.org\/10.1007\/s13042-018-0830-9","journal-title":"International Journal of Machine Learning and Cybernetics"},{"issue":"2","key":"1417_CR30","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/s13042-019-00969-8","volume":"11","author":"Ruifeng Xu","year":"2020","unstructured":"Xu R, Wen Z, Gui L, Lu Q, Li B, Wang X (2020) Ensemble with estimation: seeking for optimization in class noisy data. Int J Mach Learn Cybernetics 11(2):231\u2013248. https:\/\/doi.org\/10.1007\/s13042-019-00969-8","journal-title":"International Journal of Machine Learning and Cybernetics"},{"issue":"3","key":"1417_CR31","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1007\/s13042-019-01028-y","volume":"11","author":"Dejian Yu","year":"2020","unstructured":"Yu D, Xu Z, Wang X (2020) Bibliometric analysis of support vector machines research trend: a case study in China. Int J Mach Learn Cybernetics 11(3):715\u2013728. https:\/\/doi.org\/10.1007\/s13042-019-01028-y","journal-title":"International Journal of Machine Learning and Cybernetics"},{"issue":"3","key":"1417_CR32","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1016\/S0019-9958(65)90241-X","volume":"8","author":"LA Zadeh","year":"1965","unstructured":"Zadeh LA (1965) Fuzzy sets. Inf. Control 8(3):338\u2013353","journal-title":"Inf. Control"},{"key":"1417_CR33","doi-asserted-by":"crossref","unstructured":"Zadeh LA (1996) Knowledge representation in fuzzy logic. In: Fuzzy sets, fuzzy logic, and fuzzy systems","DOI":"10.1142\/9789814261302_0039"},{"key":"1417_CR34","doi-asserted-by":"crossref","unstructured":"Zhang P, Hao J, Wang W, Tang H, Ma Y, Duan Y, Zheng Y (2020) Kogun: accelerating deep reinforcement learning via integrating human suboptimal knowledge. arXiv preprint arXiv: 200207418","DOI":"10.24963\/ijcai.2020\/317"},{"key":"1417_CR35","unstructured":"Zhou WJ, Yu Y (2020) Temporal-adaptive hierarchical reinforcement learning. arXiv preprint arXiv: 200202080"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-021-01417-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-021-01417-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-021-01417-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,21]],"date-time":"2022-01-21T09:36:19Z","timestamp":1642757779000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-021-01417-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,30]]},"references-count":35,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["1417"],"URL":"https:\/\/doi.org\/10.1007\/s13042-021-01417-2","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"type":"print","value":"1868-8071"},{"type":"electronic","value":"1868-808X"}],"subject":[],"published":{"date-parts":[[2021,8,30]]},"assertion":[{"value":"24 June 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}