{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,18]],"date-time":"2025-05-18T04:05:57Z","timestamp":1747541157366,"version":"3.40.5"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,4,19]],"date-time":"2025-04-19T00:00:00Z","timestamp":1745020800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,4,19]],"date-time":"2025-04-19T00:00:00Z","timestamp":1745020800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62102444"],"award-info":[{"award-number":["No.62102444"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s40747-025-01895-9","type":"journal-article","created":{"date-parts":[[2025,4,19]],"date-time":"2025-04-19T08:20:31Z","timestamp":1745050831000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Hierarchical reinforcement learning based on macro actions"],"prefix":"10.1007","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8304-3692","authenticated-orcid":false,"given":"Hao","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Gongju","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Shengze","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jieyuan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Long","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Xinhai","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,19]]},"reference":[{"key":"1895_CR1","first-page":"3","volume":"46","author":"AM Hafiz","year":"2023","unstructured":"Hafiz AM, Hassaballah M, Alqahtani A, Alsubai S, Hameed MA (2023) Reinforcement learning with an ensemble of binary action deep q-networks. Comput Syst Sci Eng 46:3","journal-title":"Comput Syst Sci Eng"},{"key":"1895_CR2","unstructured":"Schulman J (2015) Trust region policy optimization. arXiv preprint arXiv:1502.05477"},{"key":"1895_CR3","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971"},{"key":"1895_CR4","unstructured":"Heess N, Tb D, Sriram S, Lemmon J, Merel J, Wayne G, Tassa Y, Erez T, Wang Z, Eslami S, et al (2017) Emergence of locomotion behaviours in rich environments. arXiv preprint arXiv:1707.02286"},{"key":"1895_CR5","doi-asserted-by":"crossref","unstructured":"Abbasi SF, Bilal M, Mukherjee T, Churm J, Pournik O, Epiphaniou G, Arvanitis TN (2024) Deep learning-based synthetic skin lesion image classification. In: Digital Health and Informatics Innovations for Sustainable Health Care Systems, pp. 1145\u20131150. IOS Press","DOI":"10.3233\/SHTI240612"},{"key":"1895_CR6","doi-asserted-by":"crossref","unstructured":"Mukherjee T, Gour S, Abbasi SF, Pournik O, Arvanitis TN (2024) Development of a cnn for adult brain tumour characterisation: Implications and future directions for transfer learning. In: Digital Health and Informatics Innovations for Sustainable Health Care Systems, pp. 1674\u20131678. IOS Press","DOI":"10.3233\/SHTI240745"},{"key":"1895_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2024.106024","volume":"91","author":"H Liu","year":"2024","unstructured":"Liu H, Ghadimi N (2024) Hybrid convolutional neural network and flexible dwarf mongoose optimization algorithm for strong kidney stone diagnosis. Biomed Signal Process Control 91:106024. https:\/\/doi.org\/10.1016\/j.bspc.2024.106024","journal-title":"Biomed Signal Process Control"},{"key":"1895_CR8","doi-asserted-by":"crossref","unstructured":"Rajeswaran A, Kumar V, Gupta A, Vezzani G, Schulman J, Todorov E, Levine S (2017) Learning complex dexterous manipulation with deep reinforcement learning and demonstrations. arXiv preprint arXiv:1709.10087","DOI":"10.15607\/RSS.2018.XIV.049"},{"issue":"1","key":"1895_CR9","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1109\/TETCI.2023.3304948","volume":"8","author":"Z Xiao","year":"2024","unstructured":"Xiao Z, Xing H, Zhao B, Qu R, Luo S, Dai P, Li K, Zhu Z (2024) Deep contrastive representation learning with self-distillation. IEEE Trans Emerg Top Comput Intell 8(1):3\u201315. https:\/\/doi.org\/10.1109\/TETCI.2023.3304948","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"issue":"12","key":"1895_CR10","doi-asserted-by":"publisher","first-page":"7387","DOI":"10.1109\/TMC.2022.3208457","volume":"22","author":"F Song","year":"2023","unstructured":"Song F, Xing H, Wang X, Luo S, Dai P, Xiao Z, Zhao B (2023) Evolutionary multi-objective reinforcement learning based trajectory control and task offloading in uav-assisted mobile edge computing. IEEE Trans Mob Comput 22(12):7387\u20137405. https:\/\/doi.org\/10.1109\/TMC.2022.3208457","journal-title":"IEEE Trans Mob Comput"},{"key":"1895_CR11","doi-asserted-by":"crossref","unstructured":"Gu S, Holly E, Lillicrap T, Levine S (2017) Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 3389\u20133396. IEEE","DOI":"10.1109\/ICRA.2017.7989385"},{"issue":"3","key":"1895_CR12","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1177\/02783649231167210","volume":"42","author":"K Rana","year":"2023","unstructured":"Rana K, Dasagi V, Haviland J, Talbot B, Milford M, S\u00fcnderhauf N (2023) Bayesian controller fusion: leveraging control priors in deep reinforcement learning for robotics. Int J Robot Res 42(3):123\u2013146","journal-title":"Int J Robot Res"},{"key":"1895_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.121502","volume":"237","author":"Y Huang","year":"2024","unstructured":"Huang Y, Zhou C, Cui K, Lu X (2024) A multi-agent reinforcement learning framework for optimizing financial trading strategies based on timesnet. Expert Syst Appl 237:121502","journal-title":"Expert Syst Appl"},{"key":"1895_CR14","first-page":"3","volume":"9","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM, Mathieu M et al (2019) Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 9:3","journal-title":"Nature"},{"key":"1895_CR15","first-page":"9","volume":"5","author":"P Dayan","year":"1992","unstructured":"Dayan P, Hinton GE (1992) Feudal reinforcement learning. Adv Neural Inform Process Syst 5:9","journal-title":"Adv Neural Inform Process Syst"},{"key":"1895_CR16","first-page":"8","volume":"10","author":"R Parr","year":"1997","unstructured":"Parr R, Russell S (1997) Reinforcement learning with hierarchies of machines. Adv Neural Inform Process Syst 10:8","journal-title":"Adv Neural Inform Process Syst"},{"issue":"1\u20132","key":"1895_CR17","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, Precup D, Singh S (1999) Between mdps and semi-mdps: a framework for temporal abstraction in reinforcement learning. Artif Intell 112(1\u20132):181\u2013211","journal-title":"Artif Intell"},{"key":"1895_CR18","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto AG, Mahadevan S (2003) Recent advances in hierarchical reinforcement learning. Discr Event Dyn Syst 13:341\u2013379","journal-title":"Discr Event Dyn Syst"},{"key":"1895_CR19","unstructured":"Han L, Xiong J, Sun P, Sun X, Fang M, Guo Q, Chen Q, Shi T, Yu H, Wu X et al (2020) Tstarbot-x: An open-sourced and comprehensive study for efficient league training in starcraft ii full game. arXiv preprint arXiv:2011.13729"},{"key":"1895_CR20","unstructured":"Song Y, Wang J, Lukasiewicz T, Xu Z, Xu M (2020) Scc: A flexible strategy for multi-agent reinforcement learning. ArXiv"},{"key":"1895_CR21","doi-asserted-by":"crossref","unstructured":"Silva A, Gombolay M (2021) Encoding human domain knowledge to warm start reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence 35:5042\u20135050","DOI":"10.1609\/aaai.v35i6.16638"},{"key":"1895_CR22","doi-asserted-by":"crossref","unstructured":"Xiao Y, Hoffman J, Xia T, Amato C (2020) Learning multi-robot decentralized macro-action-based policies via a centralized q-net. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 10695\u201310701. IEEE","DOI":"10.1109\/ICRA40945.2020.9196684"},{"key":"1895_CR23","unstructured":"Xiao Y, Hoffman J, Amato C (2020) Macro-action-based deep multi-agent reinforcement learning. In: Conference on Robot Learning, pp. 1146\u20131161. PMLR"},{"key":"1895_CR24","unstructured":"Farquhar G, Afouras T, Nardelli N, Whiteson S (2019) Challenges and solutions in starcraft micromanagement tasks. ArXiv"},{"key":"1895_CR25","unstructured":"Samvelyan M, Rashid T, De Witt CS, Farquhar G, Nardelli N, Rudner TG, Hung C-M, Torr PH Foerster J, Whiteson S (2019) The starcraft multi-agent challenge. arXiv preprint arXiv:1902.04043"},{"key":"1895_CR26","doi-asserted-by":"crossref","unstructured":"Bacon P-L, Harb J, Precup D (2017) The option-critic architecture. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 31","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"1895_CR27","first-page":"9","volume":"29","author":"TD Kulkarni","year":"2016","unstructured":"Kulkarni TD, Narasimhan K, Saeedi A, Tenenbaum J (2016) Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. Adv Neural Inform Process Syst 29:9","journal-title":"Adv Neural Inform Process Syst"},{"key":"1895_CR28","unstructured":"Levy A, Konidaris G, Platt R, Saenko K (2019) Learning multi-level hierarchies with hindsight. In: Proceedings of International Conference on Learning Representations"},{"key":"1895_CR29","first-page":"8","volume":"31","author":"O Nachum","year":"2018","unstructured":"Nachum O, Gu SS, Lee H, Levine S (2018) Data-efficient hierarchical reinforcement learning. Adv Neural Inform Process Syst 31:8","journal-title":"Adv Neural Inform Process Syst"},{"key":"1895_CR30","doi-asserted-by":"crossref","unstructured":"Tessler C, Givony S, Zahavy T, Mankowitz D, Mannor S (2017) A deep hierarchical approach to lifelong learning in minecraft. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 31","DOI":"10.1609\/aaai.v31i1.10744"},{"key":"1895_CR31","unstructured":"Vezhnevets AS, Osindero S, Schaul T, Heess N, Jaderberg M, Silver D, Kavukcuoglu K (2017) Feudal networks for hierarchical reinforcement learning. In: International Conference on Machine Learning, pp. 3540\u20133549. PMLR"},{"key":"1895_CR32","doi-asserted-by":"crossref","unstructured":"Khan MJ, Ahmed SH, Sukthankar G (2022) Transformer-based value function decomposition for cooperative multi-agent reinforcement learning in starcraft. In: Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment 18:113\u2013119","DOI":"10.1609\/aiide.v18i1.21954"},{"key":"1895_CR33","unstructured":"Vinyals O, Ewalds T, Bartunov S, Georgiev P, Vezhnevets AS, Yeo M, Makhzani A, K\u00fcttler H, Agapiou J, Schrittwieser J, et al (2017) Starcraft ii: A new challenge for reinforcement learning. arXiv preprint arXiv:1708.04782"},{"issue":"7","key":"1895_CR34","doi-asserted-by":"publisher","first-page":"4957","DOI":"10.1109\/TII.2020.3019386","volume":"17","author":"H Liang","year":"2020","unstructured":"Liang H, Zhang X, Hong X, Zhang Z, Li M, Hu G, Hou F (2020) Reinforcement learning enabled dynamic resource allocation in the internet of vehicles. IEEE Trans Industr Inf 17(7):4957\u20134967","journal-title":"IEEE Trans Industr Inf"},{"key":"1895_CR35","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"ML Puterman","year":"2014","unstructured":"Puterman ML (2014) Markov decision processes: discrete stochastic dynamic programming. John Wiley & Sons"},{"issue":"11","key":"1895_CR36","doi-asserted-by":"publisher","first-page":"1073","DOI":"10.1057\/jors.1993.181","volume":"44","author":"DJ White","year":"1993","unstructured":"White DJ (1993) A survey of applications of markov decision processes. J Oper Res Soc 44(11):1073\u20131096","journal-title":"J Oper Res Soc"},{"issue":"3","key":"1895_CR37","doi-asserted-by":"publisher","first-page":"649","DOI":"10.2307\/3211944","volume":"7","author":"SM Ross","year":"1970","unstructured":"Ross SM (1970) Average cost semi-markov decision processes. J Appl Probab 7(3):649\u2013656","journal-title":"J Appl Probab"},{"issue":"1","key":"1895_CR38","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/j.ress.2004.11.001","volume":"90","author":"D Chen","year":"2005","unstructured":"Chen D, Trivedi KS (2005) Optimization for condition-based maintenance with semi-markov decision process. Reliab Eng Syst Saf 90(1):25\u201329","journal-title":"Reliab Eng Syst Saf"},{"key":"1895_CR39","volume-title":"An introduction to fuzzy logic applications in intelligent systems","author":"RR Yager","year":"2012","unstructured":"Yager RR, Zadeh LA (2012) An introduction to fuzzy logic applications in intelligent systems, vol 165. Springer, Berlin"},{"key":"1895_CR40","unstructured":"Hauskrecht M, Meuleau N, Kaelbling LP, Dean TL, Boutilier C (2013) Hierarchical solution of markov decision processes using macro-actions. arXiv preprint arXiv:1301.7381"},{"key":"1895_CR41","unstructured":"Durugkar IP, Rosenbaum C, Dernbach S, Mahadevan S (2016) Deep reinforcement learning with macro-actions. arXiv preprint arXiv:1606.04615"},{"key":"1895_CR42","doi-asserted-by":"crossref","unstructured":"Banach S (1922) Sur les op\u00e9rations dans les ensembles abstraits et leur application aux \u00e9quations int\u00e9grales. Fundam Math 3(1):133\u2013181","DOI":"10.4064\/fm-3-1-133-181"},{"key":"1895_CR43","first-page":"28182","volume":"34","author":"A Lampinen","year":"2021","unstructured":"Lampinen A, Chan S, Banino A, Hill F (2021) Towards mental time travel: a hierarchical memory for reinforcement learning agents. Adv Neural Inf Process Syst 34:28182\u201328195","journal-title":"Adv Neural Inf Process Syst"},{"key":"1895_CR44","unstructured":"Mnih V (2016) Asynchronous methods for deep reinforcement learning. arXiv preprint arXiv:1602.01783"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01895-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-025-01895-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01895-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T11:22:08Z","timestamp":1747480928000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-025-01895-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,19]]},"references-count":44,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["1895"],"URL":"https:\/\/doi.org\/10.1007\/s40747-025-01895-9","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"type":"print","value":"2199-4536"},{"type":"electronic","value":"2198-6053"}],"subject":[],"published":{"date-parts":[[2025,4,19]]},"assertion":[{"value":"24 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 April 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors certify that they have no affiliations with or involvement in any organization or entity with any financial interest or non-financial interest in the subject matter or materials discussed in this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Informed consent was obtained from all individual participants included in the study.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval and informed consent for data used"}}],"article-number":"247"}}