{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T18:47:36Z","timestamp":1777574856411,"version":"3.51.4"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100012285","name":"Departement Economie, Wetenschap en Innovatie","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012285","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100019185","name":"HORIZON EUROPE Global Challenges and European Industrial Competitiveness","doi-asserted-by":"publisher","award":["101070596"],"award-info":[{"award-number":["101070596"]}],"id":[{"id":"10.13039\/100019185","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s00521-025-11304-4","type":"journal-article","created":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T03:42:02Z","timestamp":1752205322000},"page":"18989-19007","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["GPI-tree search: algorithms for decision-time planning with the general policy improvement theorem"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6300-6993","authenticated-orcid":false,"given":"Louis","family":"Bagot","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9358-8565","authenticated-orcid":false,"given":"Lynn","family":"D\u2019eer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8267-9955","authenticated-orcid":false,"given":"Steven","family":"Latr\u00e9","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2969-3133","authenticated-orcid":false,"given":"Tom","family":"De Schepper","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4812-4841","authenticated-orcid":false,"given":"Kevin","family":"Mets","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,11]]},"reference":[{"key":"11304_CR1","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction, 2nd edn"},{"key":"11304_CR2","unstructured":"Zhu Z, Lin K, Zhou J (2020) Transfer learning in deep reinforcement learning: a survey. CoRR abs\/2009.07888"},{"key":"11304_CR3","unstructured":"Machado MC, Rosenbaum C, Guo X, Liu M, Tesauro G, Campbell M (2018) Eigenoption discovery through the deep successor representation. In: International conference on learning representations"},{"key":"11304_CR4","unstructured":"Jinnai Y, Park JW, Machado MC, Konidaris G (2019) Exploration in reinforcement learning with deep covering options. In: International conference on learning representations"},{"key":"11304_CR5","unstructured":"Gregor K, Rezende DJ, Wierstra D (2017) Variational intrinsic control. ICLR Workshop"},{"key":"11304_CR6","unstructured":"Eysenbach B, Gupta A, Ibarz J, Levine S (2019) Diversity is all you need: learning skills without a reward function. In: International conference on learning representations"},{"key":"11304_CR7","unstructured":"Barreto A, Dabney W, Munos R, Hunt JJ, Schaul T, Hasselt HP, Silver D (2017) Successor features for transfer in reinforcement learning. In: Advances in neural information processing systems, vol. 30"},{"key":"11304_CR8","unstructured":"Barreto A, Borsa D, Quan J, Schaul T, Silver D, Hessel M, Mankowitz D, Zidek A, Munos R (2018) Transfer in deep reinforcement learning using successor features and generalised policy improvement. In: International conference on machine learning, pp 501\u2013510. PMLR"},{"issue":"4","key":"11304_CR9","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"RS Sutton","year":"1991","unstructured":"Sutton RS (1991) Dyna, an integrated architecture for learning, planning, and reacting. ACM Sigart Bulletin 2(4):160\u2013163","journal-title":"ACM Sigart Bulletin"},{"key":"11304_CR10","unstructured":"Ha D, Schmidhuber J (2018) World models. arXiv preprint arXiv:1803.10122"},{"key":"11304_CR11","unstructured":"Hafner D, Lillicrap T, Ba J, Norouzi M (2019) Dream to control: learning behaviors by latent imagination. In: International conference on learning representations"},{"key":"11304_CR12","doi-asserted-by":"crossref","unstructured":"Coulom R (2006) Efficient selectivity and backup operators in monte-carlo tree search. In: International conference on computers and games, pp 72\u201383. Springer","DOI":"10.1007\/978-3-540-75538-8_7"},{"issue":"6419","key":"11304_CR13","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver D, Hubert T, Schrittwieser J, Antonoglou I, Lai M, Guez A, Lanctot M, Sifre L, Kumaran D, Graepel T et al (2018) A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419):1140\u20131144","journal-title":"Science"},{"issue":"7839","key":"11304_CR14","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser J, Antonoglou I, Hubert T, Simonyan K, Sifre L, Schmitt S, Guez A, Lockhart E, Hassabis D, Graepel T et al (2020) Mastering atari, go, chess and shogi by planning with a learned model. Nature 588(7839):604\u2013609","journal-title":"Nature"},{"issue":"4","key":"11304_CR15","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.2307\/1427934","volume":"27","author":"R Agrawal","year":"1995","unstructured":"Agrawal R (1995) Sample mean based index policies by o (log n) regret for the multi-armed bandit problem. Adv Appl Probab 27(4):1054\u20131078","journal-title":"Adv Appl Probab"},{"issue":"19","key":"11304_CR16","doi-asserted-by":"publisher","first-page":"8584","DOI":"10.1073\/pnas.92.19.8584","volume":"92","author":"MN Katehakis","year":"1995","unstructured":"Katehakis MN, Robbins H (1995) Sequential choice from several populations. Proc Natl Acad Sci 92(19):8584\u20138585","journal-title":"Proc Natl Acad Sci"},{"issue":"1\u20132","key":"11304_CR17","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, Precup D, Singh S (1999) Between mdps and semi-mdps: a framework for temporal abstraction in reinforcement learning. Artif Intell 112(1\u20132):181\u2013211","journal-title":"Artif Intell"},{"issue":"1","key":"11304_CR18","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1023\/A:1022633531479","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3(1):9\u201344","journal-title":"Mach Learn"},{"issue":"3","key":"11304_CR19","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1007\/s10472-011-9258-6","volume":"61","author":"CD Rosin","year":"2011","unstructured":"Rosin CD (2011) Multi-armed bandits with episode context. Ann Math Artif Intell 61(3):203\u2013230","journal-title":"Ann Math Artif Intell"},{"issue":"11","key":"11304_CR20","doi-asserted-by":"publisher","first-page":"1856","DOI":"10.1016\/j.artint.2011.03.007","volume":"175","author":"S Gelly","year":"2011","unstructured":"Gelly S, Silver D (2011) Monte-carlo tree search and rapid action value estimation in computer go. Artif Intell 175(11):1856\u20131875","journal-title":"Artif Intell"},{"key":"11304_CR21","unstructured":"Hamrick JB, Bapst V, Sanchez-Gonzalez A, Pfaff T, Weber T, Buesing L, Battaglia PW (2020) Combining q-learning and search with amortized value estimates. In: International conference on learning representations (2020)"},{"key":"11304_CR22","unstructured":"Hasselt H (2010) Double q-learning. Advances in neural information processing systems 23"},{"issue":"1","key":"11304_CR23","doi-asserted-by":"publisher","first-page":"172","DOI":"10.3390\/make4010009","volume":"4","author":"M Hutsebaut-Buysse","year":"2022","unstructured":"Hutsebaut-Buysse M, Mets K, Latr\u00e9 S (2022) Hierarchical reinforcement learning: A survey and open research challenges. Machine Learning and Knowledge Extraction 4(1):172\u2013221","journal-title":"Machine Learning and Knowledge Extraction"},{"key":"11304_CR24","unstructured":"Bagot L, Mets K, Latr\u00e9 S (2020) Learning intrinsically motivated options to stimulate policy exploration. ICML Workshop on LifeLong Learning"},{"key":"11304_CR25","unstructured":"Bagot L, Mets K, De\u00a0Schepper T, Latr\u00e9 S (2022) Deep learning of intrinsically motivated options in the arcade learning environment. NeurIPS Workshop on Deep Reinforcement Learning"},{"key":"11304_CR26","unstructured":"Eysenbach B, Salakhutdinov R, Levine S (2021) The information geometry of unsupervised reinforcement learning. arXiv preprint arXiv:2110.02719"},{"key":"11304_CR27","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: A physics engine for model-based control. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems, pp. 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"11304_CR28","unstructured":"Ma C, Wen J, Bengio Y (2018) Universal successor representations for transfer reinforcement learning"},{"key":"11304_CR29","unstructured":"Borsa D, Barreto A, Quan J, Mankowitz D, Munos R, Van\u00a0Hasselt H, Silver D, Schaul T (2018) Universal successor features approximators. arXiv preprint arXiv:1812.07626"},{"key":"11304_CR30","first-page":"38181","volume":"36","author":"LN Alegre","year":"2023","unstructured":"Alegre LN, Bazzan A, Now\u00e9 A, Silva B (2023) Multi-step generalized policy improvement by leveraging approximate models. Adv Neural Inf Process Syst 36:38181\u201338205","journal-title":"Adv Neural Inf Process Syst"},{"key":"11304_CR31","doi-asserted-by":"crossref","unstructured":"Lee J, Jeon W, Kim G-H, Kim K-E (2020) Monte-carlo tree search in continuous action spaces with value gradients. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp. 4561\u20134568","DOI":"10.1609\/aaai.v34i04.5885"},{"key":"11304_CR32","unstructured":"Hansen S, Dabney W, Barreto A, Warde-Farley D, Wiele TV, Mnih V (2020) Fast task inference with variational intrinsic successor features. In: International conference on learning representations"},{"key":"11304_CR33","unstructured":"Machado MC, Barreto A, Precup D (2021) Temporal abstraction in reinforcement learning with the successor representation. arXiv preprint arXiv:2110.05740"},{"key":"11304_CR34","first-page":"196","volume":"21","author":"L Lehnert","year":"2020","unstructured":"Lehnert L, Littman ML (2020) Successor features combine elements of model-free and model-based reinforcement learning. J Mach Learn Res 21:196\u20131","journal-title":"J Mach Learn Res"},{"key":"11304_CR35","doi-asserted-by":"crossref","unstructured":"Sutton RS, Machado MC, Holland GZ, Timbers DSF, Tanner B, White A (2022) Reward-respecting subtasks for model-based reinforcement learning. arXiv preprint arXiv:2202.03466","DOI":"10.1016\/j.artint.2023.104001"},{"key":"11304_CR36","doi-asserted-by":"crossref","unstructured":"Ilhan E, Etaner-Uyar A\u015e (2017) Monte carlo tree search with temporal-difference learning for general video game playing. In: 2017 IEEE conference on computational intelligence and games (CIG), pp 317\u2013324. IEEE","DOI":"10.1109\/CIG.2017.8080453"},{"key":"11304_CR37","unstructured":"Kurutach T, Clavera I, Duan Y, Tamar A, Abbeel P (2018) Model-ensemble trust-region policy optimization. In: International conference on learning representations"},{"key":"11304_CR38","unstructured":"Chua K, Calandra R, McAllister R, Levine S (2018) Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In: Advances in neural information processing systems, vol. 31"},{"key":"11304_CR39","unstructured":"Wang T, Ba J (2020) Exploring model-based planning with policy networks. In: International conference on learning representations"},{"key":"11304_CR40","unstructured":"Mansour Y (1999) Computational learning theory. http:\/\/www.math.tau.ac.il\/~mansour\/rl-course\/scribe6\/scribe6.html"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11304-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11304-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11304-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T04:58:18Z","timestamp":1757221098000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11304-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,11]]},"references-count":40,"journal-issue":{"issue":"23","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["11304"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11304-4","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,11]]},"assertion":[{"value":"15 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}