{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T04:04:33Z","timestamp":1751083473724,"version":"3.41.0"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2025,5,7]],"date-time":"2025-05-07T00:00:00Z","timestamp":1746576000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,7]],"date-time":"2025-05-07T00:00:00Z","timestamp":1746576000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","award":["NRF-2018R1A6A1A03025526"],"award-info":[{"award-number":["NRF-2018R1A6A1A03025526"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","award":["NRF-2023R1A2C1003143"],"award-info":[{"award-number":["NRF-2023R1A2C1003143"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00521-025-11252-z","type":"journal-article","created":{"date-parts":[[2025,5,7]],"date-time":"2025-05-07T18:47:30Z","timestamp":1746643650000},"page":"14171-14187","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Curiosity-driven dual-policy action selection in temporal difference learning for model predictive control"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4372-8675","authenticated-orcid":false,"given":"Chang-Hun","family":"Ji","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6861-3030","authenticated-orcid":false,"given":"Yo-Han","family":"Choi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5835-7972","authenticated-orcid":false,"given":"Youn-Hee","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,7]]},"reference":[{"key":"11252_CR1","unstructured":"Hansen N, Wang X, Su H (2022) Temporal difference learning for model predictive control. In: international conference on machine learning, PMLR"},{"issue":"1","key":"11252_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000086","volume":"16","author":"TM Moerland","year":"2023","unstructured":"Moerland TM, Broekens J, Plaat A, Jonker CM (2023) Model-based reinforcement learning: a survey. Found Trends Mach Learning 16(1):1\u2013118","journal-title":"Found Trends Mach Learning"},{"key":"11252_CR3","unstructured":"Hafner D, Lillicrap T, Ba J, Norouzi M (2020) Dream to control: Learning behaviors by latent imagination. In: international conference on learning representations. https:\/\/openreview.net\/forum?id=S1lOTC4tDS"},{"key":"11252_CR4","unstructured":"Sekar R, Rybkin O, Daniilidis K, Abbeel P, Hafner D, Pathak D (2020) Planning to explore via self-supervised world models. In: III, H.D., Singh, A. (eds.) Proceedings of the 37th International conference on machine learning. Proceedings of Machine Learning Research, 119: 8583\u20138592"},{"key":"11252_CR5","unstructured":"Cui B, Chow Y, Ghavamzadeh M (2021) Control-aware representations for model-based reinforcement learning. In: international conference on learning representations. https:\/\/openreview.net\/forum?id=dgd4EJqsbW5"},{"key":"11252_CR6","doi-asserted-by":"crossref","unstructured":"Liu Y, Datta G, Novoseller E, Brown DS (2023) Efficient preference-based reinforcement learning using learned dynamics models. In: 2023 IEEE international conference on robotics and automation (ICRA), pp. 2921\u20132928. IEEE","DOI":"10.1109\/ICRA48891.2023.10161081"},{"key":"11252_CR7","unstructured":"Lowrey K, Rajeswaran A, Kakade S, Todorov E, Mordatch I (2019) Plan online, learn offline: Efficient learning and exploration via model-based control. In: international conference on learning representations. https:\/\/openreview.net\/forum?id=Byey7n05FQ"},{"key":"11252_CR8","unstructured":"Tassa Y, Doron Y, Muldal A, Erez T, Li Y, Casas DdL, Budden D, Abdolmaleki A, Merel J, Lefrancq A, et al (2018) Deepmind control suite. arXiv preprint arXiv:1801.00690"},{"key":"11252_CR9","unstructured":"Kaiser L et al (2019) Model-based Reinforcement Learning for Atari. arXiv preprint arXiv:1903.00374"},{"key":"11252_CR10","unstructured":"Janner M, Fu J, Zhang M, Levine S (2019) When to trust your model: Model-based policy optimization. Advances in neural information processing systems 32"},{"key":"11252_CR11","unstructured":"Rajeswaran A, Mordatch I, Kumar V (2020) A game theoretic framework for model based reinforcement learning. In: international conference on machine learning, pp. 7953\u20137963. PMLR"},{"key":"11252_CR12","unstructured":"Fan Y, Ming Y (2021) Model-based reinforcement learning for continuous control with posterior sampling. In: international conference on machine learning, pp. 3078\u20133087. PMLR"},{"issue":"7839","key":"11252_CR13","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser J, Antonoglou I, Hubert T, Simonyan K, Sifre L, Schmitt S, Guez A, Lockhart E, Hassabis D, Graepel T (2020) Mastering atari, go, chess and shogi by planning with a learned model. Nature 588(7839):604\u2013609","journal-title":"Nature"},{"key":"11252_CR14","first-page":"25476","volume":"34","author":"W Ye","year":"2021","unstructured":"Ye W, Liu S, Kurutach T, Abbeel P, Gao Y (2021) Mastering atari games with limited data. Adv Neural Inf Process Syst 34:25476\u201325488","journal-title":"Adv Neural Inf Process Syst"},{"key":"11252_CR15","unstructured":"Nachum O, Ahn M, Ponte H, Gu S, Kumar V (2019) Multi-agent manipulation via locomotion using hierarchical sim2real. arXiv preprint arXiv:1908.05224"},{"key":"11252_CR16","doi-asserted-by":"crossref","unstructured":"Kaspar M, Mu\u00f1oz Osorio JD, Bock J (2020) Sim2real transfer for reinforcement learning without dynamics randomization. In: 2020 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp. 4383\u20134388. doi 10.1109\/IROS45743.2020.9341260","DOI":"10.1109\/IROS45743.2020.9341260"},{"key":"11252_CR17","unstructured":"Zhuang Z, Fu Z, Wang J, Atkeson C, Schwertfeger S, Finn C, Zhao H (2023) Robot parkour learning. In: conference on robot learning (CoRL)"},{"key":"11252_CR18","unstructured":"Williams G, Aldrich A, Theodorou E (2015) Model predictive path integral control using covariance variable importance sampling. arXiv preprint arXiv:1509.01149"},{"key":"11252_CR19","unstructured":"Dann C, Mansour Y, Mohri M, Sekhari A, Sridharan K (2022) Guarantees for epsilon-greedy reinforcement learning with function approximation. In: international conference on machine learning, pp. 4666\u20134689. PMLR"},{"key":"11252_CR20","unstructured":"Cesa-Bianchi N, Gentile C, Lugosi G, Neu G (2017) Boltzmann exploration done right. Advances in neural information processing systems 30"},{"key":"11252_CR21","doi-asserted-by":"crossref","unstructured":"Pathak D, Agrawal P, Efros AA, Darrell T (2017) Curiosity-driven exploration by self-supervised prediction. In: international conference on machine learning, pp. 2778\u20132787. PMLR","DOI":"10.1109\/CVPRW.2017.70"},{"key":"11252_CR22","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2019) Continuous Control with Deep Reinforcement Learning arXiv:1509.02971 [cs.LG]"},{"key":"11252_CR23","unstructured":"Stadie BC, Levine S, Abbeel P (2015) Incentivizing exploration in reinforcement learning with deep predictive models. ArXiv abs\/1507.00814"},{"key":"11252_CR24","unstructured":"Achiam J, Sastry S (2017) Surprise-based intrinsic motivation for deep reinforcement learning. CoRR arxiv:1703.01732"},{"key":"11252_CR25","doi-asserted-by":"crossref","unstructured":"Abril RK (2018) Curiosity-driven reinforcement learning with homeostatic regulation. In: 2018 international joint conference on neural networks (IJCNN), pp. 1\u20136. IEEE","DOI":"10.1109\/IJCNN.2018.8489075"},{"key":"11252_CR26","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1016\/j.neucom.2020.08.024","volume":"418","author":"J Li","year":"2020","unstructured":"Li J, Shi X, Li J, Zhang X, Wang J (2020) Random curiosity-driven exploration in deep reinforcement learning. Neurocomputing 418:139\u2013147","journal-title":"Neurocomputing"},{"key":"11252_CR27","doi-asserted-by":"publisher","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: A physics engine for model-based control. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems, pp. 5026\u20135033. https:\/\/doi.org\/10.1109\/IROS.2012.6386109","DOI":"10.1109\/IROS.2012.6386109"},{"key":"11252_CR28","unstructured":"Hafner D, Pasukonis J, Ba J, Lillicrap T (2023) Mastering diverse domains through world models. arXiv preprint arXiv:2301.04104"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11252-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11252-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11252-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T08:27:48Z","timestamp":1751012868000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11252-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,7]]},"references-count":28,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["11252"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11252-z","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2025,5,7]]},"assertion":[{"value":"4 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflict of interest to declare that is relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}