{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T20:51:24Z","timestamp":1763326284341,"version":"3.45.0"},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T00:00:00Z","timestamp":1760745600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T00:00:00Z","timestamp":1760745600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Life Robotics"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s10015-025-01068-4","type":"journal-article","created":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T20:50:09Z","timestamp":1760820609000},"page":"752-763","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Target-oriented exploration guided by anticipated returns"],"prefix":"10.1007","volume":"30","author":[{"given":"Akane","family":"Tsuboya","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Kono","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tatsuji","family":"Takahashi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,18]]},"reference":[{"issue":"7676","key":"1068_CR1","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359","journal-title":"Nature"},{"key":"1068_CR2","unstructured":"Kool W, van Hoof H, Welling M (2019) Attention, learn to solve routing problems! in Proceedings of the 7th International Conference on Learning Representations"},{"issue":"7897","key":"1068_CR3","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1038\/s41586-021-04301-9","volume":"602","author":"J Degrave","year":"2022","unstructured":"Degrave J, Felici F, Buchli J et al (2022) Magnetic control of tokamak plasmas through deep reinforcement learning. Nature 602(7897):414\u2013419","journal-title":"Nature"},{"issue":"7540","key":"1068_CR4","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"1068_CR5","doi-asserted-by":"crossref","unstructured":"Takahashi T, Kohno Y, Uragami D (2016) Cognitive satisficing: Bounded rationality in reinforcement learning. Transactions of the Japanese Society for Artificial Intelligence 31(6):AI30-M_1-11. (in Japanese)","DOI":"10.1527\/tjsai.AI30-M"},{"key":"1068_CR6","unstructured":"Burda Y, Edwards H, Storkey A, Klimov O (2019) Exploration by random network distillation. in Proceedings of the 7th International Conference on Learning Representations pp. 1\u201317"},{"key":"1068_CR7","unstructured":"Eysenbach B, Gupta A, Ibarz J, Levine S (2019) Diversity is all you need: Learning skills without a reward function. in Proceedings of the 7th International Conference on Learning Representations"},{"key":"1068_CR8","unstructured":"Burda Y, Edwards H, Pathak D, et\u00a0al (2019) Large-scale study of curiosity-driven learning. in Proceedings of the 7th International Conference on Learning Representations"},{"key":"1068_CR9","doi-asserted-by":"crossref","unstructured":"Liu M, Zhu M, Zhang W (2022) Goal-conditioned reinforcement learning: Problems and solutions. in Proceedings of the 31st International Joint Conference on Artificial Intelligence pp. 5502\u20135511","DOI":"10.24963\/ijcai.2022\/770"},{"key":"1068_CR10","unstructured":"Arumugam D, Kumar S, Gummadi R, Van\u00a0Roy B (2024) Satisficing exploration for deep reinforcement learning. in Finding the Frame: A Reinforcement Learning Conference Workshop for Examining Conceptual Frameworks"},{"key":"1068_CR11","doi-asserted-by":"publisher","unstructured":"Kamiya T, Takahashi T (2022) Softsatisficing: Risk-sensitive softmax action selection. Biosystems (213:104633). https:\/\/doi.org\/10.1016\/j.biosystems.2022.104633","DOI":"10.1016\/j.biosystems.2022.104633"},{"issue":"1","key":"1068_CR12","doi-asserted-by":"publisher","first-page":"589","DOI":"10.3156\/jsoft.36.1_589","volume":"36","author":"A Tsuboya","year":"2024","unstructured":"Tsuboya A, Kono Y, Takahashi T (2024) A sequential decision-making model in contextual foraging behavior. Journal of Japan Society for Fuzzy Theory and Intelligent Informatics 36(1):589\u2013600 ((in Japanese))","journal-title":"Journal of Japan Society for Fuzzy Theory and Intelligent Informatics"},{"key":"1068_CR13","doi-asserted-by":"publisher","first-page":"105276","DOI":"10.1016\/j.biosystems.2024.105276","volume":"243","author":"D Uragami","year":"2024","unstructured":"Uragami D, Sonota N, Takahashi T (2024) Social satisficing: Multi-agent reinforcement learning with satisficing agents. BioSystems 243:105276. https:\/\/doi.org\/10.1016\/j.biosystems.2024.105276","journal-title":"BioSystems"},{"key":"1068_CR14","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1016\/j.biosystems.2019.02.009","volume":"180","author":"A Tamatsukuri","year":"2019","unstructured":"Tamatsukuri A, Takahashi T (2019) Guaranteed satisficing and finite regret: Analysis of a cognitive satisficing value function. Biosystems 180:46\u201353. https:\/\/doi.org\/10.1016\/j.biosystems.2019.02.009","journal-title":"Biosystems"},{"key":"1068_CR15","unstructured":"Satori K, Yoshida Y, Kamiya T, Takahashi T (2019) Toward deep satisficing reinforcement learning. in Proceedings of the 33rd Annual Conference of the Japanese Society for Artificial Intelligence. (in Japanese)"},{"key":"1068_CR16","unstructured":"Kono Y, Kume J, Ikeda R, Takahashi T (2023) Target-oriented exploration in deep reinforcement learning. in Proceedings of the 37th Annual Conference of the Japanese Society for Artificial Intelligence. (in Japanese)"},{"key":"1068_CR17","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, et\u00a0al (2015) Continuous control with deep reinforcement learning. arXiv preprint (arXiv:1509.02971)"},{"key":"1068_CR18","unstructured":"Espeholt L, Soyer H, Munos R, et\u00a0al (2018) Impala: Scalable distributed deep-rl with importance weighted actor-learner architectures. in Proceedings of the 35th International Conference on Machine Learning pp. 1406\u20131415"},{"key":"1068_CR19","unstructured":"Arthur D, Vassilvitskii S (2007) k-means++: the advantages of careful seeding. in Proceedings of the 18th Annual ACM-SIAM Symposium on Discrete Algorithms pp. 1027\u20131035"},{"key":"1068_CR20","unstructured":"Kono Y, Takahashi T (2018) Autonomous optimal exploration through satisficing. in Proceedings of the 32nd Annual Conference of the Japanese Society for Artificial Intelligence. (in Japanese)"},{"key":"1068_CR21","doi-asserted-by":"crossref","unstructured":"van Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. in Proceedings of the 30th Association for the Advancement of Artificial Intelligence pp. 2094\u20132100","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"1068_CR22","unstructured":"Fujimoto S, van Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. in Proceedings of the 35th International Conference on Machine Learning pp. 1587\u20131596"},{"key":"1068_CR23","unstructured":"Brockman G, Cheung V, Pettersson L, et\u00a0al (2016) Openai gym. arXiv preprint (arXiv:1606.01540)"},{"key":"1068_CR24","unstructured":"Ikeda R, Minami A, Kono Y, Takahashi T (2022) Developing a scalable and simple verification task of deep reinforcement learning. in Proceedings of the 36th Annual Conference of the Japanese Society for Artificial Intelligence. (in Japanese)"},{"key":"1068_CR25","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement Learning: An Introduction. MIT Press, Cambridge"},{"key":"1068_CR26","unstructured":"OpenAI (2024). Openai gym: Cartpole. https:\/\/www.gymlibrary.dev\/environments\/classic_control\/cart_pole\/. Accessed 01 May 2025"},{"key":"1068_CR27","doi-asserted-by":"crossref","unstructured":"Pathak D, Agrawal P, Efros AA, Darrell T (2017) Curiosity-driven exploration by self-supervised prediction. in Proceedings of the 34th International Conference on Machine Learning pp. 2778\u20132787","DOI":"10.1109\/CVPRW.2017.70"}],"container-title":["Artificial Life and Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-025-01068-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10015-025-01068-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-025-01068-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T20:48:14Z","timestamp":1763326094000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10015-025-01068-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,18]]},"references-count":27,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["1068"],"URL":"https:\/\/doi.org\/10.1007\/s10015-025-01068-4","relation":{},"ISSN":["1433-5298","1614-7456"],"issn-type":[{"type":"print","value":"1433-5298"},{"type":"electronic","value":"1614-7456"}],"subject":[],"published":{"date-parts":[[2025,10,18]]},"assertion":[{"value":"14 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}