{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T07:05:30Z","timestamp":1770534330940,"version":"3.49.0"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T00:00:00Z","timestamp":1770422400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T00:00:00Z","timestamp":1770422400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61374186"],"award-info":[{"award-number":["61374186"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s10458-026-09735-x","type":"journal-article","created":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T08:54:44Z","timestamp":1770454484000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A multi-objective goal-oriented reinforcement learning algorithm for dynamic multi-objective sequential decision making"],"prefix":"10.1007","volume":"40","author":[{"given":"Haofang","family":"Yu","sequence":"first","affiliation":[]},{"given":"Hong-chuan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Yanyan","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,7]]},"reference":[{"key":"9735_CR1","doi-asserted-by":"crossref","unstructured":"Hayes, C. F., R\u00e3dulescu, R., Bargiacchi, E. et al. (2022). A practical guide to multi-objective reinforcement learning and planning, Autonomous Agents and Multi-Agent Systems, 36(1), 26.","DOI":"10.1007\/s10458-022-09552-y"},{"key":"9735_CR2","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers, D. M., Vamplew, P., Whiteson, S., & Dazeley, R. (2013). A survey of multi-objective sequential decision-making. Journal of Artificial Intelligence Research, 48, 67\u2013113.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9735_CR3","doi-asserted-by":"crossref","unstructured":"Wiering, M. A., De Jong, E. D. (2007). Computing optimal stationary policies for multi-objective markov decision processes, In 2007 IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning, IEEE, (pp. 158\u2013165).","DOI":"10.1109\/ADPRL.2007.368183"},{"key":"9735_CR4","unstructured":"Perny, P., Weng, P. (2010). On finding compromise solutions in multiobjective Markov decision processes, In ECAI 2010, IOS Press, (pp. 969\u2013970)."},{"key":"9735_CR5","doi-asserted-by":"crossref","unstructured":"Reymond, M., Hayes, C. F., Steckelmacher, D., Roijers, D. M., Now\u00e9, A. (2023). Actor-critic multi-objective reinforcement learning for non-linear utility functions, Autonomous Agents and Multi-Agent Systems, 37, no. 2, (pp. 23).","DOI":"10.1007\/s10458-023-09604-x"},{"key":"9735_CR6","unstructured":"Roijers, D. M., Steckelmacher, D., Now\u00e9, A. (2018). \u201cMulti-objective reinforcement learning for the expected utility of the return,\u201d In Proceedings of the Adaptive and Learning Agents workshop at FAIM, 2018."},{"key":"9735_CR7","doi-asserted-by":"crossref","unstructured":"Sato, H. (2014). Inverted PBI in MOEA\/D and its impact on the search performance on multi and many-objective optimization, In Proceedings of the 2014 Annual Conference on Genetic and Evolutionary Computation, (pp. 645\u2013652).","DOI":"10.1145\/2576768.2598297"},{"key":"9735_CR8","doi-asserted-by":"crossref","unstructured":"Van Moffaert, K., Drugan, M. M., Now\u00e9, A. (2013). Scalarized multi-objective reinforcement learning: Novel design techniques, In 2013 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL), IEEE, (pp. 191\u2013199).","DOI":"10.1109\/ADPRL.2013.6615007"},{"key":"9735_CR9","unstructured":"Abels, A., Roijers, D., Lenaerts, T., Now\u00e9, A., Steckelmacher, D. (2019). Dynamic weights in multi-objective deep reinforcement learning, In International Conference on Machine Learning, PMLR, (pp. 11\u201320)."},{"key":"9735_CR10","unstructured":"Yang, R., Sun, X., Narasimhan, K. (2019). A generalized algorithm for multi-objective reinforcement learning and policy adaptation, Advances in Neural Information Processing Systems, 32."},{"key":"9735_CR11","unstructured":"Bauer, C. Jeffrey, K., et al. (2003). Service robots in health care: The evolution of mechanical solutions to human resource problems, Bon Secours Health System Inc., Technology Early Warning System (TEWS), Future of Service Robots in Health Care, (pp. 1\u201310)."},{"key":"9735_CR12","doi-asserted-by":"crossref","unstructured":"Miao, C., Chen, G., Yan, C., et al. (2021). Path planning optimization of indoor mobile robot based on adaptive ant colony algorithm. Computers and Industrial Engineering, 156,\u00a0107230.","DOI":"10.1016\/j.cie.2021.107230"},{"key":"9735_CR13","doi-asserted-by":"crossref","unstructured":"Wang, Y., Liu, K., Geng, L., & Zhang, S. (2024). Knowledge hierarchy-based dynamic multi-objective optimization method for AUV path planning in cooperative search missions. Ocean Engineering, 312, 119267.","DOI":"10.1016\/j.oceaneng.2024.119267"},{"issue":"6","key":"9735_CR14","doi-asserted-by":"publisher","first-page":"1891","DOI":"10.1109\/TCBB.2017.2685320","volume":"15","author":"YN Guo","year":"2017","unstructured":"Guo, Y. N., Cheng, J., Luo, S., et al. (2017). Robust dynamic multi-objective vehicle routing optimization method. IEEE\/ACM Transactions on Computational Biology and Bioinformatics, 15(6), 1891\u20131903.","journal-title":"IEEE\/ACM Transactions on Computational Biology and Bioinformatics"},{"key":"9735_CR15","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/j.engappai.2019.08.014","volume":"86","author":"MM Hasan","year":"2019","unstructured":"Hasan, M. M., Lwin, K., Imani, M., et al. (2019). Dynamic multi-objective optimisation using deep reinforcement learning: benchmark, algorithm and an application to identify vulnerable zones based on water quality. Engineering Applications of Artificial Intelligence, 86, 107\u2013135.","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"9735_CR16","doi-asserted-by":"crossref","unstructured":"Wang, Y., Ma, Y., Li, Q., et al. (2025). A dynamic multi-objective optimization evolutionary algorithm based on classification of environmental change intensity and collaborative prediction strategy, The Journal of Supercomputing, 81(1), 54.","DOI":"10.1007\/s11227-024-06480-4"},{"key":"9735_CR17","doi-asserted-by":"crossref","unstructured":"Peng, H., Xiong, J., Pi, C., et al. (2024). A dynamic multi-objective optimization evolutionary algorithm with adaptive boosting. Swarm and Evolutionary Computation, 89, 101621.","DOI":"10.1016\/j.swevo.2024.101621"},{"issue":"1","key":"9735_CR18","first-page":"36","volume":"3","author":"J Ding","year":"2018","unstructured":"Ding, J., Yang, C., Xiao, Q., et al. (2018). Dynamic evolutionary multiobjective optimization for raw ore allocation in mineral processing. IEEE Transactions on Emerging Topics in Computational Intelligence, 3(1), 36\u201348.","journal-title":"IEEE Transactions on Emerging Topics in Computational Intelligence"},{"key":"9735_CR19","doi-asserted-by":"crossref","unstructured":"Vamplew, P., Yearwood, J., Dazeley, R., Berry, A. (2008). On the limitations of scalarisation for multi-objective reinforcement learning of pareto fronts, In AI 2008: Advances in Artificial Intelligence: 21st Australasian Joint Conference on Artificial Intelligence Auckland, New Zealand: Springer, (pp. 372\u2013378).","DOI":"10.1007\/978-3-540-89378-3_37"},{"issue":"4","key":"9735_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3524495","volume":"55","author":"S Jiang","year":"2022","unstructured":"Jiang, S., Zou, J., Yang, S., & Yao, X. (2022). Evolutionary dynamic multi-objective optimisation: A survey. ACM Computing Surveys, 55(4), 1\u201347.","journal-title":"ACM Computing Surveys"},{"key":"9735_CR21","doi-asserted-by":"crossref","unstructured":"Wang, X., Zheng, J., Hou, Z., et al. (2024). A novel preference-driven evolutionary algorithm for dynamic multi-objective problems. Swarm and Evolutionary Computation, 89, 101638.","DOI":"10.1016\/j.swevo.2024.101638"},{"key":"9735_CR22","doi-asserted-by":"crossref","unstructured":"Hasan, M. M., Lwin, K., Shabut, A., et al. (2019). Design and development of a benchmark for dynamic multi-objective optimisation problem in the context of deep reinforcement learning, In 2019 22nd International Conference on Computer and Information Technology, IEEE, (pp. 1\u20136).","DOI":"10.1109\/ICCIT48885.2019.9038529"},{"key":"9735_CR23","doi-asserted-by":"crossref","unstructured":"Liu, M., Zhu, M., Zhang, W. (2022). \u201cGoal-conditioned reinforcement learning: Problems and solutions,\u201d arXiv: 2201.08299","DOI":"10.24963\/ijcai.2022\/770"},{"key":"9735_CR24","doi-asserted-by":"publisher","first-page":"1159","DOI":"10.1613\/jair.1.13554","volume":"76","author":"C Colas","year":"2022","unstructured":"Colas, C., Karch, T., Sigaud, O., et al. (2022). Autotelic agents with intrinsically motivated goal-conditioned reinforcement learning: a short survey. Journal of Artificial Intelligence Research, 76, 1159\u20131199.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9735_CR25","first-page":"1094","volume":"2","author":"LP Kaelbling","year":"1993","unstructured":"Kaelbling, L. P. (1993). Learning to achieve goals. IJCAI, 2, 1094\u20131099.","journal-title":"IJCAI"},{"key":"9735_CR26","unstructured":"Schaul, T., Horgan, D., Gregor, K., et al. (2015). \u201cUniversal value function approximators,\u201d In International conference on machine learning, PMLR, (pp. 1312\u20131320)."},{"key":"9735_CR27","unstructured":"Trott, A., Zheng, S., Xiong, C., et al. (2019). \u201cKeeping your distance: Solving sparse reward tasks using self-balancing shaped rewards,\u201d Advances in Neural Information Processing Systems, 32."},{"key":"9735_CR28","first-page":"8622","volume":"34","author":"I Durugkar","year":"2021","unstructured":"Durugkar, I., Tec, M., Niekum, S., et al. (2021). Adversarial intrinsic motivation for reinforcement learning. Advances in Neural Information Processing Systems, 34, 8622\u20138636.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"9735_CR29","unstructured":"Andrychowicz, M., Wolski, F., Ray, A., et al. (2017) \u201cHindsight experience replay,\u201d Advances in neural information processing systems, 30."},{"key":"9735_CR30","unstructured":"Fang, M., Zhou, T., Du, Y., et al. (2019). \u201cCurriculum-guided hindsight experience replay,\u201d Advances in neural information processing systems, 32."},{"key":"9735_CR31","unstructured":"Pitis, S., Chan, H., Zhao, S., et al. (2020). \u201cMaximum entropy gain exploration for long horizon multi-goal reinforcement learning,\u201d In International Conference on Machine Learning, PMLR, (pp. 7750\u20137761)."},{"key":"9735_CR32","doi-asserted-by":"crossref","unstructured":"Vo\u00df, T., Beume, N., Rudolph, G., Igel, C. (2008). Scalarization versus indicator-based selection in multi-objective CMA evolution strategies, In 2013 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL), IEEE, (pp. 3036\u20133043).","DOI":"10.1109\/CEC.2008.4631208"},{"key":"9735_CR33","doi-asserted-by":"crossref","unstructured":"Van Veldhuizen, D. A., Lamont, G. B. (2000). On measuring multiobjective evolutionary algorithm performance, In Proceedings of the 2000 congress on evolutionary computation, La Jolla, USA: IEEE, 1, (pp. 204\u2013211).","DOI":"10.1109\/CEC.2000.870296"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09735-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-026-09735-x","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09735-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T08:54:49Z","timestamp":1770454489000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-026-09735-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,7]]},"references-count":33,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["9735"],"URL":"https:\/\/doi.org\/10.1007\/s10458-026-09735-x","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,7]]},"assertion":[{"value":"12 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"5"}}