{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T12:59:17Z","timestamp":1762001957808,"version":"3.37.3"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2022,7,26]],"date-time":"2022-07-26T00:00:00Z","timestamp":1658793600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,7,26]],"date-time":"2022-07-26T00:00:00Z","timestamp":1658793600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Ambient Intell Human Comput"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s12652-022-04232-x","type":"journal-article","created":{"date-parts":[[2022,7,27]],"date-time":"2022-07-27T13:18:51Z","timestamp":1658927931000},"page":"12663-12672","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["PMDRL: Pareto-front-based multi-objective deep reinforcement learning"],"prefix":"10.1007","volume":"14","author":[{"given":"Fangjie","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Honglan","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1039-9735","authenticated-orcid":false,"given":"Yanghe","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangquan","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,7,26]]},"reference":[{"key":"4232_CR1","doi-asserted-by":"crossref","unstructured":"Ahmadzadeh S, Kormushev P, Caldwell D (2014) Multi-objective reinforcement learning for AUV thruster failure recovery. In: IEEE symposium on adaptive dynamic programming and reinforcement learning, pp 1\u20138.","DOI":"10.1109\/ADPRL.2014.7010621"},{"issue":"8\u20139","key":"4232_CR2","doi-asserted-by":"publisher","first-page":"3222","DOI":"10.1016\/j.foreco.2008.01.038","volume":"255","author":"L Diaz-Balteiro","year":"2008","unstructured":"Diaz-Balteiro L, Romero C (2008) Making forestry decisions with multiple criteria: a review and an assessment. For Ecol Manag 255(8\u20139):3222\u20133241","journal-title":"For Ecol Manag"},{"issue":"17","key":"4232_CR3","doi-asserted-by":"publisher","first-page":"5783","DOI":"10.1007\/s00500-017-2659-7","volume":"22","author":"Y Feng","year":"2018","unstructured":"Feng Y, Yang X, Cheng G (2018) Stability in mean for multi-dimensional uncertain differential equation. Soft Comput 22(17):5783\u20135789","journal-title":"Soft Comput"},{"issue":"4","key":"4232_CR4","doi-asserted-by":"publisher","first-page":"2425","DOI":"10.1007\/s00500-018-03689-3","volume":"24","author":"Y Feng","year":"2020","unstructured":"Feng Y, Dai L, Gao J, Cheng G (2020a) Uncertain pursuit-evasion game. Soft Comput 24(4):2425\u20132429","journal-title":"Soft Comput"},{"issue":"4","key":"4232_CR5","doi-asserted-by":"publisher","first-page":"2463","DOI":"10.1007\/s00500-018-03732-3","volume":"24","author":"Y Feng","year":"2020","unstructured":"Feng Y, Shi W, Cheng G, Huang J, Liu Z (2020b) Benchmarking framework for command and control mission planning under uncertain environment. Soft Comput 24(4):2463\u20132478","journal-title":"Soft Comput"},{"key":"4232_CR6","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/978-3-319-34181-1_17","volume-title":"Sustainable aviation","author":"A Gardi","year":"2016","unstructured":"Gardi A, Sabatini R, Marino M, Kistan T (2016) Multi-objective 4d trajectory optimization for online strategic and tactical air traffic management. Sustainable aviation. Springer, Cham, pp 185\u2013200"},{"key":"4232_CR7","doi-asserted-by":"crossref","unstructured":"Handa H (2009) Solving multi-objective reinforcement learning problems by EDA-RL-acquisition of various strategies. In: 2009 ninth international conference on intelligent systems design and applications, pp 426\u2013431","DOI":"10.1109\/ISDA.2009.92"},{"key":"4232_CR8","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3116762","author":"S Liu","year":"2021","unstructured":"Liu S, Feng Y, Wu K, Cheng G, Huang J, Liu Z (2021a) Graph-attention-based casual discovery with trust region-navigated clipping policy optimization. IEEE Trans Cybern. https:\/\/doi.org\/10.1109\/TCYB.2021.3116762","journal-title":"IEEE Trans Cybern"},{"key":"4232_CR9","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3102160","author":"Y Liu","year":"2021","unstructured":"Liu Y, Wang Y, Feng Y, Wu Y (2021b) Neural network-based adaptive boundary control of a flexible riser with input deadzone and output constraint. IEEE Trans Cybern. https:\/\/doi.org\/10.1109\/TCYB.2021.3102160","journal-title":"IEEE Trans Cybern"},{"key":"4232_CR10","unstructured":"Lizotte D, Bowling M, Murphy S (2010) Efficient reinforcement learning with multiple reward functions for randomized controlled trial analysis. In: Proceedings of the 27th international conference on machine learning, pp 695\u2013702"},{"issue":"1","key":"4232_CR11","first-page":"3253","volume":"13","author":"D Lizotte","year":"2012","unstructured":"Lizotte D, Bowling M, Murphy S (2012) Linear fitted-q iteration with multiple reward functions. J Mach Learn Res 13(1):3253\u20133295","journal-title":"J Mach Learn Res"},{"issue":"7540","key":"4232_CR12","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu A, Veness J, Bellemare M, Graves A, Riedmiller M, Fidjeland A, Ostrovski G (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"4232_CR13","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.neucom.2016.11.094","volume":"263","author":"S Parisi","year":"2017","unstructured":"Parisi S, Pirotta M, Peters J (2017) Manifold-based multi-objective policy search with sample reuse. Neurocomputing 263:3\u201314","journal-title":"Neurocomputing"},{"key":"4232_CR14","doi-asserted-by":"crossref","unstructured":"Pirotta M, Parisi S, Restelli M (2015) Multi-objective reinforcement learning with continuous pareto frontier approximation. In: Twenty-ninth AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v29i1.9617"},{"key":"4232_CR15","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/j.neucom.2016.10.100","volume":"263","author":"M Ruiz-Montiel","year":"2017","unstructured":"Ruiz-Montiel M, Mandow L, P\u00e9rez-de-la-Cruz J (2017) A temporal difference method for multi-objective reinforcement learning. Neurocomputing 263:15\u201325","journal-title":"Neurocomputing"},{"key":"4232_CR16","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1707.06347","author":"J Schulman","year":"2017","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. Mach Learn. https:\/\/doi.org\/10.48550\/arXiv.1707.06347","journal-title":"Mach Learn"},{"key":"4232_CR17","doi-asserted-by":"publisher","first-page":"4247","DOI":"10.1109\/TCOMM.2021.3070349","volume":"69","author":"B Shen","year":"2021","unstructured":"Shen B, Yang Y, Feng Y, Zhou Z (2021) A generalized construction of mutually orthogonal complementary sequence sets with non-power-of-two lengths. IEEE Trans Commun 69:4247\u20134253","journal-title":"IEEE Trans Commun"},{"issue":"1","key":"4232_CR18","first-page":"3483","volume":"15","author":"K Van Moffaert","year":"2014","unstructured":"Van Moffaert K, Now\u00e9 A (2014) Multi-objective reinforcement learning using sets of pareto dominating policies. J Mach Learn Res 15(1):3483\u20133512","journal-title":"J Mach Learn Res"},{"issue":"3\u20134","key":"4232_CR19","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"C Watkins","year":"1992","unstructured":"Watkins C, Dayan P (1992) Q-Learning. Mach Learn 8(3\u20134):279\u2013292","journal-title":"Mach Learn"},{"key":"4232_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2021.3087666","author":"G Wu","year":"2021","unstructured":"Wu G, Fan M, Shi J, Feng Y (2021) Reinforcement learning based truck-and-drone coordinated delivery. IEEE Trans Artif Intell. https:\/\/doi.org\/10.1109\/TAI.2021.3087666","journal-title":"IEEE Trans Artif Intell"},{"key":"4232_CR21","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.ast.2018.05.032","volume":"79","author":"M Zhang","year":"2018","unstructured":"Zhang M, Filippone A, Bojdo N (2018) Multi-objective optimisation of aircraft departure trajectories. Aerosp Sci Technol 79:37\u201347","journal-title":"Aerosp Sci Technol"}],"container-title":["Journal of Ambient Intelligence and Humanized Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-022-04232-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12652-022-04232-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-022-04232-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T16:34:22Z","timestamp":1690216462000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12652-022-04232-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,26]]},"references-count":21,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["4232"],"URL":"https:\/\/doi.org\/10.1007\/s12652-022-04232-x","relation":{},"ISSN":["1868-5137","1868-5145"],"issn-type":[{"type":"print","value":"1868-5137"},{"type":"electronic","value":"1868-5145"}],"subject":[],"published":{"date-parts":[[2022,7,26]]},"assertion":[{"value":"12 December 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 June 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}