{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T03:44:36Z","timestamp":1764906276185,"version":"3.37.3"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,2,17]],"date-time":"2025-02-17T00:00:00Z","timestamp":1739750400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,17]],"date-time":"2025-02-17T00:00:00Z","timestamp":1739750400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07010-6","type":"journal-article","created":{"date-parts":[[2025,2,18]],"date-time":"2025-02-18T01:44:32Z","timestamp":1739843072000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Investigating the performance of multi-objective reinforcement learning techniques in the context of IoT with harvesting energy"],"prefix":"10.1007","volume":"81","author":[{"given":"Bakhta","family":"Haouari","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rania","family":"Mzid","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Olfa","family":"Mosbahi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,17]]},"reference":[{"key":"7010_CR1","volume-title":"Artificial Intelligence","author":"PH Winston","year":"1992","unstructured":"Winston PH (1992) Artificial Intelligence. Addison-Wesley Longman Publishing Co., Inc"},{"issue":"15","key":"7010_CR2","doi-asserted-by":"publisher","first-page":"2787","DOI":"10.1016\/j.comnet.2010.05.010","volume":"54","author":"L Atzori","year":"2010","unstructured":"Atzori L, Iera A, Morabito G (2010) The internet of things: a survey. Comput Netw 54(15):2787\u20132805","journal-title":"Comput Netw"},{"key":"7010_CR3","doi-asserted-by":"crossref","unstructured":"Huang C, Wang J, Wang S, Zhang Y (2023) Internet of medical things: A systematic review. Neurocomputing p. 126719","DOI":"10.1016\/j.neucom.2023.126719"},{"key":"7010_CR4","doi-asserted-by":"publisher","first-page":"100124","DOI":"10.1016\/j.nexus.2022.100124","volume":"7","author":"K Obaideen","year":"2022","unstructured":"Obaideen K, Yousef BA, AlMallahi MN, Tan YC, Mahmoud M, Jaber H, Ramadan M (2022) An overview of smart irrigation systems using IoT. Energy Nexus 7:100124","journal-title":"Energy Nexus"},{"key":"7010_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10515-018-0244-7","volume":"26","author":"A Mehiaoui","year":"2019","unstructured":"Mehiaoui A, Wozniak E, Babau JP, Tucci-Piergiovanni S, Mraidha C (2019) Optimizing the deployment of tree-shaped functional graphs of real-time system on distributed architectures. Autom Softw Eng 26:1\u201357","journal-title":"Autom Softw Eng"},{"key":"7010_CR6","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1016\/j.ins.2020.06.005","volume":"539","author":"W Lakhdhar","year":"2020","unstructured":"Lakhdhar W, Mzid R, Khalgui M, Frey G, Li Z, Zhou M (2020) A guidance framework for synthesis of multi-core reconfigurable real-time systems. Inf Sci 539:327\u2013346","journal-title":"Inf Sci"},{"key":"7010_CR7","doi-asserted-by":"crossref","unstructured":"Lassoued R, Mzid R (2022) A multi-objective evolution strategy for real-time task placement on heterogeneous processors. International Conference on Intelligent Systems Design and Applications pp. 448\u2013457","DOI":"10.1007\/978-3-031-35501-1_45"},{"key":"7010_CR8","doi-asserted-by":"publisher","first-page":"424","DOI":"10.1007\/s11241-018-9299-6","volume":"54","author":"R Bouaziz","year":"2018","unstructured":"Bouaziz R, Lemarchand L, Singhoff F, Zalila B, Jmaiel M (2018) Multi-objective design exploration approach for ravenscar real-time systems. Real-Time Syst 54:424\u2013483","journal-title":"Real-Time Syst"},{"key":"7010_CR9","doi-asserted-by":"publisher","first-page":"43128","DOI":"10.1109\/ACCESS.2023.3272115","volume":"11","author":"D Zhou","year":"2023","unstructured":"Zhou D, Du J, Arai S (2023) Efficient elitist cooperative evolutionary algorithm for multi-objective reinforcement learning. IEEE Access 11:43128\u201343139","journal-title":"IEEE Access"},{"issue":"4","key":"7010_CR10","doi-asserted-by":"publisher","first-page":"53","DOI":"10.3390\/jlpea12040053","volume":"12","author":"S Shresthamali","year":"2022","unstructured":"Shresthamali S, Kondo M, Nakamura H (2022) Multi-objective resource scheduling for IoT systems using reinforcement learning. J Low Power Electron Appl 12(4):53","journal-title":"J Low Power Electron Appl"},{"key":"7010_CR11","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/s10994-010-5232-5","volume":"84","author":"P Vamplew","year":"2011","unstructured":"Vamplew P, Dazeley R, Berry A, Issabekov R, Dekker E (2011) Empirical evaluation methods for multiobjective reinforcement learning algorithms. Mach Learn 84:51\u201380","journal-title":"Mach Learn"},{"key":"7010_CR12","doi-asserted-by":"crossref","unstructured":"Vamplew P, Yearwood J, Dazeley R, Berry A (2008) On the limitations of scalarisation for multi-objective reinforcement learning of pareto fronts. AI 2008: Advances in Artificial Intelligence: 21st Australasian Joint Conference on Artificial Intelligence Auckland, New Zealand, December 1-5, 2008. Proceedings 21 pp. 372\u2013378","DOI":"10.1007\/978-3-540-89378-3_37"},{"key":"7010_CR13","doi-asserted-by":"crossref","unstructured":"Van\u00a0Moffaert K, Drugan MM, Now\u00e9 A (2013) Scalarized multi-objective reinforcement learning: Novel design techniques. 2013 IEEE symposium on adaptive dynamic programming and reinforcement learning (ADPRL) pp. 191\u2013199","DOI":"10.1109\/ADPRL.2013.6615007"},{"key":"7010_CR14","unstructured":"Van\u00a0Moffaert K, Drugan MM, Now\u00e9 A (2014) Learning sets of pareto optimal policies. Thirteenth International Conference on Autonomous Agents and Multiagent Systems-Adaptive Learning Agents Workshop (ALA)"},{"issue":"1","key":"7010_CR15","first-page":"3483","volume":"15","author":"K Van Moffaert","year":"2014","unstructured":"Van Moffaert K, Now\u00e9 A (2014) Multi-objective reinforcement learning using sets of pareto dominating policies. J Mach Learn Res 15(1):3483\u20133512","journal-title":"J Mach Learn Res"},{"issue":"1","key":"7010_CR16","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1007\/s10458-022-09552-y","volume":"36","author":"CF Hayes","year":"2022","unstructured":"Hayes CF, R\u0103dulescu R, Bargiacchi E, K\u00e4llstr\u00f6m J, Macfarlane M, Reymond M, Verstraeten T, Zintgraf LM, Dazeley R, Heintz F et al (2022) A practical guide to multi-objective reinforcement learning and planning. Auton Agents Multi-Agent Syst 36(1):26","journal-title":"Auton Agents Multi-Agent Syst"},{"key":"7010_CR17","unstructured":"Alegre LN, Bazzan AL, Roijers DM, Now\u00e9 A, da\u00a0Silva BC (2023) Sample-efficient multi-objective learning via generalized policy improvement prioritization. arXiv preprint arXiv:2301.07784"},{"key":"7010_CR18","first-page":"15593","volume":"36","author":"XQ Cai","year":"2023","unstructured":"Cai XQ, Zhang P, Zhao L, Bian J, Sugiyama M, Llorens A (2023) Distributional pareto-optimal multi-objective reinforcement learning. Advan Neural Inf Process Syst 36:15593\u201315613","journal-title":"Advan Neural Inf Process Syst"},{"key":"7010_CR19","unstructured":"Lu H, Herman D, Yu Y (2023) Multi-objective reinforcement learning: Convexity, stationarity and pareto optimality. The Eleventh International Conference on Learning Representations"},{"key":"7010_CR20","doi-asserted-by":"publisher","first-page":"526","DOI":"10.1016\/j.procs.2023.08.018","volume":"221","author":"L Zhang","year":"2023","unstructured":"Zhang L, Qi Z, Shi Y (2023) Multi-objective reinforcement learning-concept, approaches and applications. Proced Comput Sci 221:526\u2013532","journal-title":"Proced Comput Sci"},{"key":"7010_CR21","doi-asserted-by":"crossref","unstructured":"Vo\u00df T, Beume N, Rudolph G, Igel C (2008) Scalarization versus indicator-based selection in multi-objective cma evolution strategies. 2008 IEEE Congress on Evolutionary Computation (IEEE World Congress on Computational Intelligence) pp. 3036\u20133043","DOI":"10.1109\/CEC.2008.4631208"},{"key":"7010_CR22","unstructured":"Peschl M, Zgonnikov A, Oliehoek FA, Siebert LC (2021) Moral: Aligning ai with human norms through multi-objective reinforced active learning. arXiv preprint arXiv:2201.00012"},{"key":"7010_CR23","unstructured":"Reymond M, Bargiacchi E, Now\u00e9 A (2022) Pareto conditioned networks. arXiv preprint arXiv:2204.05036"},{"key":"7010_CR24","doi-asserted-by":"publisher","first-page":"103915","DOI":"10.1016\/j.engappai.2020.103915","volume":"96","author":"TT Nguyen","year":"2020","unstructured":"Nguyen TT, Nguyen ND, Vamplew P, Nahavandi S, Dazeley R, Lim CP (2020) A multi-objective deep reinforcement learning framework. Eng Appl Artif Intell 96:103915","journal-title":"Eng Appl Artif Intell"},{"key":"7010_CR25","unstructured":"Corne DW, Jerram NR, Knowles JD, Oates MJ (2001) Pesa-ii: Region-based selection in evolutionary multiobjective optimization. Proceedings of the 3rd annual conference on genetic and evolutionary computation pp. 283\u2013290"},{"issue":"1","key":"7010_CR26","first-page":"345","volume":"20","author":"S Singh","year":"2017","unstructured":"Singh S, Malik A, Kumar R (2017) Energy efficient heterogeneous DEEC protocol for enhancing lifetime in WSNs. Eng Sci Technol, Int J 20(1):345\u2013353","journal-title":"Eng Sci Technol, Int J"},{"issue":"5","key":"7010_CR27","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1049\/iet-wss.2015.0017","volume":"6","author":"S Singh","year":"2016","unstructured":"Singh S, Chand S, Kumar R, Malik A, Kumar B (2016) NEECP: novel energy-efficient clustering protocol for prolonging lifetime of WSNs. IET Wirel Sens Syst 6(5):151\u2013157","journal-title":"IET Wirel Sens Syst"},{"key":"7010_CR28","doi-asserted-by":"publisher","first-page":"2117","DOI":"10.1007\/s11277-014-1629-y","volume":"77","author":"S Chand","year":"2014","unstructured":"Chand S, Singh S, Kumar B (2014) Heterogeneous heed protocol for wireless sensor networks. Wirel Pers Commun 77:2117\u20132139","journal-title":"Wirel Pers Commun"},{"key":"7010_CR29","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1007\/s11277-015-2939-4","volume":"86","author":"S Singh","year":"2016","unstructured":"Singh S, Chand S, Kumar B (2016) Energy efficient clustering protocol using fuzzy logic for heterogeneous WSNs. Wirel Pers Commun 86:451\u2013475","journal-title":"Wirel Pers Commun"},{"key":"7010_CR30","doi-asserted-by":"crossref","unstructured":"Kumar S, Das R, Das D, Sarkar MK (2021) Fuzzy-based on-demand multi-node charging scheme to reduce death rate of sensors in wireless rechargeable sensor networks. 2021 10th International Conference on Internet of Everything, Microwave Engineering, Communication and Networks (IEMECON) pp. 1\u20137","DOI":"10.1109\/IEMECON53809.2021.9689198"},{"issue":"3","key":"7010_CR31","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1007\/s11235-022-00951-w","volume":"81","author":"R Das","year":"2022","unstructured":"Das R, Dash D, Yadav CBK (2022) An efficient charging scheme using battery constrained mobile charger in wireless rechargeable sensor networks. Telecommun Syst 81(3):389\u2013415","journal-title":"Telecommun Syst"},{"issue":"15","key":"7010_CR32","doi-asserted-by":"publisher","first-page":"e5573","DOI":"10.1002\/dac.5573","volume":"36","author":"R Das","year":"2023","unstructured":"Das R, Dash D (2023) Collaborative data gathering and recharging using multiple mobile vehicles in wireless rechargeable sensor network. Int J Commun Syst 36(15):e5573","journal-title":"Int J Commun Syst"},{"key":"7010_CR33","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.comcom.2023.03.022","volume":"204","author":"R Das","year":"2023","unstructured":"Das R, Dash D (2023) Joint on-demand data gathering and recharging by multiple mobile vehicles in delay sensitive WRSn using variable length GA. Comput Commun 204:130\u2013146","journal-title":"Comput Commun"},{"key":"7010_CR34","doi-asserted-by":"publisher","first-page":"100379","DOI":"10.1016\/j.dajour.2023.100379","volume":"10","author":"HK Apat","year":"2024","unstructured":"Apat HK, Sahoo B, Goswami V, Barik RK (2024) A hybrid meta-heuristic algorithm for multi-objective IoT service placement in fog computing environments. Decis Anal J 10:100379","journal-title":"Decis Anal J"},{"issue":"16","key":"7010_CR35","doi-asserted-by":"publisher","first-page":"5430","DOI":"10.3390\/s21165430","volume":"21","author":"N Iqbal","year":"2021","unstructured":"Iqbal N, Imran Ahmad S, Ahmad R, Kim DH (2021) A scheduling mechanism based on optimization using IoT-tasks orchestration for efficient patient health monitoring. Sensors 21(16):5430","journal-title":"Sensors"},{"key":"7010_CR36","doi-asserted-by":"crossref","unstructured":"Haouari B, Mzid R, Mosbahi O (2023) PSRL: A new method for real-time task placement and scheduling using reinforcement learning. Software Engineering and Knowledge Engineering pp. 555\u2013560","DOI":"10.18293\/SEKE2023-178"},{"issue":"27","key":"7010_CR37","doi-asserted-by":"publisher","first-page":"20375","DOI":"10.1007\/s00521-023-08778-5","volume":"35","author":"B Haouari","year":"2023","unstructured":"Haouari B, Mzid R, Mosbahi O (2023) A reinforcement learning-based approach for online optimal control of self-adaptive real-time systems. Neural Comput Appl 35(27):20375\u201320401","journal-title":"Neural Comput Appl"},{"key":"7010_CR38","doi-asserted-by":"crossref","unstructured":"Haouari B, Mzid R, Mosbahi O (2022) On the use of reinforcement learning for real-time system design and refactoring. International Conference on Intelligent Systems Design and Applications pp. 503\u2013512","DOI":"10.1007\/978-3-031-35501-1_50"},{"key":"7010_CR39","doi-asserted-by":"crossref","unstructured":"Feit F, Metzger A, Pohl K (2022) Explaining online reinforcement learning decisions of self-adaptive systems. 2022 IEEE International Conference on Autonomic Computing and Self-Organizing Systems (ACSOS) pp. 51\u201360","DOI":"10.1109\/ACSOS55765.2022.00023"},{"issue":"4","key":"7010_CR40","doi-asserted-by":"publisher","first-page":"1251","DOI":"10.1007\/s00607-022-01052-x","volume":"106","author":"A Metzger","year":"2022","unstructured":"Metzger A, Quinton C, Mann Z\u00c1, Baresi L, Pohl K (2022) Realizing self-adaptive systems via online reinforcement learning and feature-model-guided exploration. Computing 106(4):1251\u20131272","journal-title":"Computing"},{"key":"7010_CR41","doi-asserted-by":"crossref","unstructured":"Palm A, Metzger A, Pohl K (2020) Online reinforcement learning for self-adaptive information systems. International Conference on Advanced Information Systems Engineering pp. 169\u2013184","DOI":"10.1007\/978-3-030-49435-3_11"},{"key":"7010_CR42","doi-asserted-by":"crossref","unstructured":"Natarajan S, Tadepalli P (2005) Dynamic preferences in multi-criteria reinforcement learning. Proceedings of the 22nd international conference on Machine learning pp. 601\u2013608","DOI":"10.1145\/1102351.1102427"},{"issue":"1","key":"7010_CR43","doi-asserted-by":"publisher","first-page":"93","DOI":"10.25046\/aj040110","volume":"4","author":"H Yamamoto","year":"2019","unstructured":"Yamamoto H, Hayashida T, Nishizaki I, Sekizaki S (2019) Hypervolume-based multi-objective reinforcement learning: interactive approach. Advan Sci, Technol Eng Syst J 4(1):93\u2013100","journal-title":"Advan Sci, Technol Eng Syst J"},{"key":"7010_CR44","doi-asserted-by":"publisher","first-page":"2370","DOI":"10.1007\/s10489-020-01633-3","volume":"50","author":"Y Qin","year":"2020","unstructured":"Qin Y, Wang H, Yi S, Li X, Zhai L (2020) Virtual machine placement based on multi-objective reinforcement learning. Appl Intell 50:2370\u20132383","journal-title":"Appl Intell"},{"key":"7010_CR45","doi-asserted-by":"crossref","unstructured":"Barrett L, Narayanan S (2008) Learning all optimal policies with multiple criteria. Proceedings of the 25th international conference on Machine learning pp. 41\u201347","DOI":"10.1145\/1390156.1390162"},{"key":"7010_CR46","doi-asserted-by":"crossref","unstructured":"Haouari B, Mzid R, Mosbahi O (2024) Reinforcement learning for multi-objective task placement on heterogeneous architectures with real-time constraints. Proceedings of the 19th International Conference on Evaluation of Novel Approaches to Software Engineering - Volume 1: ENASE pp. 179\u2013189","DOI":"10.5220\/0012721500003687"},{"issue":"1","key":"7010_CR47","doi-asserted-by":"publisher","first-page":"44","DOI":"10.3390\/s20010044","volume":"20","author":"YH Xu","year":"2019","unstructured":"Xu YH, Xie JW, Zhang YG, Hua M, Zhou W (2019) Reinforcement learning (RL)-based energy efficient resource allocation for energy harvesting-powered wireless body area network. Sensors 20(1):44","journal-title":"Sensors"},{"issue":"12","key":"7010_CR48","doi-asserted-by":"publisher","first-page":"3450","DOI":"10.3390\/s20123450","volume":"20","author":"M Diyan","year":"2020","unstructured":"Diyan M, Silva BN, Han K (2020) A multi-objective approach for optimal energy management in smart home using the reinforcement learning. Sensors 20(12):3450","journal-title":"Sensors"},{"key":"7010_CR49","doi-asserted-by":"crossref","unstructured":"Lu J, Mannion P, Mason K (2024) A meta-learning approach for multi-objective reinforcement learning in sustainable home energy management. ECAI 2024 - 27th European Conference on Artificial Intelligence, 19-24 October 2024, Santiago de Compostela, Spain - Including 13th Conference on Prestigious Applications of Intelligent Systems (PAIS 2024) 392, 2814\u20132821","DOI":"10.3233\/FAIA240817"},{"issue":"5","key":"7010_CR50","doi-asserted-by":"publisher","first-page":"1329","DOI":"10.1109\/JAS.2023.123378","volume":"10","author":"X He","year":"2023","unstructured":"He X, Lv C (2023) Towards energy-efficient autonomous driving: A multi-objective reinforcement learning approach. IEEE\/CAA J Autom Sin 10(5):1329\u20131331","journal-title":"IEEE\/CAA J Autom Sin"},{"issue":"8","key":"7010_CR51","first-page":"1","volume":"54","author":"Y Tian","year":"2021","unstructured":"Tian Y, Si L, Zhang X, Cheng R, He C, Tan KC, Jin Y (2021) Evolutionary large-scale multi-objective optimization: a survey. ACM Comput Surv (CSUR) 54(8):1\u201334","journal-title":"ACM Comput Surv (CSUR)"},{"key":"7010_CR52","volume-title":"Evolutionary algorithms for solving multi-objective problems","author":"CAC Coello","year":"2007","unstructured":"Coello CAC (2007) Evolutionary algorithms for solving multi-objective problems. Springer"},{"issue":"3","key":"7010_CR53","doi-asserted-by":"publisher","first-page":"946","DOI":"10.21917\/ijsc.2015.0133","volume":"5","author":"I Muhammad","year":"2015","unstructured":"Muhammad I, Yan Z (2015) Supervised machine learning approaches: a survey. ICTACT J Soft Comput 5(3):946\u2013952","journal-title":"ICTACT J Soft Comput"},{"key":"7010_CR54","doi-asserted-by":"crossref","unstructured":"Shanthamallu US, Spanias A, Tepedelenlioglu C, Stanley M (2017) A brief survey of machine learning methods and their sensor and iot applications. 2017 8th International Conference on Information, Intelligence, Systems & Applications (IISA) pp. 1\u20138","DOI":"10.1109\/IISA.2017.8316459"},{"key":"7010_CR55","first-page":"679","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman R (1957) A Markovian decision process. J Math Mech 6:679\u2013684","journal-title":"J Math Mech"},{"key":"7010_CR56","unstructured":"Howard RA (1960) Dynamic programming and Markov processes"},{"key":"7010_CR57","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120495","volume":"231","author":"AK Shakya","year":"2023","unstructured":"Shakya AK, Pillai G, Chakrabarty S (2023) Reinforcement learning algorithms: a brief survey. Expert Syst Appl 231:120495","journal-title":"Expert Syst Appl"},{"key":"7010_CR58","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1146\/annurev-statistics-031219-041220","volume":"7","author":"J Clifton","year":"2020","unstructured":"Clifton J, Laber E (2020) Q-learning: theory and applications. Annu Rev Stat Appl 7:279\u2013301","journal-title":"Annu Rev Stat Appl"},{"key":"7010_CR59","doi-asserted-by":"crossref","unstructured":"Feng S, Wu X, Zhao Y, Li Y (2023) Dispatching and scheduling dependent tasks based on multi-agent deep reinforcement learning. Softw Eng Knowl Eng pp. 281\u2013286","DOI":"10.18293\/SEKE2023-059"},{"key":"7010_CR60","unstructured":"Watkins CJCH (1989) Learning from delayed rewards"},{"issue":"3","key":"7010_CR61","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins CJ, Dayan P (1992) Q-learning. Mach Learn 8(3):279\u2013292","journal-title":"Mach Learn"},{"key":"7010_CR62","doi-asserted-by":"publisher","first-page":"133653","DOI":"10.1109\/ACCESS.2019.2941229","volume":"7","author":"B Jang","year":"2019","unstructured":"Jang B, Kim M, Harerimana G, Kim JW (2019) Q-learning algorithms: a comprehensive classification and applications. IEEE Access 7:133653\u2013133667","journal-title":"IEEE Access"},{"key":"7010_CR63","first-page":"13303","volume":"35","author":"A Ghosh","year":"2022","unstructured":"Ghosh A, Zhou X, Shroff N (2022) Provably efficient model-free constrained RL with linear function approximation. Advan Neural Inf Process Syst 35:13303\u201313315","journal-title":"Advan Neural Inf Process Syst"},{"key":"7010_CR64","first-page":"99","volume":"93","author":"S Koenig","year":"1993","unstructured":"Koenig S, Simmons RG (1993) Complexity analysis of real-time reinforcement learning. AAAI 93:99\u2013105","journal-title":"AAAI"},{"issue":"2","key":"7010_CR65","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1162\/106365600568202","volume":"8","author":"E Zitzler","year":"2000","unstructured":"Zitzler E, Deb K, Thiele L (2000) Comparison of multiobjective evolutionary algorithms: Empirical results. Evolut Comput 8(2):173\u2013195","journal-title":"Evolut Comput"},{"key":"7010_CR66","doi-asserted-by":"crossref","unstructured":"Knowles J, Corne D (2002) On metrics for comparing nondominated sets. Proceedings of the 2002 Congress on Evolutionary Computation. CEC\u201902 (Cat. No. 02TH8600) 1:711\u2013716","DOI":"10.1109\/CEC.2002.1007013"},{"key":"7010_CR67","doi-asserted-by":"crossref","unstructured":"Fonseca CM, Paquete L, L\u00f3pez-Ib\u00e1nez M (2006) An improved dimension-sweep algorithm for the hypervolume indicator. 2006 IEEE international conference on evolutionary computation pp. 1157\u20131163","DOI":"10.1109\/CEC.2006.1688440"},{"key":"7010_CR68","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.jspi.2014.12.004","volume":"160","author":"Y Cao","year":"2015","unstructured":"Cao Y, Smucker BJ, Robinson TJ (2015) On using the hypervolume indicator to compare pareto fronts: applications to multi-criteria optimal experimental design. J Stat Plan Inference 160:60\u201374","journal-title":"J Stat Plan Inference"},{"key":"7010_CR69","unstructured":"Yang R, Sun X, Narasimhan K (2019) A generalized algorithm for multi-objective reinforcement learning and policy adaptation. Advances in neural information processing systems 32"},{"key":"7010_CR70","doi-asserted-by":"crossref","unstructured":"Zitzler E, Knowles J, Thiele L (2008) Quality assessment of pareto set approximations. Multiobjective optimization: Interactive and evolutionary approaches 5252 373\u2013404","DOI":"10.1007\/978-3-540-88908-3_14"},{"key":"7010_CR71","doi-asserted-by":"crossref","unstructured":"Zitzler E, Thiele L (1998) Multiobjective optimization using evolutionary algorithms-a comparative case study. International conference on parallel problem solving from nature pp. 292\u2013301","DOI":"10.1007\/BFb0056872"},{"key":"7010_CR72","unstructured":"Fonseca CM, Knowles JD, Thiele L, Zitzler E et\u00a0al (2005) A tutorial on the performance assessment of stochastic multiobjective optimizers. Third international conference on evolutionary multi-criterion optimization (EMO 2005) 216:240"},{"issue":"2","key":"7010_CR73","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1109\/TSG.2013.2268664","volume":"5","author":"M Pipattanasomporn","year":"2013","unstructured":"Pipattanasomporn M, Kuzlu M, Rahman S, Teklu Y (2013) Load profiles of selected major household appliances and their demand response opportunities. IEEE Trans Smart Grid 5(2):742\u2013750","journal-title":"IEEE Trans Smart Grid"},{"key":"7010_CR74","first-page":"969","volume":"2010","author":"P Perny","year":"2010","unstructured":"Perny P, Weng P (2010) On finding compromise solutions in multiobjective markov decision processes. ECAI 2010:969\u2013970","journal-title":"ECAI"},{"issue":"3","key":"7010_CR75","doi-asserted-by":"publisher","first-page":"1722","DOI":"10.1109\/COMST.2020.2988367","volume":"22","author":"L Lei","year":"2020","unstructured":"Lei L, Tan Y, Zheng K, Liu S, Zhang K, Shen X (2020) Deep reinforcement learning for autonomous internet of things: model, applications and challenges. IEEE Commun Surv Tutor 22(3):1722\u20131760","journal-title":"IEEE Commun Surv Tutor"},{"issue":"98","key":"7010_CR76","first-page":"1","volume":"22","author":"A Agarwal","year":"2021","unstructured":"Agarwal A, Kakade SM, Lee JD, Mahajan G (2021) On the theory of policy gradient methods: optimality, approximation, and distribution shift. J Mach Learn Res 22(98):1\u201376","journal-title":"J Mach Learn Res"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07010-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07010-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07010-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,18]],"date-time":"2025-02-18T01:45:17Z","timestamp":1739843117000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07010-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,17]]},"references-count":76,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2025,3]]}},"alternative-id":["7010"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07010-6","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,17]]},"assertion":[{"value":"30 January 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 February 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"515"}}