{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T16:19:41Z","timestamp":1780503581717,"version":"3.54.1"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1109\/tpami.2025.3528944","type":"journal-article","created":{"date-parts":[[2025,1,14]],"date-time":"2025-01-14T19:49:27Z","timestamp":1736884167000},"page":"3322-3331","source":"Crossref","is-referenced-by-count":14,"title":["Safe and Balanced: A Framework for Constrained Multi-Objective Reinforcement Learning"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2722-3779","authenticated-orcid":false,"given":"Shangding","family":"Gu","sequence":"first","affiliation":[{"name":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bilgehan","family":"Sel","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Virginia Tech, Blacksburg, VA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1521-7566","authenticated-orcid":false,"given":"Yuhao","family":"Ding","sequence":"additional","affiliation":[{"name":"Cubist Systematic Strategies, New York City, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lu","family":"Wang","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2559-2383","authenticated-orcid":false,"given":"Qingwei","family":"Lin","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4840-076X","authenticated-orcid":false,"given":"Alois","family":"Knoll","sequence":"additional","affiliation":[{"name":"Department of Informatics, Technical University of Munich, Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7909-4545","authenticated-orcid":false,"given":"Ming","family":"Jin","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Virginia Tech, Blacksburg, VA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2023.1280341"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2023.103905"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3390\/robotics11040081"},{"key":"ref6","first-page":"27 730","article-title":"Training language models to follow instructions with human feedback","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ouyang"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10614-021-10119-4"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3054719"},{"key":"ref9","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref10","first-page":"12 169","article-title":"In defense of the unitary scalarization for deep multi-task learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kurin"},{"key":"ref11","first-page":"883","article-title":"A constrained multi-objective reinforcement learning framework","volume-title":"Proc. Conf. Robot Learn.","author":"Huang"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref13","first-page":"11 480","article-title":"CRPO: A new approach for safe reinforcement learning with convergence guarantee","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu"},{"issue":"1","key":"ref14","first-page":"4431","article-title":"On the theory of policy gradient methods: Optimality, approximation, and distribution shift","volume":"22","author":"Agarwal","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2024.3457538"},{"key":"ref16","first-page":"25 439","article-title":"Offline constrained multi-objective reinforcement learning via pessimistic dual value iteration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9091363"},{"key":"ref18","first-page":"4312","article-title":"Safe exploration in finite Markov decision processes with Gaussian processes","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Turchetta"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ECC.2015.7330913"},{"key":"ref20","first-page":"997","article-title":"Safe exploration for optimization with Gaussian processes","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sui"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12103"},{"key":"ref22","first-page":"8103","article-title":"A lyapunov-based approach to safe reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chow"},{"key":"ref23","article-title":"Lyapunov-based safe policy optimization for continuous control","author":"Chow","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619572"},{"key":"ref25","article-title":"Temporal logic guided safe reinforcement learning using control barrier functions","author":"Li","year":"2019"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.5132"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12107"},{"key":"ref28","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Achiam"},{"key":"ref29","article-title":"Projection-based constrained policy optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yang"},{"key":"ref30","article-title":"Reward constrained policy optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Tessler"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26299"},{"key":"ref32","first-page":"11","article-title":"A distributional view on multi-objective policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Abdolmaleki"},{"key":"ref33","first-page":"16 428","article-title":"Multi-task learning as a bargaining game","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Navon"},{"key":"ref34","first-page":"18 878","article-title":"Conflict-averse gradient descent for multi-task learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.crma.2012.03.014"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-010-5232-5"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1613\/jair.4961"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.11.094"},{"key":"ref39","first-page":"38 103","article-title":"On the convergence of stochastic multi-objective gradient manipulation and beyond","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhou"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2020.2024"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12079"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1137\/19M1288012"},{"key":"ref43","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sutton"},{"key":"ref44","first-page":"1531","article-title":"A natural policy gradient","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kakade"},{"key":"ref45","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref46","volume-title":"Vector Optimization, Theory, Application, and Extensions","author":"John","year":"2004"},{"key":"ref47","article-title":"OpenAI gym","author":"Brockman","year":"2016"},{"key":"ref48","article-title":"Benchmarking safe exploration in deep reinforcement learning","author":"Ray","year":"2019"},{"key":"ref49","article-title":"Deepmind control suite","author":"Tassa","year":"2018"},{"key":"ref50","article-title":"OmniSafe: An infrastructure for accelerating safe reinforcement learning research","author":"Ji","year":"2023"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/520"},{"key":"ref52","article-title":"Foundation models for decision making: Problems, methods, and opportunities","author":"Yang","year":"2023"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/34\/10958761\/10840326-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/10958761\/10840326.pdf?arnumber=10840326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T18:19:19Z","timestamp":1744654759000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10840326\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5]]},"references-count":52,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3528944","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5]]}}}