{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T14:09:36Z","timestamp":1768313376260,"version":"3.49.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1109\/cdc57313.2025.11312471","type":"proceedings-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T18:19:56Z","timestamp":1768241996000},"page":"5147-5152","source":"Crossref","is-referenced-by-count":0,"title":["Enabling Pareto-Stationarity Exploration in Multi-Objective Reinforcement Learning: A Multi-Objective Weighted-Chebyshev Actor-Critic Approach"],"prefix":"10.1109","author":[{"given":"Fnu","family":"Hairi","sequence":"first","affiliation":[{"name":"University of Wisconsin-Whitewater,Department of Computer Science,Whitewater,WI,USA"}]},{"given":"Yang","family":"Jiao","sequence":"additional","affiliation":[{"name":"Amazon,Seattle,USA"}]},{"given":"Tianchen","family":"Zhou","sequence":"additional","affiliation":[{"name":"Amazon,Seattle,USA"}]},{"given":"Haibo","family":"Yang","sequence":"additional","affiliation":[{"name":"Rochester Institute of Technology,Department of Computing and Information Sciences,Rochester,NY,USA"}]},{"given":"Chaosheng","family":"Dong","sequence":"additional","affiliation":[{"name":"Amazon,Seattle,USA"}]},{"given":"Fan","family":"Yang","sequence":"additional","affiliation":[{"name":"Amazon,Seattle,USA"}]},{"given":"Michinari","family":"Momma","sequence":"additional","affiliation":[{"name":"Amazon,Seattle,USA"}]},{"given":"Yan","family":"Gao","sequence":"additional","affiliation":[{"name":"Amazon,Seattle,USA"}]},{"given":"Jia","family":"Liu","sequence":"additional","affiliation":[{"name":"The Ohio State University,Department of Electrical and Computer Engineering,Columbus,OH,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09552-y"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583259"},{"key":"ref3","author":"Zhou","year":"2024","journal-title":"Finite-time convergence and sample complexity of actor-critic multi-objective reinforcement learning"},{"key":"ref4","first-page":"11096","article-title":"Random hypervolume scalarizations for provable multi-objective black box optimization","volume-title":"International conference on machine learning","author":"Zhang"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.crma.2012.03.014"},{"key":"ref6","author":"Hairi","year":"2025","journal-title":"Enabling pareto-stationarity exploration in multi-objective reinforcement learning: A multi-objective weighted-chebyshev actor-critic approach"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-5563-6"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1080\/10556788.2018.1510928"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-021-04033-z"},{"key":"ref10","article-title":"Mitigating gradient bias in multi-objective learning: A provably convergent approach","volume-title":"The Eleventh International Conference on Learning Representations","author":"Fernando"},{"key":"ref11","article-title":"Federated multi-objective learning","author":"Yang","year":"2024"},{"key":"ref12","author":"Xiao","year":"2023","journal-title":"Direction-oriented multi-objective learning: Simple and provable stochastic algorithms"},{"key":"ref13","first-page":"197","article-title":"Multi-criteria reinforcement learning","volume-title":"ICML","volume":"98","author":"G\u00e1bor"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449846"},{"key":"ref15","author":"Lin","year":"2024","journal-title":"Few for many: Tchebycheff set scalarization for many-objective optimization"},{"key":"ref16","first-page":"15895","article-title":"A multi-objective\/multi-task learning framework induced by pareto stationarity","volume-title":"International Conference on Machine Learning","author":"Momma"},{"key":"ref17","article-title":"Traversing pareto optimal policies: Provably efficient multi-objective reinforcement learning","author":"Qiu","year":"2024"},{"key":"ref18","article-title":"Pareto multi-task learning","volume":"32","author":"Lin","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref19","article-title":"Multi-objective reinforcement learning for the expected utility of the return","volume-title":"Proceedings of the Adaptive and Learning Agents workshop at FAIM","volume":"2018","author":"Roijers"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2021.3078754"},{"key":"ref21","author":"Xu","year":"2020","journal-title":"Improving sample complexity bounds for (natural) actor-critic algorithms"},{"key":"ref22","volume-title":"Reinforcement learning: An introduction","author":"Sutton","year":"2018"},{"key":"ref23","article-title":"Finite-time convergence and sample complexity of multi-agent actor-critic reinforcement learning with average reward","volume-title":"International Conference on Learning Representations","author":"Hairi"},{"key":"ref24","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","volume-title":"International Conference on Machine Learning","author":"Zhang"},{"key":"ref25","volume-title":"Random hypervolume scalarizations for provable multi-objective black box optimization","volume":"abs\/2006.04655","author":"Golovin","year":"2020"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(99)00099-0"},{"key":"ref27","first-page":"38 103","article-title":"On the convergence of stochastic multi-objective gradient manipulation and beyond","volume":"35","author":"Zhou","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref28","article-title":"Multi-task learning as multi-objective optimization","volume":"31","author":"Sener","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i6.20633"},{"key":"ref30","article-title":"Direction-oriented multi-objective learning: Simple and provable stochastic algorithms","volume":"36","author":"Xiao","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"2025 IEEE 64th Conference on Decision and Control (CDC)","location":"Rio de Janeiro, Brazil","start":{"date-parts":[[2025,12,9]]},"end":{"date-parts":[[2025,12,12]]}},"container-title":["2025 IEEE 64th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11311984\/11311968\/11312471.pdf?arnumber=11312471","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T08:36:05Z","timestamp":1768293365000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11312471\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,9]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/cdc57313.2025.11312471","relation":{},"subject":[],"published":{"date-parts":[[2025,12,9]]}}}