{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T19:00:09Z","timestamp":1764788409411,"version":"3.46.0"},"reference-count":83,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["62495093","U24A20324"],"award-info":[{"award-number":["62495093","U24A20324"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu","doi-asserted-by":"publisher","award":["BK20241199","BK20243039"],"award-info":[{"award-number":["BK20241199","BK20243039"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1109\/tnnls.2025.3591838","type":"journal-article","created":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T17:41:33Z","timestamp":1760377293000},"page":"20199-20213","source":"Crossref","is-referenced-by-count":0,"title":["Generalizable Offline Multiobjective Reinforcement Learning via Preference-Conditioned Diffuser"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-8604-3042","authenticated-orcid":false,"given":"Yuchen","family":"Xiao","sequence":"first","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7803-0766","authenticated-orcid":false,"given":"Lei","family":"Yuan","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lihe","family":"Li","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziqian","family":"Zhang","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9908-5303","authenticated-orcid":false,"given":"Yichen","family":"Li","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1052-5447","authenticated-orcid":false,"given":"Yang","family":"Yu","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.3042053"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-023-10484-6"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.59277\/ROMJIST.2024.3-4.04"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.124695"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.59277\/ROMJIST.2024.3-4.01"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2024.111687"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3207346"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09997-9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3244945"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2919699"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3105937"},{"key":"ref14","first-page":"14636","article-title":"A generalized algorithm for multi-objective reinforcement learning and policy adaptation","volume-title":"Proc. NeurIPS","author":"Yang"},{"article-title":"PD-MORL: Preference-driven multi-objective reinforcement learning algorithm","volume-title":"Proc. ICLR","author":"Ba\u015faklar","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.52202\/079017-4453"},{"key":"ref17","first-page":"25439","article-title":"Offline constrained multi-objective reinforcement learning via pessimistic dual value iteration","volume-title":"Proc. NeurIPS","volume":"34","author":"Wu"},{"article-title":"Scaling Pareto-efficient decision making via offline multi-objective RL","volume-title":"Proc. ICLR","author":"Zhu","key":"ref18"},{"key":"ref19","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chen"},{"article-title":"RvS: What is essential for offline RL via supervised learning?","volume-title":"Proc. ICLR","author":"Emmons","key":"ref20"},{"article-title":"Policy-regularized offline multi-objective reinforcement learning","volume-title":"Proc. AAMAS","author":"Lin","key":"ref21"},{"key":"ref22","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020","journal-title":"arXiv:2004.07219"},{"key":"ref23","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kumar"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3626235"},{"key":"ref25","article-title":"A survey on mixup augmentations and beyond","author":"Jin","year":"2024","journal-title":"arXiv:2409.05202"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"article-title":"Learning from delayed rewards","year":"1989","author":"Watkins","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref31","article-title":"Actor-critic algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"12","author":"Konda"},{"article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lillicrap","key":"ref32"},{"key":"ref33","first-page":"1582","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. 35th Int. Conf. Mach. Learn., (ICML)","volume":"80","author":"Fujimoto"},{"key":"ref34","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09552-y"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICIBA62489.2024.10868359"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2024.3470890"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICPRE62586.2024.10768615"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10889885"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2021.103049"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IMC-SSGP63352.2024.10919539"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2021.104315"},{"key":"ref43","first-page":"10607","article-title":"Prediction-guided multi-objective reinforcement learning for continuous robot control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu"},{"article-title":"Efficient discovery of Pareto front for multi-objective reinforcement learning","volume-title":"Proc. 13th Int. Conf. Learn. Represent.","author":"Liu","key":"ref44"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3528944"},{"article-title":"Q-pensieve: Boosting sample efficiency of multi-objective RL through memory sharing of Q-snapshots","volume-title":"Proc. ICLR","author":"Hung","key":"ref46"},{"key":"ref47","first-page":"2980","article-title":"Pessimistic off-policy multi-objective optimization","volume-title":"Proc. 27th Int. Conf. Artif. Intell. Statist.","author":"Alizadeh"},{"article-title":"Moduli: Unlocking preference generalization via diffusion models for offline multi-objective reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yuan","key":"ref48"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.52202\/079017-4453"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3261988"},{"key":"ref51","first-page":"8633","article-title":"Video diffusion models","volume-title":"Proc. NeurIPS","author":"Ho"},{"key":"ref52","article-title":"DiffusionGPT: LLM-driven text-to-image generation system","author":"Qin","year":"2024","journal-title":"arXiv:2401.10061"},{"key":"ref53","first-page":"9902","article-title":"Planning with diffusion for flexible behavior synthesis","volume-title":"Proc. ICML","author":"J\u00e4nner"},{"article-title":"Is conditional generative modeling all you need for decision making?","volume-title":"Proc. ICLR","author":"Ajay","key":"ref54"},{"article-title":"Diffusion policies as an expressive policy class for offline reinforcement learning","volume-title":"Proc. Deep Reinforcement Learn. Workshop NeurIPS","author":"Wang","key":"ref55"},{"key":"ref56","first-page":"46323","article-title":"Synthetic experience replay","volume-title":"Proc. NeurIPS","author":"Lu"},{"article-title":"DMBP: Diffusion model based predictor for robust offline reinforcement learning against state observation perturbations","volume-title":"Proc. ICLR","author":"Yang","key":"ref57"},{"key":"ref58","article-title":"MADiff: Offline multi-agent learning with diffusion models","author":"Zhu","year":"2023","journal-title":"arXiv:2305.17330"},{"key":"ref59","article-title":"Diffusion-based episodes augmentation for offline multi-agent reinforcement learning","author":"Oh","year":"2024","journal-title":"arXiv:2408.13092"},{"key":"ref60","article-title":"Diffusion models for reinforcement learning: A survey","author":"Zhu","year":"2023","journal-title":"arXiv:2311.01223"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-7687-1_79"},{"key":"ref62","first-page":"7968","article-title":"Improving generalization in reinforcement learning with mixture regularization","volume-title":"Proc. NeurIPS","author":"Wang"},{"article-title":"MixRL: Data mixing augmentation for regression using reinforcement learning","year":"2021","author":"Hwang","key":"ref63"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120136"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/0304-4149(94)00064-Z"},{"key":"ref66","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. NIPS","volume":"33","author":"Ho"},{"article-title":"Score-based generative modeling through stochastic differential equations","volume-title":"Proc. ICLR","author":"Song","key":"ref67"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1908.08681"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_1"},{"article-title":"Towards non-asymptotic convergence for diffusion-based generative models","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Li","key":"ref73"},{"article-title":"O(d\/T) convergence theory for diffusion probabilistic models under minimal assumptions","volume-title":"Proc. 13th Int. Conf. Learn. Represent.","author":"Li","key":"ref74"},{"key":"ref75","article-title":"A survey of progress on cooperative multi-agent reinforcement learning in open environment","author":"Yuan","year":"2023","journal-title":"arXiv:2312.01058"},{"key":"ref76","article-title":"Multi-agent embodied AI: Advances and future directions","author":"Feng","year":"2025","journal-title":"arXiv:2505.05108"},{"article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Nachum","key":"ref77"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2020.3038072"},{"article-title":"Real-time reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ramstedt","key":"ref79"},{"key":"ref80","first-page":"32211","article-title":"Consistency models","volume-title":"Proc. ICML","author":"Song"},{"key":"ref81","first-page":"335","article-title":"Boosting continuous control with consistency policy","author":"Chen","year":"2024","journal-title":"Autonomous Agents and Multiagent Systems"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9534433"},{"issue":"8","key":"ref83","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/11272992\/11201894.pdf?arnumber=11201894","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:43:15Z","timestamp":1764787395000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11201894\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":83,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2025.3591838","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2025,12]]}}}