{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T16:53:28Z","timestamp":1775494408295,"version":"3.50.1"},"reference-count":58,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62236007"],"award-info":[{"award-number":["62236007"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976043"],"award-info":[{"award-number":["61976043"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cogn. Dev. Syst."],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1109\/tcds.2023.3326297","type":"journal-article","created":{"date-parts":[[2023,10,23]],"date-time":"2023-10-23T18:09:00Z","timestamp":1698084540000},"page":"1051-1062","source":"Crossref","is-referenced-by-count":3,"title":["State Augmentation via Self-Supervision in Offline Multiagent Reinforcement Learning"],"prefix":"10.1109","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3180-0815","authenticated-orcid":false,"given":"Siying","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"given":"Xiaodie","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6114-3441","authenticated-orcid":false,"given":"Hong","family":"Qu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"given":"Wenyu","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, 
China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1098\/rstb.2010.0154"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/147470490600400119"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-020-68734-4"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2018.2840971"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2019.2926477"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2021.3050723"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2019.2928820"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2020.3034452"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2013.2255286"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2014.6958095"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2795041"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106706"},{"key":"ref14","article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","author":"Shalev-Shwartz","year":"2016","journal-title":"arXiv:1610.03295"},{"key":"ref15","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv:2005.01643"},{"key":"ref16","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref17","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019","journal-title":"arXiv:1911.11361"},{"key":"ref18","first-page":"1702","article-title":"Offline-to-online reinforcement learning via balanced replay and pessimistic Q-ensemble","volume-title":"Proc. Conf. Robot Learn.","author":"Lee"},{"key":"ref19","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kumar"},{"key":"ref20","first-page":"1","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Kumar"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3146976"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3109284"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114896"},{"key":"ref24","article-title":"Algaedice: Policy gradient from arbitrary experience","author":"Nachum","year":"2019","journal-title":"arXiv:1912.02074"},{"key":"ref25","first-page":"5774","article-title":"Offline reinforcement learning with fisher divergence critic regularization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kostrikov"},{"key":"ref26","first-page":"7436","article-title":"Uncertainty-based offline reinforcement learning with diversified Q-ensemble","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"An"},{"key":"ref27","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020","journal-title":"arXiv:2006.09359"},{"key":"ref28","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"34","author":"Chen"},{"key":"ref29","first-page":"27042","article-title":"Online decision transformer","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zheng"},{"key":"ref30","first-page":"24631","article-title":"Prompting decision transformer for few-shot policy generalization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"ref32","article-title":"A survey of learning in multiagent environments: Dealing with non-stationarity","author":"Hernandez-Leal","year":"2017","journal-title":"arXiv:1707.09183"},{"issue":"178","key":"ref33","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref34","first-page":"6382","article-title":"Multi-agent actor\u2013critic for mixed cooperative-competitive environments","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst. NeurIPS","author":"Lowe"},{"key":"ref35","first-page":"4292","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Rashid"},{"key":"ref36","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Son"},{"key":"ref37","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Autonomous Agents MultiAgent Syst., AAMAS","author":"Sunehag"},{"key":"ref38","first-page":"1","article-title":"Trust region policy optimisation in multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Kuba"},{"key":"ref39","first-page":"10299","article-title":"Believe what you see: Implicit constraint approach for offline multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Yang"},{"key":"ref40","first-page":"17221","article-title":"Plan better AMID conservatism: Offline multi-agent reinforcement learning with actor rectification","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pan"},{"key":"ref41","article-title":"Offline pre-trained multi-agent decision transformer: One big sequence model tackles all SMAC tasks","author":"Meng","year":"2021","journal-title":"arXiv:2112.02845"},{"key":"ref42","article-title":"Network randomization: A simple technique for generalization in deep reinforcement learning","author":"Lee","year":"2019","journal-title":"arXiv:1910.05396"},{"key":"ref43","first-page":"19884","article-title":"Reinforcement learning with augmented data","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Laskin"},{"key":"ref44","first-page":"5639","article-title":"CURL: Contrastive unsupervised representations for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Laskin"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"ref46","article-title":"Simplifying deep reinforcement learning via self-supervision","author":"Zha","year":"2021","journal-title":"arXiv:2106.05526"},{"key":"ref47","article-title":"Reinforcement learning upside down: Don\u2019t predict rewards\u2013just map them to actions","author":"Schmidhuber","year":"2019","journal-title":"arXiv:1912.02875"},{"key":"ref48","first-page":"907","article-title":"S4RL: Surprisingly simple self-supervision for offline reinforcement learning in robotics","volume-title":"Proc. 5th Conf. Robot Learn.","author":"Sinha"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref50","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref51","first-page":"1","article-title":"Rethinking individual global max in cooperative multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hong"},{"key":"ref52","article-title":"Hypernetworks","author":"Ha","year":"2016","journal-title":"arXiv:1609.09106"},{"key":"ref53","first-page":"2186","article-title":"The starCraft multi-agent challenge","volume-title":"Proc. 18th Int. Conf. Autonomous Agents MultiAgent Syst., AAMAS","author":"Samvelyan"},{"key":"ref54","first-page":"1741","article-title":"Learning transferable cooperative behavior in multi-agent teams","volume-title":"Proc. 19th Int. Conf. Autonomous Agents MultiAgent Syst.","author":"Agarwal"},{"key":"ref55","first-page":"29","article-title":"Deep recurrent Q-learning for partially observable MDPs","volume-title":"Proc. 29th AAAI Conf. Artif. Intell. AAAI","author":"Hausknecht"},{"key":"ref56","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020","journal-title":"arXiv:2004.07219"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI.2017.8280935"},{"key":"ref58","first-page":"885","article-title":"RoboNet: Large-scale multi-robot learning","volume-title":"Proc. Conf. Robot Learn.","author":"Dasari"}],"container-title":["IEEE Transactions on Cognitive and Developmental Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7274989\/10552653\/10290946.pdf?arnumber=10290946","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T21:10:12Z","timestamp":1719349812000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10290946\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":58,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tcds.2023.3326297","relation":{},"ISSN":["2379-8920","2379-8939"],"issn-type":[{"value":"2379-8920","type":"print"},{"value":"2379-8939","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]}}}