{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T04:09:03Z","timestamp":1769832543435,"version":"3.49.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100010002","name":"Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100010002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,14]]},"DOI":"10.1109\/iros58592.2024.10802580","type":"proceedings-article","created":{"date-parts":[[2024,12,25]],"date-time":"2024-12-25T19:17:39Z","timestamp":1735154259000},"page":"13695-13702","source":"Crossref","is-referenced-by-count":2,"title":["Transformer-based Multi-Agent Reinforcement Learning for Generalization of Heterogeneous Multi-Robot Cooperation"],"prefix":"10.1109","author":[{"given":"Yuxin","family":"Cai","sequence":"first","affiliation":[{"name":"Nanyang Technological University,School of Mechanical and Aerospace Engineering,Singapore"}]},{"given":"Xiangkun","family":"He","sequence":"additional","affiliation":[{"name":"Nanyang Technological University,School of Mechanical and Aerospace Engineering,Singapore"}]},{"given":"Hongliang","family":"Guo","sequence":"additional","affiliation":[{"name":"Agency for Science, Technology and Research (ASTAR),Institute for Infocomm Research (I2R),Singapore"}]},{"given":"Wei-Yun","family":"Yau","sequence":"additional","affiliation":[{"name":"Agency for Science, Technology and Research (ASTAR),Institute for Infocomm Research (I2R),Singapore"}]},{"given":"Chen","family":"Lv","sequence":"additional","affiliation":[{"name":"Nanyang Technological University,School of Mechanical and Aerospace Engineering,Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2013.2252996"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3263459"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793609"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3025287"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2018.08.022"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980279"},{"issue":"54","key":"ref7","first-page":"1","article-title":"Deep reinforcement learning for swarm systems","volume":"20","author":"H\u00fcttenrauch","year":"2019","journal-title":"Journal of Machine Learning Research"},{"key":"ref8","article-title":"Introduction to reinforcement learning","volume":"135","author":"Sutton","year":"1998"},{"key":"ref9","first-page":"16509","article-title":"Multi-agent reinforcement learning is a sequence modeling problem","volume":"35","author":"Wen","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106129"},{"key":"ref11","article-title":"Heterogeneous multi-robot reinforcement learning","author":"Bettini","year":"2023"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3303848"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1002\/rob.20212"},{"key":"ref14","article-title":"Generalization in cooperative multi-agent systems","author":"Mahajan","year":"2022"},{"key":"ref15","article-title":"Learning transferable cooperative behavior in multi-agent teams","author":"Agarwal","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3196782"},{"key":"ref17","first-page":"6860","article-title":"Coach-player multi-agent reinforcement learning for dynamic team composition","volume-title":"International Conference on Machine Learning","author":"Liu"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3188904"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341748"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3390\/s23073625"},{"key":"ref21","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume":"30","author":"Lowe","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO58561.2023.10354600"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/MRS60187.2023.10416792"},{"issue":"1","key":"ref24","first-page":"7234","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"The Journal of Machine Learning Research"},{"key":"ref25","first-page":"24611","article-title":"The surprising effectiveness of ppo in cooperative multi-agent games","volume":"35","author":"Yu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref27","article-title":"Updet: Universal multi-agent reinforcement learning via policy decoupling with transformers","author":"Hu","year":"2021"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3105869"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"ref30","first-page":"1989","article-title":"Scaling multi-agent reinforcement learning with selective parameter sharing","volume-title":"International Conference on Machine Learning","author":"Christianos"},{"key":"ref31","article-title":"Generalization of heterogeneous multi-robot policies via awareness and communication of capabilities","author":"Howell","year":"2024"},{"key":"ref32","first-page":"13458","article-title":"Settling the variance of multi-agent policy gradients","volume":"34","author":"Kuba","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3146300"},{"key":"ref35","article-title":"Graph attention networks","author":"Veli\u010dkovi\u0107","year":"2017"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2019.2949973"},{"key":"ref37","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015"},{"key":"ref38","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015"}],"event":{"name":"2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Abu Dhabi, United Arab Emirates","start":{"date-parts":[[2024,10,14]]},"end":{"date-parts":[[2024,10,18]]}},"container-title":["2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10801246\/10801290\/10802580.pdf?arnumber=10802580","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,26]],"date-time":"2024-12-26T07:05:54Z","timestamp":1735196754000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10802580\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,14]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/iros58592.2024.10802580","relation":{},"subject":[],"published":{"date-parts":[[2024,10,14]]}}}