{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T10:39:03Z","timestamp":1758191943256,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,18]]},"DOI":"10.1145\/3719545.3759428","type":"proceedings-article","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T09:38:41Z","timestamp":1758015521000},"page":"112-120","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-agent Multi-game Entity Transformer: Towards Generalist Models in MARL"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6237-9472","authenticated-orcid":false,"given":"Rundong","family":"Wang","sequence":"first","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7242-2744","authenticated-orcid":false,"given":"Weixuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3389-1418","authenticated-orcid":false,"given":"Xianhan","family":"Zeng","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2587-5440","authenticated-orcid":false,"given":"Liang","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7149-7400","authenticated-orcid":false,"given":"Zhengjie","family":"Liang","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2791-426X","authenticated-orcid":false,"given":"Yiming","family":"Gao","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0391-5404","authenticated-orcid":false,"given":"Feiyu","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9025-8972","authenticated-orcid":false,"given":"Siqin","family":"Li","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8913-2901","authenticated-orcid":false,"given":"Xianliang","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4940-3831","authenticated-orcid":false,"given":"Qiang","family":"Fu","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9181-878X","authenticated-orcid":false,"given":"Wei","family":"Yang","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6366-4781","authenticated-orcid":false,"given":"Lanxiao","family":"Huang","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7150-2132","authenticated-orcid":false,"given":"Longtao","family":"Zheng","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1796-8013","authenticated-orcid":false,"given":"Zinovi","family":"Rabinobich","sequence":"additional","affiliation":[{"name":"Carleton University, Ottawa, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7064-7438","authenticated-orcid":false,"given":"Bo","family":"An","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,9,16]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Hangbo Bao Li Dong and Furu Wei. 2021. Beit: Bert pre-training of image transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.08254 (2021)."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Richard Bellman. 1957. A Markovian. Journal of Mathematics and Mechanics (1957) 679\u2013684.","DOI":"10.1512\/iumj.1957.6.56038"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Daniel\u00a0S Bernstein Robert Givan Neil Immerman and Shlomo Zilberstein. 2002. The complexity of decentralized control of Markov decision processes. Mathematics of Operations Research 27 4 (2002) 819\u2013840.","DOI":"10.1287\/moor.27.4.819.297"},{"key":"e_1_3_3_2_5_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in Neural Information Processing Systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_2_6_2","first-page":"2058","volume-title":"International Conference on Machine Learning","author":"Chang Kai-Wei","year":"2015","unstructured":"Kai-Wei Chang, Akshay Krishnamurthy, Alekh Agarwal, Hal Daum\u00e9\u00a0III, and John Langford. 2015. Learning to search better than your teacher. In International Conference on Machine Learning. PMLR, 2058\u20132066."},{"key":"e_1_3_3_2_7_2","unstructured":"Lili Chen Kevin Lu Aravind Rajeswaran Kimin Lee Aditya Grover Misha Laskin Pieter Abbeel Aravind Srinivas and Igor Mordatch. 2021. Decision transformer: Reinforcement learning via sequence modeling. Advances in Neural Information Processing Systems 34 (2021) 15084\u201315097."},{"key":"e_1_3_3_2_8_2","unstructured":"Jacob Devlin Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_2_9_2","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et\u00a0al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.11929 (2020)."},{"key":"e_1_3_3_2_10_2","unstructured":"Yang Gao Huazhe Xu Ji Lin Fisher Yu Sergey Levine and Trevor Darrell. 2018. Reinforcement learning from imperfect demonstrations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1802.05313 (2018)."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Max Jaderberg Wojciech\u00a0M Czarnecki Iain Dunning Luke Marris Guy Lever Antonio\u00a0Garcia Castaneda Charles Beattie Neil\u00a0C Rabinowitz Ari\u00a0S Morcos Avraham Ruderman et\u00a0al. 2019. Human-level performance in 3D multiplayer games with population-based reinforcement learning. Science 364 6443 (2019) 859\u2013865.","DOI":"10.1126\/science.aau6249"},{"key":"e_1_3_3_2_13_2","unstructured":"Michael Janner Qiyang Li and Sergey Levine. 2021. Offline reinforcement learning as one big sequence modeling problem. Advances in Neural Information Processing Systems 34 (2021) 1273\u20131286."},{"key":"e_1_3_3_2_14_2","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1312.6114 (2013)."},{"key":"e_1_3_3_2_15_2","unstructured":"Kuang-Huei Lee Ofir Nachum Mengjiao Yang Lisa Lee Daniel Freeman Winnie Xu Sergio Guadarrama Ian Fischer Eric Jang Henryk Michalewski et\u00a0al. 2022. Multi-Game Decision Transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.15241 (2022)."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Yinhan Liu Jiatao Gu Naman Goyal Xian Li Sergey Edunov Marjan Ghazvininejad Mike Lewis and Luke Zettlemoyer. 2020. Multilingual denoising pre-training for neural machine translation. Transactions of the Association for Computational Linguistics 8 (2020) 726\u2013742.","DOI":"10.1162\/tacl_a_00343"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei\u00a0A Rusu Joel Veness Marc\u00a0G Bellemare Alex Graves Martin Riedmiller Andreas\u00a0K Fidjeland Georg Ostrovski et\u00a0al. 2015. Human-level control through deep reinforcement learning. Nature 518 7540 (2015) 529\u2013533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_3_2_18_2","unstructured":"OpenAI. 2019. OpenAI Five. https:\/\/openai.com\/blog\/openai-five\/. Accessed March 4 2019."},{"key":"e_1_3_3_2_19_2","unstructured":"Emilio Parisotto Jimmy\u00a0Lei Ba and Ruslan Salakhutdinov. 2015. Actor-mimic: Deep multitask and transfer reinforcement learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1511.06342 (2015)."},{"key":"e_1_3_3_2_20_2","unstructured":"Scott Reed Konrad Zolna Emilio Parisotto Sergio\u00a0Gomez Colmenarejo Alexander Novikov Gabriel Barth-Maron Mai Gimenez Yury Sulsky Jackie Kay Jost\u00a0Tobias Springenberg et\u00a0al. 2022. A generalist agent. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.06175 (2022)."},{"key":"e_1_3_3_2_21_2","unstructured":"Yu Rong Yatao Bian Tingyang Xu Weiyang Xie Ying Wei Wenbing Huang and Junzhou Huang. 2020. Self-supervised graph transformer on large-scale molecular data. Advances in Neural Information Processing Systems 33 (2020) 12559\u201312571."},{"key":"e_1_3_3_2_22_2","unstructured":"Andrei\u00a0A Rusu Sergio\u00a0Gomez Colmenarejo Caglar Gulcehre Guillaume Desjardins James Kirkpatrick Razvan Pascanu Volodymyr Mnih Koray Kavukcuoglu and Raia Hadsell. 2015. Policy distillation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1511.06295 (2015)."},{"key":"e_1_3_3_2_23_2","unstructured":"Simon Schmitt Jonathan\u00a0J Hudson Augustin Zidek Simon Osindero Carl Doersch Wojciech\u00a0M Czarnecki Joel\u00a0Z Leibo Heinrich Kuttler Andrew Zisserman Karen Simonyan et\u00a0al. 2018. Kickstarting deep reinforcement learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1803.03835 (2018)."},{"key":"e_1_3_3_2_24_2","unstructured":"Ikechukwu Uchendu Ted Xiao Yao Lu Banghua Zhu Mengyuan Yan Jos\u00e9phine Simon Matthew Bennice Chuyuan Fu Cong Ma Jiantao Jiao et\u00a0al. 2022. Jump-Start Reinforcement Learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.02372 (2022)."},{"key":"e_1_3_3_2_25_2","unstructured":"Laurens Van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research 9 11 (2008)."},{"key":"e_1_3_3_2_26_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_3_2_27_2","unstructured":"Muning Wen Jakub\u00a0Grudzien Kuba Runji Lin Weinan Zhang Ying Wen Jun Wang and Yaodong Yang. 2022. Multi-Agent Reinforcement Learning is a Sequence Modeling Problem. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.14953 (2022)."},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"e_1_3_3_2_30_2","unstructured":"Seongjun Yun Minbyul Jeong Raehyun Kim Jaewoo Kang and Hyunwoo\u00a0J Kim. 2019. Graph transformer networks. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_3_2_31_2","volume-title":"Advances in Neural Information Processing Systems","author":"Zaheer Manzil","year":"2017","unstructured":"Manzil Zaheer, Satwik Kottur, Siamak Ravanbakhsh, Barnabas Poczos, Russ\u00a0R Salakhutdinov, and Alexander\u00a0J Smola. 2017. Deep Sets. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.), Vol.\u00a030."}],"event":{"name":"DAI '24: 6th International Conference on Distributed Artificial Intelligences","acronym":"DAI '24","location":"Singapore Singapore"},"container-title":["Proceedings of the 2024 Sixth International Conference on Distributed Artificial Intelligences"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719545.3759428","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T13:11:37Z","timestamp":1758114697000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719545.3759428"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,18]]},"references-count":30,"alternative-id":["10.1145\/3719545.3759428","10.1145\/3719545"],"URL":"https:\/\/doi.org\/10.1145\/3719545.3759428","relation":{},"subject":[],"published":{"date-parts":[[2024,12,18]]},"assertion":[{"value":"2025-09-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}