{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T04:04:30Z","timestamp":1750737870128,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":33,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819665846","type":"print"},{"value":"9789819665853","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-6585-3_14","type":"book-chapter","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:40:51Z","timestamp":1750689651000},"page":"197-210","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GATE: Guided Contrastive State Space for\u00a0Multi-agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Hao","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guoliang","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,24]]},"reference":[{"key":"14_CR1","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"14_CR2","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607. PMLR (2020)"},{"key":"14_CR3","unstructured":"Doersch, C.: Tutorial on variational autoencoders. arXiv preprint arXiv:1606.05908 (2016)"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Guicheng, S., Yang, W.: Review on dec-pomdp model for marl algorithms. In: Smart Communications, Intelligent Algorithms and Interactive Methods: Proceedings of 4th International Conference on Wireless Communications and Applications (ICWCA 2020), pp. 29\u201335. Springer, Cham (2022)","DOI":"10.1007\/978-981-16-5164-9_5"},{"key":"14_CR5","unstructured":"Jaques, N., et al.: Social influence as intrinsic motivation for multi-agent deep reinforcement learning. In: International Conference on Machine Learning, pp. 3040\u20133049. PMLR (2019)"},{"key":"14_CR6","unstructured":"Jeon, J., Kim, W., Jung, W., Sung, Y.: Maser: multi-agent reinforcement learning with subgoals generated from experience replay buffer. In: International Conference on Machine Learning, pp. 10041\u201310052. PMLR (2022)"},{"key":"14_CR7","unstructured":"Jing, L., Vincent, P., LeCun, Y., Tian, Y.: Understanding dimensional collapse in contrastive self-supervised learning. arXiv preprint arXiv:2110.09348 (2021)"},{"key":"14_CR8","unstructured":"Khosla, P., et al.: Supervised contrastive learning. In: Advances in Neural Information Processing Systems, vol. 33, pp. 18661\u201318673 (2020)"},{"key":"14_CR9","unstructured":"Laskin, M., Srinivas, A., Abbeel, P.: Curl: contrastive unsupervised representations for reinforcement learning. In: International Conference on Machine Learning, pp. 5639\u20135650. PMLR (2020)"},{"key":"14_CR10","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: Proceedings of the 31st International Conference on Neural Information Processing Systems. NIPS\u201917, pp. 6382\u20136393. Curran Associates Inc., Red Hook, NY, USA (2017)"},{"key":"14_CR11","unstructured":"Ma, W., Chang, Y.C., Yang, J., Wang, Y.K., Lin, C.T.: Contrastive learning-based agent modeling for deep reinforcement learning. arXiv preprint arXiv:2401.00132 (2023)"},{"key":"14_CR12","unstructured":"Ma, Z., Wang, R., Li, F.F., Bernstein, M., Krishna, R.: Elign: expectation alignment as a multi-agent intrinsic reward. In: Advances in Neural Information Processing Systems, vol. 35, pp. 8304\u20138317 (2022)"},{"key":"14_CR13","unstructured":"Van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(11) (2008)"},{"key":"14_CR14","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)"},{"key":"14_CR15","unstructured":"OpenAI, et al.: Solving Rubik\u2019s cube with a robot hand (2019)"},{"key":"14_CR16","unstructured":"Rashid, T., Farquhar, G., Peng, B., Whiteson, S.: Weighted qmix: expanding monotonic value function factorisation for deep multi-agent reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 33, pp. 10199\u201310210 (2020)"},{"issue":"1","key":"14_CR17","first-page":"7234","volume":"21","author":"T Rashid","year":"2020","unstructured":"Rashid, T., Samvelyan, M., De Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: Monotonic value function factorisation for deep multi-agent reinforcement learning. J. Mach. Learn. Res. 21(1), 7234\u20137284 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Sallab, A.E., Abdou, M., Perot, E., Yogamani, S.: Deep reinforcement learning framework for autonomous driving. arXiv preprint arXiv:1704.02532 (2017)","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"14_CR19","unstructured":"Samvelyan, M., et al.: The starcraft multi-agent challenge. arXiv preprint arXiv:1902.04043 (2019)"},{"key":"14_CR20","unstructured":"Savinov, N., et al.: Episodic curiosity through reachability. arXiv preprint arXiv:1810.02274 (2018)"},{"key":"14_CR21","unstructured":"Son, K., Kim, D., Kang, W.J., Hostallero, D.E., Yi, Y.: Qtran: learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International Conference on Machine Learning, pp. 5887\u20135896. PMLR (2019)"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Song, H., Feng, M., Zhou, W., Li, H.: Ma2cl: masked attentive contrastive learning for multi-agent reinforcement learning. arXiv preprint arXiv:2306.02006 (2023)","DOI":"10.24963\/ijcai.2023\/470"},{"key":"14_CR23","unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning (2017)"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Tao, L., Zhang, J., Bowman, M., Zhang, X.: A multi-agent approach for adaptive finger cooperation in learning-based in-hand manipulation. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 3897\u20133903. IEEE (2023)","DOI":"10.1109\/ICRA48891.2023.10160909"},{"key":"14_CR25","unstructured":"Trott, A., Zheng, S., Xiong, C., Socher, R.: Keeping your distance: solving sparse reward tasks using self-balancing shaped rewards. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"14_CR26","unstructured":"Wang, J., Ren, Z., Liu, T., Yu, Y., Zhang, C.: Qplex: duplex dueling multi-agent q-learning. arXiv preprint arXiv:2008.01062 (2020)"},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Xu, Z., et al.: Consensus learning for cooperative multi-agent reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 11726\u201311734 (2023)","DOI":"10.1609\/aaai.v37i10.26385"},{"key":"14_CR28","unstructured":"Yang, Y., et al.: Qatten: a general framework for cooperative multiagent reinforcement learning. arXiv preprint arXiv:2002.03939 (2020)"},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"Ye, D., et al.: Mastering complex control in moba games with deep reinforcement learning (2020)","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"14_CR30","unstructured":"Ye, D., Lu, Z.: Mutual-information regularized multi-agent policy iteration. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"14_CR31","unstructured":"Yu, C., et al.: The surprising effectiveness of PPO in cooperative multi-agent games. In: Advances in Neural Information Processing Systems, vol. 35, pp. 24611\u201324624 (2022)"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Ba\u015far, T.: Multi-agent reinforcement learning: a selective overview of theories and algorithms. In: Handbook of Reinforcement Learning and Control, pp. 321\u2013384 (2021)","DOI":"10.1007\/978-3-030-60990-0_12"},{"issue":"5","key":"14_CR33","doi-asserted-by":"publisher","first-page":"4887","DOI":"10.1007\/s40747-023-00985-w","volume":"9","author":"Y Zhou","year":"2023","unstructured":"Zhou, Y., et al.: Cooperative multi-agent target searching: a deep reinforcement learning approach based on parallel hindsight experience replay. Complex Intell. Syst. 9(5), 4887\u20134898 (2023)","journal-title":"Complex Intell. Syst."}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-6585-3_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:41:03Z","timestamp":1750689663000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-6585-3_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819665846","9789819665853"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-6585-3_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Auckland","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}