{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T04:02:41Z","timestamp":1750737761061,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819665785","type":"print"},{"value":"9789819665792","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-6579-2_23","type":"book-chapter","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T12:06:47Z","timestamp":1750680407000},"page":"338-353","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Temporal State Prediction and\u00a0Sequence Recovery for\u00a0Multi-agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Yong","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingxiao","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haolin","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wengang","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Houqiang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,24]]},"reference":[{"key":"23_CR1","unstructured":"Bello, I., Pham, H., Le, Q.V., Norouzi, M., Bengio, S.: Neural combinatorial optimization with reinforcement learning. arXiv preprint arXiv:1611.09940 (2016)"},{"key":"23_CR2","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1109\/TII.2012.2219061","volume":"9","author":"Y Cao","year":"2012","unstructured":"Cao, Y., et al.: An overview of recent progress in the study of distributed multi-agent coordination. IEEE Trans. Ind. Inf. 9, 427\u2013438 (2012)","journal-title":"IEEE Trans. Ind. Inf."},{"key":"23_CR3","unstructured":"Chen, Y., et\u00a0al.: Towards human-level bimanual dexterous manipulation with reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 5150\u20135163 (2022)"},{"key":"23_CR4","unstructured":"Feng, M., et\u00a0al.: Joint-predictive representations for multi-agent reinforcement learning (2023). https:\/\/openreview.net\/forum?id=S80ioOGLpD9"},{"key":"23_CR5","unstructured":"Grill, J.B., et\u00a0al.: Bootstrap your own latent-a new approach to self-supervised learning. In: Advances in Neural Information Processing Systems, pp. 5150\u20135163 (2020)"},{"key":"23_CR6","unstructured":"Guan, C., et\u00a0al.: Efficient multi-agent communication via self-supervised information aggregation. In: Advances in Neural Information Processing Systems, pp. 1020\u20131033 (2022)"},{"key":"23_CR7","unstructured":"Hafner, D., et\u00a0al.: Learning latent dynamics for planning from pixels. In: International Conference on Machine Learning, pp. 2555\u20132565. PMLR (2019)"},{"key":"23_CR8","unstructured":"Hansen, N., Wang, X., Su, H.: Temporal difference learning for model predictive control. In: International Conference on Machine Learning, PMLR (2022)"},{"issue":"6","key":"23_CR9","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2021","unstructured":"Kiran, B.R., et al.: Deep reinforcement learning for autonomous driving: a survey. IEEE Trans. Intell. Transp. Syst. 23(6), 4909\u20134926 (2021)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"23_CR10","unstructured":"Kool, W., van Hoof, H., Welling, M.: Attention, learn to solve routing problems! In: International Conference on Learning Representations (2018)"},{"key":"23_CR11","unstructured":"Kuba, J., et\u00a0al.: Trust region policy optimisation in multi-agent reinforcement learning. In: International Conference on Learning Representations, p.\u00a01046 (2022)"},{"key":"23_CR12","unstructured":"Laskin, M., Srinivas, A., Abbeel, P.: CURL: contrastive unsupervised representations for reinforcement learning. In: International Conference on Machine Learning, pp. 5639\u20135650. PMLR (2020)"},{"key":"23_CR13","unstructured":"Li, P., et\u00a0al.: Race: improve multi-agent reinforcement learning with representation asymmetry and collaborative evolution. In: International Conference on Machine Learning, pp. 19490\u201319503. PMLR (2023)"},{"key":"23_CR14","doi-asserted-by":"publisher","unstructured":"Oliehoek, F.A., Amato, C., et\u00a0al.: A Concise Introduction to Decentralized POMDPs, vol.\u00a01. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-28929-8","DOI":"10.1007\/978-3-319-28929-8"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Panerati, J., et\u00a0al.: Learning to fly-a gym environment with PyBullet physics for reinforcement learning of multi-agent quadcopter control. arXiv preprint arXiv:2103.02142 (2021)","DOI":"10.1109\/IROS51168.2021.9635857"},{"key":"23_CR16","unstructured":"Samvelyan, M., Rashid, T., De\u00a0Witt, C.S., Farquhar, G., et\u00a0al.: The StarCraft multi-agent challenge. arXiv preprint arXiv:1902.04043 (2019)"},{"key":"23_CR17","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"23_CR18","unstructured":"Schumacher, P., et\u00a0al.: Dep-RL: embodied exploration for reinforcement learning in overactuated and musculoskeletal systems. arXiv preprint arXiv:2206.00484 (2022)"},{"key":"23_CR19","unstructured":"Schwarzer, M., et\u00a0al.: Data-efficient reinforcement learning with self-predictive representations. In: International Conference on Learning Representations (2020)"},{"key":"23_CR20","unstructured":"Shang, W., Espeholt, L., Raichuk, A., Salimans, T.: Agent-centric representations for multi-agent reinforcement learning. arXiv preprint arXiv:2104.09402 (2021)"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Song, H., Feng, M., Zhou, W., Li, H.: MA2CL: masked attentive contrastive learning for multi-agent reinforcement learning. In: The Thirty-Second International Joint Conference on Artificial Intelligence (2023)","DOI":"10.24963\/ijcai.2023\/470"},{"key":"23_CR22","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., et\u00a0al.: Attention is all you need. In: Advances in Neural Information Processing Systems (2017)"},{"key":"23_CR23","unstructured":"Venugopal, A., Milani, S., Fang, F., Ravindran, B.: Bi-level latent variable model for sample-efficient multi-agent reinforcement learning. arXiv preprint arXiv:2304.06011 (2023)"},{"key":"23_CR24","unstructured":"Wen, M., et\u00a0al.: Multi-agent reinforcement learning is a sequence modeling problem. In: Advances in Neural Information Processing Systems, pp. 16509\u201316521 (2022)"},{"key":"23_CR25","unstructured":"de\u00a0Witt, C.S., et\u00a0al.: Deep multi-agent reinforcement learning for decentralized continuous cooperative control. arXiv preprint arXiv:2003.06709 (2020)"},{"key":"23_CR26","unstructured":"Yu, C., et\u00a0al.: The surprising effectiveness of PPO in cooperative multi-agent games. In: Advances in Neural Information Processing Systems, pp. 24611\u201324624 (2022)"},{"key":"23_CR27","unstructured":"Yu, T., et\u00a0al.: Mask-based latent reconstruction for reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 25117\u201325131 (2022)"},{"key":"23_CR28","unstructured":"Zhu, J., et\u00a0al.: Making better decision by directly planning in continuous control. In: The Eleventh International Conference on Learning Representations (2022)"},{"issue":"3","key":"23_CR29","first-page":"3421","volume":"45","author":"J Zhu","year":"2022","unstructured":"Zhu, J., et al.: Masked contrastive representation learning for reinforcement learning. IEEE Trans. Pattern Anal. Mach. Intell. 45(3), 3421\u20133433 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-6579-2_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T12:06:54Z","timestamp":1750680414000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-6579-2_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819665785","9789819665792"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-6579-2_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Auckland","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}