{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T16:03:28Z","timestamp":1780589008868,"version":"3.54.1"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T00:00:00Z","timestamp":1710547200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T00:00:00Z","timestamp":1710547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s11633-023-1464-2","type":"journal-article","created":{"date-parts":[[2024,3,15]],"date-time":"2024-03-15T21:01:31Z","timestamp":1710536491000},"page":"349-368","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Enhancing Multi-agent Coordination via Dual-channel 
Consensus"],"prefix":"10.1007","volume":"21","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5387-9942","authenticated-orcid":false,"given":"Qingyang","family":"Zhang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2787-5873","authenticated-orcid":false,"given":"Kaishen","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4857-9053","authenticated-orcid":false,"given":"Jingqing","family":"Ruan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1359-0364","authenticated-orcid":false,"given":"Yiming","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8251-9118","authenticated-orcid":false,"given":"Dengpeng","family":"Xing","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1111-1529","authenticated-orcid":false,"given":"Bo","family":"Xu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,3,16]]},"reference":[{"key":"1464_CR1","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1007\/978-3-030-60990-0_12","volume-title":"Handbook of Reinforcement Learning and Control","author":"K Q Zhang","year":"2021","unstructured":"K. Q. Zhang, Z. R. Yang, T. Ba\u015far. Multi-agent reinforcement learning: A selective overview of theories and algorithms. Handbook of Reinforcement Learning and Control, K. G. Vamvoudakis, Y. Wan, F. L. Lewis, D. Cansever, Eds., Cham, Germany: Springer, pp.321\u2013384, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-60990-0_12."},{"key":"1464_CR2","doi-asserted-by":"publisher","unstructured":"W. Ren, R. W. 
Beard, E. M. Atkins. A survey of consensus problems in multi-agent coordination. In Proceedings of the American Control Conference, Portland, USA, pp. 1859\u20131864, 2005. DOI: https:\/\/doi.org\/10.1109\/ACC.2005.1470239.","DOI":"10.1109\/ACC.2005.1470239"},{"key":"1464_CR3","doi-asserted-by":"publisher","unstructured":"H. Y. Mao, W. L. Liu, J. Y. Hao, J. Luo, D. Li, Z. C. Zhang, J. Wang, Z. Xiao. Neighborhood cognition consistent multi-agent reinforcement learning. In Proceedings of the 34th AAAI Conference on Artificial Intelligence, New York, USA, pp. 7219\u20137226, 2020. DOI: https:\/\/doi.org\/10.1609\/aaai.v34i05.6212.","DOI":"10.1609\/aaai.v34i05.6212"},{"key":"1464_CR4","unstructured":"J. N. Foerster, Y. M. Assael, N. de Freitas, S. Whiteson. Learning to communicate with deep multi-agent reinforcement learning. In Proceedings of the 30th International Conference on Neural Information Processing Systems, Barcelona, Spain, pp. 2145\u20132153, 2016."},{"key":"1464_CR5","unstructured":"J. C. Jiang, Z. Q. Lu. Learning attentional communication for multi-agent cooperation. In Proceedings of the 32nd International Conference on Neural Information Processing Systems, Montreal, Canada, pp. 7265\u20137275, 2018."},{"key":"1464_CR6","unstructured":"A. Das, T. Gervet, J. Romoff, D. Batra, D. Parikh, M. Rabbat, J. Pineau. TarMAC: Targeted multi-agent communication. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 1538\u20131546, 2019."},{"key":"1464_CR7","doi-asserted-by":"publisher","unstructured":"Y. Liu, W. X. Wang, Y. J. Hu, J. Y. Hao, X. G. Chen, Y. Gao. Multi-agent game abstraction via graph attention neural network. In Proceedings of the 34th AAAI Conference on Artificial Intelligence, New York, USA, pp. 7211\u20137218, 2020. DOI: https:\/\/doi.org\/10.1609\/aaai.v34i05.6211.","DOI":"10.1609\/aaai.v34i05.6211"},{"key":"1464_CR8","doi-asserted-by":"publisher","unstructured":"A. Rasouli, I. Kotseruba, J. K. Tsotsos. 
Agreeing to cross: How drivers and pedestrians communicate. In Proceedings of IEEE Intelligent Vehicles Symposium, Los Angeles, USA, pp. 264\u2013269, 2017. DOI: https:\/\/doi.org\/10.1109\/IVS.2017.7995730.","DOI":"10.1109\/IVS.2017.7995730"},{"key":"1464_CR9","doi-asserted-by":"publisher","unstructured":"Z. Tian, S. H. Zou, I. Davies, T. Warr, L. S. Wu, H. B. Ammar, J. Wang. Learning to communicate implicitly by actions. In Proceedings of the 34th AAAI Conference on Artificial Intelligence, New York, USA, pp. 7261\u20137268, 2020. DOI: https:\/\/doi.org\/10.1609\/aaai.v34i05.6217.","DOI":"10.1609\/aaai.v34i05.6217"},{"issue":"2","key":"1464_CR10","doi-asserted-by":"publisher","first-page":"895","DOI":"10.1007\/s10462-021-09996-w","volume":"55","author":"S Gronauer","year":"2022","unstructured":"S. Gronauer, K. Diepold. Multi-agent deep reinforcement learning: A survey. Artificial Intelligence Review, vol. 55, no. 2, pp. 895\u2013943, 2022. DOI: https:\/\/doi.org\/10.1007\/s10462-021-09996-w.","journal-title":"Artificial Intelligence Review"},{"key":"1464_CR11","unstructured":"T. Rashid, M. Samvelyan, C. S. de Witt, G. Farquhar, J. N. Foerster, S. Whiteson. QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning. In Proceedings of the 35th International Conference on Machine Learning, Stockholm, Sweden, pp. 4292\u20134301, 2018."},{"key":"1464_CR12","unstructured":"J. H. Wang, Z. Z. Ren, T. Liu, Y. Yu, C. J. Zhang. QPLEX: Duplex dueling multi-agent Q-learning. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1464_CR13","unstructured":"P. Khosla, P. Teterwak, C. Wang, A. Sarna, Y. L. Tian, P. Isola, A. Maschinot, C. Liu, D. Krishnan. Supervised contrastive learning. In Proceedings of the 34th International Conference on Neural Information Processing Systems, pp. 18661\u201318673, 2020."},{"key":"1464_CR14","unstructured":"C. A. S. de Witt, J. N. Foerster, G. Farquhar, P. H. S. Torr, W. 
B\u00f6hmer, S. Whiteson. Multi-agent common knowledge reinforcement learning. In Proceedings of the 33rd International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 890, 2019."},{"key":"1464_CR15","doi-asserted-by":"crossref","unstructured":"M. Samvelyan, T. Rashid, C. S. de Witt, G. Farquhar, N. Nardelli, T. G. J. Rudner, C. M. Hung, P. H. S. Torr, J. N. Foerster, S. Whiteson. The StarCraft multi-agent challenge. In Proceedings of the 18th International Conference on Autonomous Agents and Multi-Agent Systems, Montreal, Canada, pp. 2186\u20132188, 2019.","DOI":"10.65109\/LVZZ5205"},{"key":"1464_CR16","doi-asserted-by":"publisher","unstructured":"K. Kurach, A. Raichuk, P. Sta\u0144czyk, M. Zaj\u0105c, O. Bachem, L. Espeholt, C. Riquelme, D. Vincent, M. Michalski, O. Bousquet, S. Gelly. Google research football: A novel reinforcement learning environment. In Proceedings of the 34th AAAI Conference on Artificial Intelligence, New York, USA, pp. 4501\u20134510, 2020. DOI: https:\/\/doi.org\/10.1609\/aaai.v34i04.5878.","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"1464_CR17","unstructured":"S. Sukhbaatar, A. Szlam, R. Fergus. Learning multiagent communication with backpropagation. In Proceedings of the 30th International Conference on Neural Information Processing Systems, Barcelona, Spain, pp. 2252\u20132260, 2016."},{"key":"1464_CR18","unstructured":"A. Singh, T. Jain, S. Sukhbaatar. Learning when to communicate at scale in multi-agent cooperative and competitive tasks. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1464_CR19","unstructured":"S. Iqbal, F. Sha. Actor-attention-critic for multi-agent reinforcement learning. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 2961\u20132970, 2019."},{"key":"1464_CR20","unstructured":"Y. Hoshen. VAIN: Attentional multi-agent predictive modeling. 
In Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, USA, pp. 2698\u20132708, 2017."},{"key":"1464_CR21","doi-asserted-by":"publisher","unstructured":"K. Q. Zhang, Z. R. Yang, T. Basar. Networked multi-agent reinforcement learning in continuous spaces. In Proceedings of IEEE Conference on Decision and Control, Miami, USA, pp. 2771\u20132776, 2018. DOI: https:\/\/doi.org\/10.1109\/CDC.2018.8619581.","DOI":"10.1109\/CDC.2018.8619581"},{"key":"1464_CR22","unstructured":"K. Q. Zhang, Z. R. Yang, H. Liu, T. Zhang, T. Basar. Fully decentralized multi-agent reinforcement learning with networked agents. In Proceedings of the 35th International Conference on Machine Learning, Stockholm, Sweden, pp. 5867\u20135876, 2018."},{"key":"1464_CR23","unstructured":"T. S. Chu, S. Chinchali, S. Katti. Multi-agent reinforcement learning for networked system control. In Proceedings of the 8th International Conference on Learning Representations, Addis Ababa, Ethiopia, 2020."},{"key":"1464_CR24","unstructured":"R. D. Wang, X. He, R. S. Yu, W. Qiu, B. An, Z. Rabinovich. Learning efficient multi-agent communication: An information bottleneck approach. In Proceedings of the 37th International Conference on Machine Learning, Article number 919, 2020."},{"key":"1464_CR25","unstructured":"H. Y. Mao, Z. B. Gong, Z. C. Zhang, Z. Xiao, Y. Ni. Learning multi-agent communication under limited-bandwidth restriction for internet packet routing, [Online], Available: https:\/\/arxiv.org\/abs\/1903.05561, 2019."},{"key":"1464_CR26","doi-asserted-by":"crossref","unstructured":"P. Sunehag, G. Lever, A. Gruslys, W. M. Czarnecki, V. Zambaldi, M. Jaderberg, M. Lanctot, N. Sonnerat, J. Z. Leibo, K. Tuyls, T. Graepel. Value-decomposition networks for cooperative multi-agent learning, [Online], Available: https:\/\/arxiv.org\/abs\/1706.05296, 2017.","DOI":"10.65109\/JSRC7365"},{"key":"1464_CR27","unstructured":"A. Mahajan, T. Rashid, M. Samvelyan, S. 
Whiteson. MAVEN: Multi-agent variational exploration. In Proceedings of the 33rd International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 684, 2019."},{"key":"1464_CR28","unstructured":"T. H. Wang, T. Gupta, A. Mahajan, B. Peng, S. Whiteson, C. J. Zhang. RODE: Learning roles to decompose multi-agent tasks. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1464_CR29","doi-asserted-by":"publisher","unstructured":"J. Q. Ruan, L. H. Meng, X. T. Xiong, D. P. Xing, B. Xu. Learning multi-agent action coordination via electing first-move agent. In Proceedings of the 32nd International Conference on Automated Planning and Scheduling, Singapore, Singapore, pp. 624\u2013628, 2022. DOI: https:\/\/doi.org\/10.1609\/icaps.v32i1.19850.","DOI":"10.1609\/icaps.v32i1.19850"},{"key":"1464_CR30","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1016\/j.neucom.2022.09.144","volume":"513","author":"X J Zhang","year":"2022","unstructured":"X. J. Zhang, Y. Liu, H. Y. Mao, C. Yu. Common belief multi-agent reinforcement learning based on variational recurrent models. Neurocomputing, vol. 513, pp.341\u2013350, 2022. DOI: https:\/\/doi.org\/10.1016\/j.neucom.2022.09.144.","journal-title":"Neurocomputing"},{"key":"1464_CR31","doi-asserted-by":"publisher","unstructured":"Z. W. Xu, B. Zhang, D. P. Li, Z. R. Zhang, G. C. Zhou, H. Chen, G. L. Fan. Consensus learning for cooperative multi-agent reinforcement learning. In Proceedings of the 37th AAAI Conference on Artificial Intelligence, Washington DC, USA, pp. 11726\u201311734, 2023. DOI: https:\/\/doi.org\/10.1609\/aaai.v37i10.26385.","DOI":"10.1609\/aaai.v37i10.26385"},{"key":"1464_CR32","doi-asserted-by":"publisher","DOI":"10.1201\/9781003040620","volume-title":"Recurrent Neural Networks: Design and Applications","author":"L R Medsker","year":"1999","unstructured":"L. R. Medsker, L. C. Jain. 
Recurrent Neural Networks: Design and Applications, Boca Raton, USA: CRC Press, 1999. DOI: https:\/\/doi.org\/10.1201\/9781003040620."},{"key":"1464_CR33","unstructured":"S. Y. Li, L. L. Zheng, J. H. Wang, C. J. Zhang. Learning subgoal representations with slow dynamics. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1464_CR34","unstructured":"S. Y. Li, J. Zhang, J. H. Wang, Y. Yu, C. J. Zhang. Active hierarchical exploration with stable subgoal representation learning. In Proceedings of the 10th International Conference on Learning Representations, 2022."},{"key":"1464_CR35","doi-asserted-by":"publisher","unstructured":"F. Schroff, D. Kalenichenko, J. Philbin. FaceNet: A unified embedding for face recognition and clustering. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Boston, USA, pp. 815\u2013823, 2015. DOI: https:\/\/doi.org\/10.1109\/CVPR.2015.7298682.","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"1464_CR36","unstructured":"O. Nachum, S. X. Gu, H. Lee, S. Levine. Near-optimal representation learning for hierarchical reinforcement learning. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1464_CR37","unstructured":"D. Yarats, I. Kostrikov, R. Fergus. Image augmentation is all you need: Regularizing deep reinforcement learning from pixels. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1464_CR38","unstructured":"M. Laskin, A. Srinivas, P. Abbeel. CURL: Contrastive unsupervised representations for reinforcement learning. In Proceedings of the 37th International Conference on Machine Learning, pp. 5639\u20135650, 2020."},{"key":"1464_CR39","unstructured":"A. van den Oord, Y. Z. Li, O. Vinyals. Representation learning with contrastive predictive coding, [Online], Available: https:\/\/arxiv.org\/abs\/1807.03748, 2018."},{"key":"1464_CR40","unstructured":"Y. L. Lo, B. 
Sengupta. Learning to ground decentralized multi-agent communication with contrastive learning, [Online], Available: https:\/\/arxiv.org\/abs\/2203.03344, 2022."},{"key":"1464_CR41","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1613\/jair.2447","volume":"32","author":"F A Oliehoek","year":"2008","unstructured":"F. A. Oliehoek, M. T. J. Spaan, N. Vlassis. Optimal and approximate Q-value functions for decentralized POMDPs. Journal of Artificial Intelligence Research, vol. 32, pp. 289\u2013353, 2008. DOI: https:\/\/doi.org\/10.1613\/jair.2447.","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"5","key":"1464_CR42","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1109\/TAC.2005.846556","volume":"50","author":"W Ren","year":"2005","unstructured":"W. Ren, R. W. Beard. Consensus seeking in multiagent systems under dynamically changing interaction topologies. IEEE Transactions on Automatic Control, vol. 50, no. 5, pp. 655\u2013661, 2005. DOI: https:\/\/doi.org\/10.1109\/TAC.2005.846556.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"1464_CR43","doi-asserted-by":"publisher","unstructured":"L. J. Shan, H. Zhu. Consistency check in modelling multi-agent systems. In Proceedings of the 28th Annual International Computer Software and Applications Conference, Hong Kong, China, pp. 114\u2013119, 2004. DOI: https:\/\/doi.org\/10.1109\/CMPSAC.2004.1342814.","DOI":"10.1109\/CMPSAC.2004.1342814"},{"key":"1464_CR44","unstructured":"J. Y. Yu, L. Wang. Group consensus of multi-agent systems with undirected communication graphs. In Proceedings of the 7th Asian Control Conference, Hong Kong, China, pp. 105\u2013110, 2009."},{"key":"1464_CR45","doi-asserted-by":"publisher","first-page":"925","DOI":"10.1016\/S0925-2312(99)00011-9","volume":"26\u201327","author":"L Wiskott","year":"1999","unstructured":"L. Wiskott. Learning invariance manifolds. Neurocomputing, vol. 26\u201327, pp. 925\u2013932, 1999. 
DOI: https:\/\/doi.org\/10.1016\/S0925-2312(99)00011-9.","journal-title":"Neurocomputing"},{"issue":"4","key":"1464_CR46","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1162\/089976602317318938","volume":"14","author":"L Wiskott","year":"2002","unstructured":"L. Wiskott, T. J. Sejnowski. Slow feature analysis: Unsupervised learning of invariances. Neural Computation, vol. 14, no. 4, pp. 715\u2013770, 2002. DOI: https:\/\/doi.org\/10.1162\/089976602317318938.","journal-title":"Neural Computation"},{"key":"1464_CR47","doi-asserted-by":"publisher","unstructured":"D. Jayaraman, K. Grauman. Slow and steady feature analysis: Higher order temporal coherence in video. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas, USA, pp. 3852\u20133861, 2016. DOI: https:\/\/doi.org\/10.1109\/CVPR.2016.418.","DOI":"10.1109\/CVPR.2016.418"},{"key":"1464_CR48","doi-asserted-by":"publisher","unstructured":"A. Jansen, M. Plakal, R. Pandya, D. P. W. Ellis, S. Hershey, J. Y. Liu, R. C. Moore, R. A. Saurous. Unsupervised learning of semantic audio representations. In Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, Calgary, Canada, pp. 126\u2013130, 2018. DOI: https:\/\/doi.org\/10.1109\/ICASSP.2018.8461684.","DOI":"10.1109\/ICASSP.2018.8461684"},{"issue":"8","key":"1464_CR49","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Y. Bengio, A. Courville, P. Vincent. Representation learning: A review and new perspectives. IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 35, no.8, pp. 1798\u20131828, 2013. 
DOI: https:\/\/doi.org\/10.1109\/TPAMI.2013.50.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1464_CR50","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1016\/j.neunet.2018.07.006","volume":"108","author":"T Lesort","year":"2018","unstructured":"T. Lesort, N. D\u00edaz-Rodr\u00edguez, J. F. Goudou, D. Filliat. State representation learning for control: An overview. Neural Networks, vol. 108, pp. 379\u2013392, 2018. DOI: https:\/\/doi.org\/10.1016\/j.neunet.2018.07.006.","journal-title":"Neural Networks"},{"key":"1464_CR51","doi-asserted-by":"publisher","unstructured":"R. Hadsell, S. Chopra, Y. LeCun. Dimensionality reduction by learning an invariant mapping. In Proceedings of IEEE Computer Society Conference on Computer Vision and Pattern Recognition, New York, USA, pp. 1735\u20131742, 2006. DOI: https:\/\/doi.org\/10.1109\/CVPR.2006.100.","DOI":"10.1109\/CVPR.2006.100"},{"key":"1464_CR52","unstructured":"T. T. Xiao, X. L. Wang, A. A. Efros, T. Darrell. What should not be contrastive in contrastive learning. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1464_CR53","doi-asserted-by":"publisher","first-page":"193907","DOI":"10.1109\/ACCESS.2020.3031549","volume":"8","author":"P H Le-Khac","year":"2020","unstructured":"P. H. Le-Khac, G. Healy, A. F. Smeaton. Contrastive representation learning: A framework and review. IEEE Access, vol. 8, pp. 193907\u2013193934, 2020. DOI: https:\/\/doi.org\/10.1109\/ACCESS.2020.3031549.","journal-title":"IEEE Access"},{"key":"1464_CR54","unstructured":"A. Radford, K. Narasimhan, T. Salimans, I. Sutskever. Improving language understanding by generative pre-training, [Online], Available: https:\/\/api.semanticscholar.org\/CorpusID:49313245, 2018."},{"key":"1464_CR55","doi-asserted-by":"publisher","unstructured":"J. Devlin, M. W. Chang, K. Lee, K. Toutanova. 
BERT: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Minneapolis, USA, pp.4171\u20134186, 2019. DOI: https:\/\/doi.org\/10.18653\/v1\/N19-1423.","DOI":"10.18653\/v1\/N19-1423"},{"key":"1464_CR56","doi-asserted-by":"publisher","unstructured":"K. M. He, H. Q. Fan, Y. X. Wu, S. N. Xie, R. Girshick. Momentum contrast for unsupervised visual representation learning. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 9729\u20139738, 2020. DOI: https:\/\/doi.org\/10.1109\/CVPR42600.2020.00975.","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"1464_CR57","unstructured":"T. Chen, S. Kornblith, M. Norouzi, G. Hinton. A simple framework for contrastive learning of visual representations. In Proceedings of the 37th International Conference on Machine Learning, Article number 149, 2020."},{"key":"1464_CR58","unstructured":"C. Doersch. Tutorial on variational autoencoders, [Online], Available: https:\/\/arxiv.org\/abs\/1606.05908, 2016."},{"key":"1464_CR59","unstructured":"D. P. Kingma, D. J. Rezende, S. Mohamed, M. Welling. Semi-supervised learning with deep generative models. In Proceedings of the 27th International Conference on Neural Information Processing Systems, Montreal, Canada, pp. 3581\u20133589, 2014."},{"key":"1464_CR60","unstructured":"T. Schaul, J. Quan, I. Antonoglou, D. Silver. Prioritized experience replay. In Proceedings of the 4th International Conference on Learning Representations, San Juan, Puerto Rico, 2016."},{"key":"1464_CR61","unstructured":"J. Schulman, P. Moritz, S. Levine, M. Jordan, P. Abbeel. High-dimensional continuous control using generalized advantage estimation, [Online], Available: https:\/\/arxiv.org\/abs\/1506.02438, 2015."},{"key":"1464_CR62","unstructured":"K. Son, D. Kim, W. J. Kang, D. Hostallero, Y. Yi. 
QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 5887\u20135896, 2019."},{"key":"1464_CR63","doi-asserted-by":"publisher","unstructured":"J. Foerster, G. Farquhar, T. Afouras, N. Nardelli, S. Whiteson. Counterfactual multi-agent policy gradients. In Proceedings of the 32nd AAAI Conference on Artificial Intelligence, New Orleans, USA, pp. 2974\u20132982, 2018. DOI: https:\/\/doi.org\/10.1609\/aaai.v32i1.11794.","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"1464_CR64","unstructured":"C. Yu, A. Velu, E. Vinitsky, J. X. Gao, Y. Wang, A. M. Bayen, Y. Wu. The surprising effectiveness of PPO in cooperative multi-agent games. In Proceedings of the 36th Conference on Neural Information Processing Systems, New Orleans, USA, 2022."},{"key":"1464_CR65","unstructured":"T. H. Wang, J. H. Wang, C. Y. Zheng, C. J. Zhang. Learning nearly decomposable value functions via communication minimization. In Proceedings of the 8th International Conference on Learning Representations, Addis Ababa, Ethiopia, 2020."},{"key":"1464_CR66","doi-asserted-by":"publisher","unstructured":"L. Yuan, J. H. Wang, F. X. Zhang, C. H. Wang, Z. Z. Zhang, Y. Yu, C. J. Zhang. Multi-agent incentive communication via decentralized teammate modeling. In Proceedings of the 36th AAAI Conference on Artificial Intelligence, pp. 9466\u20139474, 2022. DOI: https:\/\/doi.org\/10.1609\/aaai.v36i9.21179.","DOI":"10.1609\/aaai.v36i9.21179"},{"key":"1464_CR67","unstructured":"V. Nair, G. E. Hinton. Rectified linear units improve restricted Boltzmann machines. In Proceedings of the 27th International Conference on Machine Learning, Haifa, Israel, pp. 807\u2013814, 2010."},{"key":"1464_CR68","unstructured":"D. P. Kingma, J. Ba. Adam: A method for stochastic optimization. 
In Proceedings of the 3rd International Conference on Learning Representations, San Diego, USA, 2015."},{"issue":"5\u20136","key":"1464_CR69","doi-asserted-by":"publisher","first-page":"602","DOI":"10.1016\/j.neunet.2005.06.042","volume":"18","author":"A Graves","year":"2005","unstructured":"A. Graves, J. Schmidhuber. Framewise phoneme classification with bidirectional LSTM and other neural network architectures. Neural Networks, vol. 18, no. 5\u20136, pp.602\u2013610, 2005. DOI: https:\/\/doi.org\/10.1016\/j.neunet.2005.06.042.","journal-title":"Neural Networks"}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1464-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-023-1464-2","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1464-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T08:35:26Z","timestamp":1780562126000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-023-1464-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,16]]},"references-count":69,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["1464"],"URL":"https:\/\/doi.org\/10.1007\/s11633-023-1464-2","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"value":"2731-538X","type":"print"},{"value":"2731-5398","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,16]]},"assertion":[{"value":"16 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 
August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Bo Xu is an associate editor for Machine Intelligence Research and was not involved in the editorial review, or the decision to publish this article. All authors declare that there are no other competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations of conflict of interest"}}]}}