{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T06:13:16Z","timestamp":1767420796680,"version":"3.48.0"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"18","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100014718","name":"Innovative Research Group Project of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62103403"],"award-info":[{"award-number":["62103403"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s10489-025-07035-7","type":"journal-article","created":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T09:25:25Z","timestamp":1765272325000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Sample-efficient multi-agent reinforcement learning with high update-to-data ratio and state-action embedding"],"prefix":"10.1007","volume":"55","author":[{"given":"Chenyang","family":"Miao","sequence":"first","affiliation":[]},{"given":"Yingzhuo","family":"Jiang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5539-4260","authenticated-orcid":false,"given":"Yunduan","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Yidong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Tianfu","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,9]]},"reference":[{"key":"7035_CR1","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction"},{"issue":"11","key":"7035_CR2","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: a survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"issue":"2","key":"7035_CR3","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1109\/LRA.2020.2966414","volume":"5","author":"A Amini","year":"2020","unstructured":"Amini A, Gilitschenski I, Phillips J, Moseyko J, Banerjee R, Karaman S, Rus D (2020) Learning robust control policies for end-to-end autonomous driving from data-driven simulation. IEEE Robot Autom Lett 5(2):1143\u20131150","journal-title":"IEEE Robot Autom Lett"},{"issue":"2","key":"7035_CR4","doi-asserted-by":"publisher","first-page":"1174","DOI":"10.1109\/LRA.2021.3138545","volume":"7","author":"H Liang","year":"2021","unstructured":"Liang H, Cong L, Hendrich N, Li S, Sun F, Zhang J (2021) Multifingered grasping based on multimodal reinforcement learning. IEEE Robot Autom Lett 7(2):1174\u20131181","journal-title":"IEEE Robot Autom Lett"},{"issue":"7","key":"7035_CR5","doi-asserted-by":"publisher","first-page":"4307","DOI":"10.1109\/LRA.2023.3281290","volume":"8","author":"C Yang","year":"2023","unstructured":"Yang C, Pu C, Xin G, Zhang J, Li Z (2023) Learning complex motor skills for legged robot fall recovery. IEEE Robot Autom Lett 8(7):4307\u20134314","journal-title":"IEEE Robot Autom Lett"},{"issue":"7540","key":"7035_CR6","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"6419","key":"7035_CR7","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver D, Hubert T, Schrittwieser J, Antonoglou I, Lai M, Guez A, Lanctot M, Sifre L, Kumaran D, Graepel T et al (2018) A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419):1140\u20131144","journal-title":"Science"},{"key":"7035_CR8","doi-asserted-by":"crossref","unstructured":"Kumar A, Fu Z, Pathak D, Malik J (2021) Rma: rapid motor adaptation for legged robots","DOI":"10.15607\/RSS.2021.XVII.011"},{"issue":"3","key":"7035_CR9","first-page":"4","volume":"2","author":"L Smith","year":"2023","unstructured":"Smith L, Kostrikov I, Levine S (2023) Demonstrating a walk in the park: learning to walk in 20 minutes with model-free reinforcement learning. Robotics: Sci Syst (RSS) Demo 2(3):4","journal-title":"Robotics: Sci Syst (RSS) Demo"},{"key":"7035_CR10","doi-asserted-by":"crossref","unstructured":"Yarats D, Zhang A, Kostrikov I, Amos B, Pineau J, Fergus R (2021) Improving sample efficiency in model-free reinforcement learning from images. In: Proceedings of the AAAI conference on artificial intelligence, vol 35, pp 10674\u201310681","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"7035_CR11","unstructured":"Ball PJ, Smith L, Kostrikov I, Levine S (2023) Efficient online reinforcement learning with offline data. In: International conference on machine learning, pp 1577\u20131594. PMLR"},{"key":"7035_CR12","unstructured":"Janner M, Fu J, Zhang M, Levine S (2019) When to trust your model: model-based policy optimization. Advances in neural information processing systems, vol 32"},{"key":"7035_CR13","unstructured":"Chen X, Wang C, Zhou Z, Ross K (2021) Randomized ensembled double q-learning: learning fast without a model. In: International Conference on Learning Representations (ICLR)"},{"key":"7035_CR14","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International conference on machine learning, pp 1861\u20131870. PMLR"},{"key":"7035_CR15","unstructured":"Hiraoka T, Imagawa T, Hashimoto T, Onishi T, Tsuruoka Y (2022) Dropout q-functions for doubly efficient reinforcement learning. In: International conference on learning representations"},{"key":"7035_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2020.104331","volume":"97","author":"L Zhu","year":"2020","unstructured":"Zhu L, Cui Y, Takami G, Kanokogi H, Matsubara T (2020) Scalable reinforcement learning for plant-wide control of vinyl acetate monomer process. Control Eng Pract 97:104331","journal-title":"Control Eng Pract"},{"key":"7035_CR17","unstructured":"Miao C, Cui Y, Li H, Wu X (2024) Effective multi-agent deep reinforcement learning control with relative entropy regularization. IEEE Trans Autom Sci Eng 1\u201315"},{"key":"7035_CR18","unstructured":"Niu Y, Paleja RR, Gombolay MC (2021) Multi-agent graph-attention communication and teaming. In: International Conference on Autonomous Agents and Multiagent Systems (AAMAS), vol 21, p 20"},{"issue":"2","key":"7035_CR19","doi-asserted-by":"publisher","first-page":"1549","DOI":"10.1016\/j.ifacol.2020.12.2021","volume":"53","author":"W Suttle","year":"2020","unstructured":"Suttle W, Yang Z, Zhang K, Wang Z, Ba\u015far T, Liu J (2020) A multi-agent off-policy actor-critic algorithm for distributed reinforcement learning. IFAC-PapersOnLine 53(2):1549\u20131554","journal-title":"IFAC-PapersOnLine"},{"key":"7035_CR20","doi-asserted-by":"crossref","unstructured":"Mao H, Liu W, Hao J, Luo J, Li D, Zhang Z, Wang J, Xiao Z (2020) Neighborhood cognition consistent multi-agent reinforcement learning. In: Proceedings of the AAAI conference on artificial intelligence, vol 34, pp 7219\u20137226","DOI":"10.1609\/aaai.v34i05.6212"},{"key":"7035_CR21","unstructured":"Anand A, Racah E, Ozair S, Bengio Y, C\u00f4t\u00e9 M-A, Hjelm RD (2019) Unsupervised state representation learning in Atari. Advances in neural information processing systems, vol 32"},{"issue":"3","key":"7035_CR22","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/MSP.2021.3134634","volume":"39","author":"L Ericsson","year":"2022","unstructured":"Ericsson L, Gouk H, Loy CC, Hospedales TM (2022) Self-supervised representation learning: introduction, advances, and challenges. IEEE Signal Process Mag 39(3):42\u201362","journal-title":"IEEE Signal Process Mag"},{"key":"7035_CR23","unstructured":"Laskin M, Srinivas A, Abbeel P (2020) Curl: contrastive unsupervised representations for reinforcement learning. In: International conference on machine learning, pp 5639\u20135650. PMLR"},{"key":"7035_CR24","unstructured":"Stooke A, Lee K, Abbeel P, Laskin M (2021) Decoupling representation learning from reinforcement learning. In: International conference on machine learning, pp 9870\u20139879. PMLR"},{"key":"7035_CR25","unstructured":"Ota K, Oiki T, Jha D, Mariyama T, Nikovski D (2020) Can increasing input dimensionality improve deep reinforcement learning? In: International conference on machine learning, pp 7424\u20137433. PMLR"},{"key":"7035_CR26","unstructured":"Fujimoto S, Chang W-D, Smith E, Gu SS, Precup D, Meger D (2024) For sale: state-action representation learning for deep reinforcement learning. Advances in Neural Information Processing Systems, vol 36"},{"key":"7035_CR27","unstructured":"Lowe R, Wu YI, Tamar A, Harb J, Pieter\u00a0Abbeel O, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. Advances in neural information processing systems (NIPS), vol 30"},{"issue":"1","key":"7035_CR28","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"7035_CR29","unstructured":"Xu J, Sun X, Zhang Z, Zhao G, Lin J (2019) Understanding and improving layer normalization. Advances in neural information processing systems, vol 32"},{"key":"7035_CR30","unstructured":"Gelada C, Kumar S, Buckman J, Nachum O, Bellemare MG (2019) Deepmdp: learning continuous latent space models for representation learning. In: International conference on machine learning, pp 2170\u20132179. PMLR"},{"key":"7035_CR31","first-page":"24611","volume":"35","author":"C Yu","year":"2022","unstructured":"Yu C, Velu A, Vinitsky E, Gao J, Wang Y, Bayen A, Wu Y (2022) The surprising effectiveness of ppo in cooperative multi-agent games. Adv Neural Inf Process Syst 35:24611\u201324624","journal-title":"Adv Neural Inf Process Syst"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-07035-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-07035-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-07035-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T06:11:18Z","timestamp":1767420678000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-07035-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":31,"journal-issue":{"issue":"18","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["7035"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-07035-7","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2025,12]]},"assertion":[{"value":"21 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"1138"}}