{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T19:07:35Z","timestamp":1761592055432,"version":"build-2065373602"},"reference-count":5,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T00:00:00Z","timestamp":1761177600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T00:00:00Z","timestamp":1761177600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11432-024-4578-2","type":"journal-article","created":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T06:27:17Z","timestamp":1761546437000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-agent robust policy evaluation for reinforcement learning via primal-dual online time-averaging"],"prefix":"10.1007","volume":"68","author":[{"given":"Gang","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changli","family":"Pu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaoyao","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiumin","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huimiao","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,23]]},"reference":[{"key":"4578_CR1","first-page":"9672","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"H T Wai","year":"2018","unstructured":"Wai H T, Yang Z, Wang Z, et al. Multi-agent reinforcement learning via double averaging primal-dual optimization. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, 2018. 9672\u20139683"},{"key":"4578_CR2","doi-asserted-by":"publisher","first-page":"110092","DOI":"10.1016\/j.automatica.2021.110092","volume":"136","author":"X Sha","year":"2022","unstructured":"Sha X, Zhang J, You K, et al. Fully asynchronous policy evaluation in distributed reinforcement learning over networks. Automatica, 2022, 136: 110092","journal-title":"Automatica"},{"key":"4578_CR3","doi-asserted-by":"publisher","first-page":"362","DOI":"10.1007\/s11768-020-00007-x","volume":"18","author":"X Zhao","year":"2020","unstructured":"Zhao X, Yi P, Li L. Distributed policy evaluation via inexact ADMM in multi-agent reinforcement learning. Control Theor Technol, 2020, 18: 362\u2013378","journal-title":"Control Theor Technol"},{"key":"4578_CR4","doi-asserted-by":"publisher","first-page":"2720","DOI":"10.1109\/TAC.2016.2616646","volume":"62","author":"D Mateos-Nunez","year":"2017","unstructured":"Mateos-Nunez D, Cortes J. Distributed saddle-point subgradient algorithms with Laplacian averaging. IEEE Trans Automat Contr, 2017, 62: 2720\u20132735","journal-title":"IEEE Trans Automat Contr"},{"key":"4578_CR5","volume-title":"Reinforcement Learning: An Introduction","author":"R S Sutton","year":"2018","unstructured":"Sutton R S, Barto A G. Reinforcement Learning: An Introduction. Cambridge: MIT Press, 2018"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4578-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-024-4578-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4578-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T19:02:33Z","timestamp":1761591753000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-024-4578-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,23]]},"references-count":5,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4578"],"URL":"https:\/\/doi.org\/10.1007\/s11432-024-4578-2","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"type":"print","value":"1674-733X"},{"type":"electronic","value":"1869-1919"}],"subject":[],"published":{"date-parts":[[2025,10,23]]},"assertion":[{"value":"16 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 July 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 October 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"229203"}}