{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T12:23:45Z","timestamp":1770985425397,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2022,8,18]],"date-time":"2022-08-18T00:00:00Z","timestamp":1660780800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,18]],"date-time":"2022-08-18T00:00:00Z","timestamp":1660780800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61871430"],"award-info":[{"award-number":["61871430"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172142"],"award-info":[{"award-number":["62172142"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102134"],"award-info":[{"award-number":["62102134"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,5]]},"DOI":"10.1007\/s10489-022-04028-8","type":"journal-article","created":{"date-parts":[[2022,8,18]],"date-time":"2022-08-18T09:02:53Z","timestamp":1660813373000},"page":"10365-10379","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Decentralized multi-task reinforcement learning policy gradient method with momentum over networks"],"prefix":"10.1007","volume":"53","author":[{"given":"Shi","family":"Junru","sequence":"first","affiliation":[]},{"given":"Wang","family":"Qiong","sequence":"additional","affiliation":[]},{"given":"Liu","family":"Muhua","sequence":"additional","affiliation":[]},{"given":"Ji","family":"Zhihang","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Ruijuan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1572-5293","authenticated-orcid":false,"given":"Wu","family":"Qingtao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,18]]},"reference":[{"issue":"1","key":"4028_CR1","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1038\/s41591-018-0316-z","volume":"25","author":"A Esteva","year":"2019","unstructured":"Esteva A, Robicquet A, Ramsundar B, Kuleshov V, DePristo M, Chou K, Cui C, Corrado G, Thrun S, Dean J (2019) A guide to deep learning in healthcare. Nat Med 25(1):24\u201329","journal-title":"Nat Med"},{"key":"4028_CR2","doi-asserted-by":"crossref","unstructured":"Wang WY, Li J, He X (2018) Deep reinforcement learning for NLP. 
In: Proceedings of the 56th annual metting of association for computational linguistics, ACL, pp 19\u201321","DOI":"10.18653\/v1\/P18-5007"},{"key":"4028_CR3","doi-asserted-by":"crossref","unstructured":"Zhu Y, Mottaghi R, Kolve E, Lim JJ, Gupta A, Fei-Fei L, Farhadi A (2017) Target-driven visual navigation in indoor scenes using deep reinforcement learning. In: 2017 IEEE International conference on robotics and automation, ICRA 2017, Singapore, Singapore, May 29 - June 3, 2017, pp 3357\u20133364","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"4028_CR4","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning - an introduction. The MIT Press Cambridge, pp 1\u2013552"},{"key":"4028_CR5","unstructured":"Rummery GA, Niranjan M (1994) On-line q-learning using connectionist systems. Technical Report, 37"},{"issue":"3","key":"4028_CR6","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and td-gammon. Commun ACM 38(3):58\u201368","journal-title":"Commun ACM"},{"key":"4028_CR7","unstructured":"Sutton RS, McAllester DA, Singh SP, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. In: Advances in neural information processing systems, NIPS, pp 1057\u20131063"},{"key":"4028_CR8","unstructured":"Fazel M, Ge R, Kakade SM, Mesbahi M (2018) Global convergence of policy gradient methods for the linear quadratic regulator. In: Proceedings of the 35th international conference on machine learning, ICML, pp 1466\u20131475"},{"key":"4028_CR9","unstructured":"Tu S, Recht B (2019) The gap between model-based and model-free methods on the linear quadratic regulator: an asymptotic viewpoint. In: Conference on learning theory, COLT, pp 3036\u20133083"},{"key":"4028_CR10","doi-asserted-by":"crossref","unstructured":"Luo Y, Chiu C, Jaitly N, Sutskever I (2017) Learning online alignments with continuous rewards policy gradient. In: 2017 IEEE International conference on acoustics, speech and signal processing, ICASSP 2017, New Orleans, LA, USA, March 5-9, 2017, pp 2801\u20132805","DOI":"10.1109\/ICASSP.2017.7952667"},{"key":"4028_CR11","unstructured":"Andreas J, Klein D, Levine S (2017) Modular multitask reinforcement learning with policy sketches. In: Proceedings of the 34th international conference on machine learning, ICML, pp 166\u2013175"},{"key":"4028_CR12","unstructured":"Igl M, Gambardella A, He J, Nardelli N, Siddharth N, Boehmer W, Whiteson S (2020) Multitask soft option learning. In: Proceedings of the 36th conference on uncertainty in artificial intelligence, UAI, pp 969\u2013978"},{"key":"4028_CR13","unstructured":"D\u2019Eramo C, Tateo D, Bonarini A, Restelli M, Peters J (2020) Sharing knowledge in multi-task deep reinforcement learning. In: Proceedings of the 8th international conference on learning representations, ICLR"},{"issue":"4","key":"4028_CR14","doi-asserted-by":"publisher","first-page":"4663","DOI":"10.1007\/s10489-021-02683-x","volume":"52","author":"F Cui","year":"2022","unstructured":"Cui F, Di H, Shen L, Ouchi K, Liu Z, Xu J (2022) Modeling semantic and emotional relationship in multi-turn emotional conversations using multi-task learning. Appl Intell 52(4):4663\u20134673","journal-title":"Appl Intell"},{"key":"4028_CR15","unstructured":"Zeng S, Anwar MA, Doan TT, Raychowdhury A, Romberg J (2021) A decentralized policy gradient approach to multi-task reinforcement learning. 
In: Proceedings of the thirty-seventh conference on uncertainty in artificial intelligence, UAI 2021, virtual event, 27-30 July 2021. Proceedings of machine learning research, vol 161, pp 1002\u20131012"},{"key":"4028_CR16","doi-asserted-by":"crossref","unstructured":"Ma W, Dentcheva D, Zavlanos MM (2017) Risk-averse sensor planning using distributed policy gradient. In: American control conference, ACC, pp 4839\u20134844","DOI":"10.23919\/ACC.2017.7963704"},{"key":"4028_CR17","doi-asserted-by":"crossref","unstructured":"Pinyoanuntapong P, Lee M, Wang P (2019) Distributed multi-hop traffic engineering via stochastic policy gradient reinforcement learning. In: IEEE Global communications conference, GLOBECOM, pp 1\u20136","DOI":"10.1109\/GLOBECOM38437.2019.9013134"},{"issue":"3","key":"4028_CR18","doi-asserted-by":"publisher","first-page":"5340","DOI":"10.1109\/LRA.2021.3074885","volume":"6","author":"A Khan","year":"2021","unstructured":"Khan A, Kumar V, Ribeiro A (2021) Large scale distributed collaborative unlabeled motion planning with graph policy gradients. IEEE Robot Autom Lett 6(3):5340\u20135347","journal-title":"IEEE Robot Autom Lett"},{"key":"4028_CR19","doi-asserted-by":"crossref","unstructured":"Bono G, Dibangoye JS, Matignon L, Pereyron F, Simonin O (2018) Cooperative multi-agent policy gradient. In: Machine learning and knowledge discovery in databases - European conference, ECML PKDD, pp 459\u2013476","DOI":"10.1007\/978-3-030-10925-7_28"},{"key":"4028_CR20","doi-asserted-by":"crossref","unstructured":"Lu S, Zhang K, Chen T, Basar T, Horesh L (2021) Decentralized policy gradient descent ascent for safe multi-agent reinforcement learning. In: Proceedings of the 35th conference on artificial intelligence, AAAI, pp 8767\u20138775","DOI":"10.1609\/aaai.v35i10.17062"},{"key":"4028_CR21","unstructured":"Cutkosky A, Orabona F (2019) Momentum-based variance reduction in non-convex SGD. In: Advances in neural information processing systems, NIPS, pp 15210\u201315219"},{"issue":"3","key":"4028_CR22","doi-asserted-by":"publisher","first-page":"1107","DOI":"10.1109\/TNNLS.2020.3040325","volume":"33","author":"W Tao","year":"2022","unstructured":"Tao W, Wu G, Tao Q (2022) Momentum acceleration in the individual convergence of nonsmooth convex optimization with constraints. IEEE Trans Neur Netw Learn Syst 33(3):1107\u20131118","journal-title":"IEEE Trans Neur Netw Learn Syst"},{"key":"4028_CR23","unstructured":"Huang F, Gao S, Pei J, Huang H (2020) Momentum-based policy gradient methods. In: Proceedings of the 37th international conference on machine learning, ICML, pp 4422\u20134433. http:\/\/proceedings.mlr.press\/v119\/huang20a.html. Accessed 13 Aug 2021"},{"key":"4028_CR24","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8:229\u2013256","journal-title":"Mach Learn"},{"key":"4028_CR25","first-page":"319","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter J, Bartlett PL (2001) Infinite-horizon policy-gradient estimation. J Artif Itell Res 15:319\u2013350","journal-title":"J Artif Itell Res"},{"key":"4028_CR26","unstructured":"Schulman J, Levine S, Abbeel P, Jordan MI, Moritz P (2015) Trust region policy optimization. 
In: Proceedings of the 32nd international conference on machine learning, ICML, pp 1889\u20131897"},{"issue":"2-3","key":"4028_CR27","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/s10994-015-5484-1","volume":"100","author":"M Pirotta","year":"2015","unstructured":"Pirotta M, Restelli M, Bascetta L (2015) Policy gradient in Lipschitz Markov decision processes. Mach Learn 100(2-3):255\u2013283","journal-title":"Mach Learn"},{"key":"4028_CR28","unstructured":"Agarwal A, Kakade SM, Lee JD, Mahajan G (2020) Optimality and approximation with policy gradient methods in markov decision processes. In: Conference on learning theory, COLT 2020, 9-12 July 2020, virtual event [Graz, Austria]. Proceedings of machine learning research, vol 125, pp 64\u201366"},{"key":"4028_CR29","unstructured":"Shen Z, Ribeiro A, Hassani H, Qian H, Mi C (2019) Hessian aided policy gradient. In: Proceedings of the 36th international conference on machine learning, ICML, vol 97, pp 5729\u2013 5738"},{"key":"4028_CR30","unstructured":"Xu P, Gao F, Gu Q (2019) An improved convergence analysis of stochastic variance-reduced policy gradient. In: Proceedings of the 35th conference on uncertainty in artificial intelligence, UAI, pp 541\u2013551"},{"key":"4028_CR31","unstructured":"Papini M, Binaghi D, Canonaco G, Pirotta M, Restelli M (2018) Stochastic variance-reduced policy gradient. In: Proceedings of the 35th international conference on machine learning, ICML, pp 4023\u20134032"},{"key":"4028_CR32","unstructured":"Yuan H, Lian X, Liu J, Zhou Y (2020) Stochastic recursive momentum for policy gradient methods. arXiv:2003.04302"},{"key":"4028_CR33","doi-asserted-by":"crossref","unstructured":"Lu S, Zhang K, Chen T, Basar T, Horesh L (2021) Decentralized policy gradient descent ascent for safe multi-agent reinforcement learning. In: Thirty-Fifth AAAI conference on artificial intelligence, AAAI 2021, thirty-third conference on innovative applications of artificial intelligence, IAAI 2021, The eleventh symposium on educational advances in artificial intelligence, EAAI 2021, pp 8767\u20138775","DOI":"10.1609\/aaai.v35i10.17062"},{"key":"4028_CR34","doi-asserted-by":"crossref","unstructured":"Jiang Z, Lee XY, Tan SY, Tan KL, Balu A, Lee YM, Hegde C, Sarkar S (2021) MDPGT: momentum-based decentralized policy gradient tracking. arXiv:2112.02813","DOI":"10.1609\/aaai.v36i9.21169"},{"key":"4028_CR35","unstructured":"Khanduri P, Sharma P, Kafle S, Bulusu S, Rajawat K, Varshney PK (2020) Distributed stochastic non-convex optimization: momentum-based variance reduction. arXiv:2005.00224"},{"key":"4028_CR36","unstructured":"Foerster JN, Assael YM, de Freitas N, Whiteson S (2016) Learning to communicate with deep multi-agent reinforcement learning. arXiv:1605.06676"},{"key":"4028_CR37","unstructured":"Peng P, Yuan Q, Wen Y, Yang Y, Tang Z, Long H, Wang J (2017) Multiagent bidirectionally-coordinated nets for learning to play starcraft combat games. arXiv:1703.10069"},{"key":"4028_CR38","unstructured":"Leibo JZ, Zambaldi VF, Lanctot M, Marecki J, Graepel T (2017) Multi-agent reinforcement learning in sequential social dilemmas. arXiv:1702.03037"},{"key":"4028_CR39","doi-asserted-by":"crossref","unstructured":"Zhang K, Yang Z, Basar T (2018) Networked multi-agent reinforcement learning in continuous spaces. 
In: 2018 IEEE Conference on decision and control (CDC)2018, pp 2771\u2013 2776","DOI":"10.1109\/CDC.2018.8619581"},{"key":"4028_CR40","doi-asserted-by":"crossref","unstructured":"Lu S, Zhang K, Chen T, Basar T, Horesh L (2021) Decentralized policy gradient descent ascent for safe multi-agent reinforcement learning. In: Thirty-Fifth AAAI conference on artificial intelligence, AAAI, pp 8767\u20138775","DOI":"10.1609\/aaai.v35i10.17062"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04028-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-04028-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04028-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,19]],"date-time":"2023-05-19T11:43:49Z","timestamp":1684496629000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-04028-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,18]]},"references-count":40,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2023,5]]}},"alternative-id":["4028"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-04028-8","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,18]]},"assertion":[{"value":"22 July 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 August 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}