{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:24:32Z","timestamp":1740108272306,"version":"3.37.3"},"reference-count":19,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2024,8,6]],"date-time":"2024-08-06T00:00:00Z","timestamp":1722902400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,6]],"date-time":"2024-08-06T00:00:00Z","timestamp":1722902400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["52130403","52130403","52130403"],"award-info":[{"award-number":["52130403","52130403","52130403"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s00607-024-01329-3","type":"journal-article","created":{"date-parts":[[2024,8,6]],"date-time":"2024-08-06T03:46:04Z","timestamp":1722915964000},"page":"3335-3352","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Phasic parallel-network policy: a deep reinforcement learning framework based on action correlation"],"prefix":"10.1007","volume":"106","author":[{"given":"Jiahao","family":"Li","sequence":"first","affiliation":[]},{"given":"Tianhan","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Qingwei","family":"Mi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,6]]},"reference":[{"issue":"1","key":"1329_CR1","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364919887447","volume":"39","author":"OM Andrychowicz","year":"2020","unstructured":"Andrychowicz OM, Baker B, Chociej M, Jozefowicz R, McGrew B, Pachocki J, Zaremba W (2020) Learning dexterous in-hand manipulation. Int J Robot Res 39(1):3\u201320. https:\/\/doi.org\/10.1177\/0278364919887447","journal-title":"Int J Robot Res"},{"issue":"1","key":"1329_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3477600","volume":"55","author":"C Yu","year":"2021","unstructured":"Yu C, Liu J, Nemati S, Yin G (2021) Reinforcement learning in healthcare: a survey. ACM Comput Surv (CSUR) 55(1):1\u201336. https:\/\/doi.org\/10.1145\/3477600","journal-title":"ACM Comput Surv (CSUR)"},{"issue":"6623","key":"1329_CR3","doi-asserted-by":"publisher","first-page":"990","DOI":"10.1126\/science.add4679","volume":"378","author":"J Perolat","year":"2022","unstructured":"Perolat J, De Vylder B, Hennes D, Tarassov E, Strub F, de Boer V, Tuyls K (2022) Mastering the game of Stratego with model-free multiagent reinforcement learning. Science 378(6623):990\u2013996. https:\/\/doi.org\/10.1126\/science.add4679","journal-title":"Science"},{"key":"1329_CR4","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint https:\/\/arxiv.org\/abs\/1707.06347"},{"key":"1329_CR5","unstructured":"Cobbe KW, Hilton J, Klimov O, Schulman J (2021) Phasic policy gradient. In: International conference on machine learning, PMLR. 139:2020\u20132027, pp 2020\u20132027. https:\/\/proceedings.mlr.press\/v139\/cobbe21a"},{"key":"1329_CR6","doi-asserted-by":"publisher","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Wierstra D (2015) Continuous control with deep reinforcement learning. https:\/\/doi.org\/10.48550\/arXiv.1509.02971","DOI":"10.48550\/arXiv.1509.02971"},{"key":"1329_CR7","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, PMLR 48:1928\u20131937, pp 1928\u20131937. https:\/\/arxiv.org\/abs\/1602.01783"},{"key":"1329_CR8","doi-asserted-by":"publisher","unstructured":"Igl M, Farquhar G, Luketina J, Boehmer W, Whiteson S (2020) The impact of non-stationarity on generalisation in deep reinforcement learning. https:\/\/doi.org\/10.48550\/arXiv.2006.05826","DOI":"10.48550\/arXiv.2006.05826"},{"issue":"2","key":"1329_CR9","doi-asserted-by":"publisher","first-page":"025011","DOI":"10.13140\/RG.2.2.18979.43041","volume":"4","author":"AG Polat","year":"2023","unstructured":"Polat AG, Alpaslan FN (2023) The reusability prior: comparing deep learning models without training. Mach Learn Sci Technol 4(2):025011. https:\/\/doi.org\/10.13140\/RG.2.2.18979.43041","journal-title":"Mach Learn Sci Technol"},{"key":"1329_CR10","doi-asserted-by":"publisher","unstructured":"Ashukha A, Lyzhov A, Molchanov D, Vetrov D (2020) Pitfalls of in-domain uncertainty estimation and ensembling in deep learning. https:\/\/doi.org\/10.48550\/arXiv.2002.06470","DOI":"10.48550\/arXiv.2002.06470"},{"issue":"3","key":"1329_CR11","doi-asserted-by":"publisher","first-page":"1299","DOI":"10.1109\/TKDE.2023.3302804","volume":"36","author":"S Guo","year":"2023","unstructured":"Guo S, Zou L, Chen H, Qu B, Chi H, Philip SY, Chang Y (2023) Sample efficient offline-to-online reinforcement learning. IEEE Trans Knowl Data Eng 36(3):1299\u20131310. https:\/\/doi.org\/10.1109\/TKDE.2023.3302804","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1329_CR12","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/s10489-018-1296-x","volume":"49","author":"X Zhao","year":"2019","unstructured":"Zhao X, Ding S, An Y, Jia W (2019) Applications of asynchronous deep reinforcement learning based on dynamic updating weights. Appl Intell 49:581\u2013591. https:\/\/doi.org\/10.1007\/s10489-018-1296-x","journal-title":"Appl Intell"},{"key":"1329_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109917","volume":"145","author":"X Du","year":"2024","unstructured":"Du X, Chen H, Wang C, Xing Y, Yang J, Philip SY, He L (2024) Robust multi-agent reinforcement learning via Bayesian distributional value estimation. Pattern Recogn 145:109917. https:\/\/doi.org\/10.1016\/j.patcog.2023.109917","journal-title":"Pattern Recogn"},{"key":"1329_CR14","doi-asserted-by":"publisher","first-page":"4303","DOI":"10.1007\/s10489-019-01501-9","volume":"49","author":"S Ding","year":"2019","unstructured":"Ding S, Zhao X, Xu X, Sun T, Jia W (2019) An effective asynchronous framework for small scale reinforcement learning problems. Appl Intell 49:4303\u20134318. https:\/\/doi.org\/10.1007\/s10489-019-01501-9","journal-title":"Appl Intell"},{"key":"1329_CR15","doi-asserted-by":"publisher","unstructured":"Schulman J, Moritz P, Levine S, Jordan M, Abbeel P (2015) High-dimensional continuous control using generalized advantage estimation. https:\/\/doi.org\/10.48550\/arXiv.1506.02438","DOI":"10.48550\/arXiv.1506.02438"},{"key":"1329_CR16","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"ML Puterman","year":"2014","unstructured":"Puterman ML (2014) Markov decision processes: discrete stochastic dynamic programming. Wiley, Blackwell, London"},{"key":"1329_CR17","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/s10994-021-05946-3","volume":"110","author":"E H\u00fcllermeier","year":"2021","unstructured":"H\u00fcllermeier E, Waegeman W (2021) Aleatoric and epistemic uncertainty in machine learning: An introduction to concepts and methods. Mach Learn 110:457\u2013506","journal-title":"Mach Learn"},{"key":"1329_CR18","doi-asserted-by":"publisher","unstructured":"Cobbe K, Hesse C, Hilton J, Schulman J (2020) Leveraging procedural generation to benchmark reinforcement learning. In: International conference on machine learning. PMLR, pp 2048\u20132056. https:\/\/doi.org\/10.48550\/arXiv.1912.01588","DOI":"10.48550\/arXiv.1912.01588"},{"key":"1329_CR19","doi-asserted-by":"publisher","unstructured":"Mohanty S, Poonganam J, Gaidon A, Kolobov A, Wulfe B, Chakraborty D, Cobbe K (2021) Measuring sample efficiency and generalization in reinforcement learning benchmarks: Neurips 2020 procgen benchmark. https:\/\/doi.org\/10.48550\/arXiv.2103.15332","DOI":"10.48550\/arXiv.2103.15332"}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-024-01329-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00607-024-01329-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-024-01329-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T07:20:00Z","timestamp":1725520800000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00607-024-01329-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,6]]},"references-count":19,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["1329"],"URL":"https:\/\/doi.org\/10.1007\/s00607-024-01329-3","relation":{},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"type":"print","value":"0010-485X"},{"type":"electronic","value":"1436-5057"}],"subject":[],"published":{"date-parts":[[2024,8,6]]},"assertion":[{"value":"22 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}