{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T12:01:35Z","timestamp":1777982495313,"version":"3.51.4"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2023,7,29]],"date-time":"2023-07-29T00:00:00Z","timestamp":1690588800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,29]],"date-time":"2023-07-29T00:00:00Z","timestamp":1690588800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62103403"],"award-info":[{"award-number":["62103403"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007162","name":"Department of Science and Technology of Guangdong Province","doi-asserted-by":"crossref","award":["2021A0505030056"],"award-info":[{"award-number":["2021A0505030056"]}],"id":[{"id":"10.13039\/501100007162","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","award":["2020YFB2104300"],"award-info":[{"award-number":["2020YFB2104300"]}],"id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the Science and Technology Development Fun, Macao S.A.R.","award":["0015\/2019\/AKP"],"award-info":[{"award-number":["0015\/2019\/AKP"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s10489-023-04867-z","type":"journal-article","created":{"date-parts":[[2023,7,29]],"date-time":"2023-07-29T08:01:20Z","timestamp":1690617680000},"page":"24847-24863","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Efficient distributional reinforcement learning with Kullback-Leibler divergence regularization"],"prefix":"10.1007","volume":"53","author":[{"given":"Renxing","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiwei","family":"Shang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunhua","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huiyun","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qing","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5539-4260","authenticated-orcid":false,"given":"Yunduan","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,7,29]]},"reference":[{"key":"4867_CR1","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An introduction, MIT press"},{"issue":"11","key":"4867_CR2","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: A survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"issue":"6","key":"4867_CR3","doi-asserted-by":"publisher","first-page":"2042","DOI":"10.1109\/TNNLS.2017.2773458","volume":"29","author":"B Kiumarsi","year":"2017","unstructured":"Kiumarsi B, Vamvoudakis KG, Modares H, Lewis FL (2017) Optimal and autonomous control using reinforcement learning: A survey. IEEE Trans Actions Neural Netw Learn Syst 29(6):2042\u20132062","journal-title":"IEEE Trans Actions Neural Netw Learn Syst"},{"key":"4867_CR4","unstructured":"Zhu W, Guo X, Owaki D, Kutsuzawa K, Hayashibe M (2021) A survey of sim-to-real transfer techniques applied to reinforcement learning for bioinspired robots. IEEE Trans Neural Netw Learn Syst"},{"issue":"6","key":"4867_CR5","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran K, Deisenroth MP, Brundage M, Bharath AA (2017) Deep reinforcement learning: A brief survey. IEEE Signal Process Magazine 34(6):26\u201338","journal-title":"IEEE Signal Process Magazine"},{"issue":"7540","key":"4867_CR6","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"7587","key":"4867_CR7","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M et al (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484\u2013489","journal-title":"Nature"},{"key":"4867_CR8","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods, in: International conference on machine learning, PMLR, pp 1587-1596"},{"key":"4867_CR9","doi-asserted-by":"crossref","unstructured":"Gu S, Holly E, Lillicrap T, Levine S (2017) Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates, in: 2017 IEEE international conference on robotics and automation (ICRA), IEEE, pp 3389-3396","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"4867_CR10","unstructured":"Rusu AA, Vec\u0306eri\u00edk M, Roth\u00f6rl T, Heess N, Pascanu R, Hadsell R (2017) Simto- real robot learning from pixels with progressive nets, in: Conference on Robot Learning, PMLR, pp 262-270"},{"key":"4867_CR11","unstructured":"Bellemare MG, Dabney W, Munos R (2017) A distributional perspective on reinforcement learning"},{"key":"4867_CR12","unstructured":"Zhang P, Chen X, Zhao L, Xiong W, Qin T, Liu T-Y (2021) Distributional reinforcement learning for multi-dimensional reward functions. Adv Neural Inf Process Syst 34"},{"key":"4867_CR13","unstructured":"Xie J, Sun W (2021) Distributional deep reinforcement learning-based emegercy frequency control. IEEE Trans Power Syst 1\u20131"},{"issue":"3","key":"4867_CR14","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1109\/TIV.2019.2919467","volume":"4","author":"K Min","year":"2019","unstructured":"Min K, Kim H, Huh K (2019) Deep distributional reinforcement learning based high-level driving policy determination. IEEE Trans Intell Veh 4(3):416\u2013424","journal-title":"IEEE Trans Intell Veh"},{"key":"4867_CR15","doi-asserted-by":"publisher","first-page":"4504","DOI":"10.1609\/aaai.v33i01.33014504","volume":"33","author":"C Lyle","year":"2019","unstructured":"Lyle C, Bellemare MG, Castro PS (2019) A comparative analysis of expected and distributional reinforcement learning. Proceedings of the AAAI Conference on Artificial Intelligence 33:4504\u20134511","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"4867_CR16","unstructured":"Rowland M, Bellemare M, Dabney W, Munos R, Teh YW (2018) An analysis of categorical distributional reinforcement learning, in: International Conference on Artificial Intelligence and Statistics, PMLR, pp 29-37"},{"issue":"1","key":"4867_CR17","first-page":"3207","volume":"13","author":"MG Azar","year":"2012","unstructured":"Azar MG, G\u00f3mez V, Kappen HJ (2012) Dynamic policy programming. J Mach Learn Res 13(1):3207\u20133245","journal-title":"J Mach Learn Res"},{"key":"4867_CR18","first-page":"12163","volume":"33","author":"N Vieillard","year":"2020","unstructured":"Vieillard N, Kozuno T, Scherrer B, Pietquin O, Munos R, Geist M (2020) Leverage the average: an analysis of kl regularization in reinforcement learning. Adv Neural Inf Process Syst 33:12163\u201312174","journal-title":"Adv Neural Inf Process Syst"},{"key":"4867_CR19","unstructured":"Kozuno T, Uchibe E, Doya K (2019) Theoretical analysis of efficiency and ro robustness of softmax and gap-increasing operators in reinforcement learning, in: The 22nd International Conference on Artificial Intelligence and Statistics, PMLR, pp 2995-3003"},{"key":"4867_CR20","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.neunet.2017.06.007","volume":"94","author":"Y Cui","year":"2017","unstructured":"Cui Y, Matsubara T, Sugimoto K (2017) Kernel dynamic policy programming: Applicable reinforcement learning to robot systems with high dimensional states. Neural Netw 94:13\u201323","journal-title":"Neural Netw"},{"key":"4867_CR21","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1016\/j.robot.2018.11.004","volume":"112","author":"Y Tsurumine","year":"2019","unstructured":"Tsurumine Y, Cui Y, Uchibe E, Matsubara T (2019) Deep reinforcement learning with smooth policy update: Application to robotic cloth manipulation. Robot Auto Syst 112:72\u201383","journal-title":"Robot Auto Syst"},{"key":"4867_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2020.104331","volume":"97","author":"L Zhu","year":"2020","unstructured":"Zhu L, Cui Y, Takami G, Kanokogi H, Matsubara T (2020) Scalable reinforcement learning for plant-wide control of vinyl acetate monomer process. Control Eng Prac 97:104331","journal-title":"Control Eng Prac"},{"key":"4867_CR23","doi-asserted-by":"crossref","unstructured":"Zhu L, Takami G, Kawahara M, Kanokogi H, Matsubara T (2022) Alleviating parameter-tuning burden in reinforcement learning for large-scale process control. Comput & Chemical Eng 107658","DOI":"10.1016\/j.compchemeng.2022.107658"},{"key":"4867_CR24","unstructured":"Van den Oord A, Kalchbrenner N, Espeholt L, Vinyals O, Graves A, et al.(2016) Conditional image generation with pixelcnn decoders. Adv Neural Inf Process Syst 29"},{"key":"4867_CR25","doi-asserted-by":"crossref","unstructured":"Todorov E (2006) Linearly-solvable markov decision problems, in: Advances in neural information processing systems (NIPS), pp 1369\u20131376","DOI":"10.7551\/mitpress\/7503.003.0176"},{"key":"4867_CR26","unstructured":"Scherrer B, Lesner B (2012) On the use of non-stationary policies for stationary infinite-horizon markov decision processes. Adv Neural Inf Process Syst 25"},{"key":"4867_CR27","doi-asserted-by":"crossref","unstructured":"Dabney W, Rowland M, Bellemare M, Munos R (2018) Distributional rein-forcement learning with quantile regression, in: Proceedings of the AAAI Conference on Artificial Intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"4867_CR28","unstructured":"Abadi M, et al.(2016) TensorFlow: A system for Large-Scale machine learning, in: 12th USENIX symposium on operating systems design and implementation (OSDI 16), pp 265-283"},{"key":"4867_CR29","unstructured":"Guadarrama S, et al.(2018) TF-Agents: A library for reinforcement learning in tensorflow, https:\/\/github.com\/tensorflow\/agents, [Online; accessed 25-Jun-2019] . https:\/\/github.com\/tensorflow\/agents"},{"key":"4867_CR30","unstructured":"Yang D, Zhao L, Lin Z, Qin T, Bian J, Liu T-Y (2019) Fully parameter ized quantile function for distributional reinforcement learning. Adv Neural Inf Process Syst 32"},{"key":"4867_CR31","unstructured":"Van der Maaten L, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9(11)"},{"key":"4867_CR32","unstructured":"Huang S, Dossa RFJ, Ye C, Braga J, Chakraborty D, Mehta K, Ara\u00fajo JG (2022) Cleanrl: High-quality single-file implementations of deep reinforcement learning algorithms. J Mach Learn Res 23(274):1-18"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04867-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-023-04867-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04867-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,23]],"date-time":"2023-10-23T10:10:38Z","timestamp":1698055838000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-023-04867-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,29]]},"references-count":32,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["4867"],"URL":"https:\/\/doi.org\/10.1007\/s10489-023-04867-z","relation":{"is-supplemented-by":[{"id-type":"doi","id":"10.36227\/techrxiv.19679454","asserted-by":"object"}],"has-preprint":[{"id-type":"doi","id":"10.36227\/techrxiv.19679454.v1","asserted-by":"object"},{"id-type":"doi","id":"10.36227\/techrxiv.19679454","asserted-by":"object"}]},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,29]]},"assertion":[{"value":"3 July 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 July 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}