{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T12:28:58Z","timestamp":1777984138514,"version":"3.51.4"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Shenzhen Fundamental Research","award":["JCYJ20220818102415033"],"award-info":[{"award-number":["JCYJ20220818102415033"]}]},{"name":"Shenzhen Fundamental Research","award":["KJZD20230923114222045"],"award-info":[{"award-number":["KJZD20230923114222045"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Emerg. Topics Comput."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1109\/tetc.2025.3644748","type":"journal-article","created":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T18:42:40Z","timestamp":1766428960000},"page":"94-104","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Discounting Reinforcement Learning Based on Reward Decomposition"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-4307-5618","authenticated-orcid":false,"given":"Pengbin","family":"Chen","sequence":"first","affiliation":[{"name":"Guangdong Key Laboratory of Intelligent Morphing Mechanisms and Adaptive Robotics, School of Intelligence Science and Engineering, Harbin Institute of Technology Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7485-6344","authenticated-orcid":false,"given":"Qi","family":"Liu","sequence":"additional","affiliation":[{"name":"Guangdong Key Laboratory of Intelligent Morphing Mechanisms and Adaptive Robotics, School of Intelligence Science and Engineering, Harbin Institute of Technology Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7890-9677","authenticated-orcid":false,"given":"Yanjie","family":"Li","sequence":"additional","affiliation":[{"name":"Guangdong Key Laboratory of Intelligent Morphing Mechanisms and Adaptive Robotics, School of Intelligence Science and Engineering, Harbin Institute of Technology Shenzhen, Shenzhen, China"}]},{"given":"Kejian","family":"Yan","sequence":"additional","affiliation":[{"name":"Guangdong Key Laboratory of Intelligent Morphing Mechanisms and Adaptive Robotics, School of Intelligence Science and Engineering, Harbin Institute of Technology Shenzhen, Shenzhen, China"}]},{"given":"Shuangkang","family":"Ma","sequence":"additional","affiliation":[{"name":"Guangdong Key Laboratory of Intelligent Morphing Mechanisms and Adaptive Robotics, School of Intelligence Science and Engineering, Harbin Institute of Technology Shenzhen, Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2022.3184112"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2021.3073744"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2021.3115793"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3070584"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3124466"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2018.2890682"},{"key":"ref7","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref8","article-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"NIPS Deep Learn. Workshop"},{"key":"ref9","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lillicrap","year":"2016"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref11","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/RCAR.2016.7784001"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197148"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-04357-7"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09575-5"},{"key":"ref17","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2018"},{"key":"ref18","first-page":"5398","article-title":"Hybrid reward architecture for reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Seijen","year":"2017"},{"key":"ref19","first-page":"656","article-title":"Q-decomposition for reinforcement learning agents","volume-title":"Proc. 20th Int. Conf. Mach. Learn.","author":"Russell","year":"2003"},{"key":"ref20","first-page":"1445","article-title":"Multiple-goal reinforcement learning with modular sarsa(o)","volume-title":"Proc. 18th Int. Joint Conf. Artif. Intell.","author":"Sprague","year":"2003"},{"key":"ref21","first-page":"47","article-title":"Explainable reinforcement learning via reward decomposition","volume-title":"Proc. IJCAI\/ECAI Workshop Explainable Artif. Intell.","author":"Juozapaitis","year":"2019"},{"key":"ref22","first-page":"12001","article-title":"Value function decomposition for iterative design of reinforcement learning agents","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"MacGlashan","year":"2022"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3987"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09552-y"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112703"},{"key":"ref26","first-page":"1265","article-title":"Biasing approximate dynamic programming with a lower discount factor","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Petrik","year":"2009"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594418"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.65109\/jazo1666"},{"key":"ref29","first-page":"269","article-title":"Discount factor as a regularizer in reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Amit","year":"2020"},{"key":"ref30","first-page":"9072","article-title":"On the role of discount factor in offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hu","year":"2022"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1287\/moor.19.1.152"},{"key":"ref32","first-page":"3742","article-title":"Unifying task specification in reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"White","year":"2017"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1098\/rspb.1998.0534"},{"key":"ref34","article-title":"Hyperbolic discounting and learning over multiple horizons","author":"Fedus","year":"2019"},{"key":"ref35","article-title":"Hyperbolically discounted advantage estimation for generalization in reinforcement learning","volume-title":"Proc. Decis. Awareness Reinforcement Learn. Workshop ICML 2022","author":"Nafi","year":"2022"},{"key":"ref36","first-page":"5468","article-title":"Separating value functions across time-scales","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Romoff","year":"2019"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6027"},{"key":"ref38","article-title":"Soft actor-critic algorithms and applications","author":"Haarnoja","year":"2018"},{"key":"ref39","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"}],"container-title":["IEEE Transactions on Emerging Topics in Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6245516\/11433431\/11311448.pdf?arnumber=11311448","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T19:54:39Z","timestamp":1773431679000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11311448\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":39,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tetc.2025.3644748","relation":{"has-preprint":[{"id-type":"doi","id":"10.36227\/techrxiv.171078237.77836600\/v1","asserted-by":"object"}]},"ISSN":["2168-6750","2376-4562"],"issn-type":[{"value":"2168-6750","type":"electronic"},{"value":"2376-4562","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]}}}