{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T06:02:04Z","timestamp":1774677724750,"version":"3.50.1"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&amp;D Program of China","award":["2022YFC3801700"],"award-info":[{"award-number":["2022YFC3801700"]}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2022M710517"],"award-info":[{"award-number":["2022M710517"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Joint Project JD User Growth Engine","award":["H20211431"],"award-info":[{"award-number":["H20211431"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21A20512"],"award-info":[{"award-number":["U21A20512"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Research Grants Council of the Hong Kong SAR","award":["PolyU11211521"],"award-info":[{"award-number":["PolyU11211521"]}]},{"name":"Research Grants Council of the Hong Kong SAR","award":["PolyU15218622"],"award-info":[{"award-number":["PolyU15218622"]}]},{"name":"Research Grants Council of the Hong Kong SAR","award":["PolyU15215623"],"award-info":[{"award-number":["PolyU15215623"]}]},{"DOI":"10.13039\/501100004377","name":"Hong Kong Polytechnic University","doi-asserted-by":"publisher","award":["P0039734"],"award-info":[{"award-number":["P0039734"]}],"id":[{"id":"10.13039\/501100004377","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004377","name":"Hong Kong Polytechnic University","doi-asserted-by":"publisher","award":["P0035379"],"award-info":[{"award-number":["P0035379"]}],"id":[{"id":"10.13039\/501100004377","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Emerg. Top. Comput. Intell."],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1109\/tetci.2024.3361860","type":"journal-article","created":{"date-parts":[[2024,2,26]],"date-time":"2024-02-26T20:13:47Z","timestamp":1708978427000},"page":"2213-2227","source":"Crossref","is-referenced-by-count":6,"title":["Reinforcement Learning With Adaptive Policy Gradient Transfer Across Heterogeneous Problems"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-7960-2605","authenticated-orcid":false,"given":"Gengzhi","family":"Zhang","sequence":"first","affiliation":[{"name":"College of Computer Science, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1521-2757","authenticated-orcid":false,"given":"Liang","family":"Feng","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University, Chongqing, China"}]},{"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of User Growth and Operations, Jing Dong Retail, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9013-2020","authenticated-orcid":false,"given":"Min","family":"Li","sequence":"additional","affiliation":[{"name":"Department of User Growth and Operations, Jing Dong Retail, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7935-7210","authenticated-orcid":false,"given":"Hong","family":"Xie","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6802-2463","authenticated-orcid":false,"given":"Kay Chen","family":"Tan","sequence":"additional","affiliation":[{"name":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong SAR"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3618-5"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref3","article-title":"Deep reinforcement learning: An overview","author":"Li","year":"2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2773458"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref7","article-title":"Sample efficient actor-critic with experience replay","author":"Wang","year":"2016"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2018.2823329"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2021.3098354"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2022.3145706"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2020.3039066"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2989465"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2845361"},{"issue":"7","key":"ref16","first-page":"1633","article-title":"Transfer learning for reinforcement learning domains: A survey","volume":"10","author":"Taylor","year":"2009","journal-title":"J. Mach. Learn. Res."},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_5"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2022.3209655"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3292075"},{"key":"ref20","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schaul","year":"2015"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1160633.1160762"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref23","first-page":"1331","article-title":"Distilling policy distillation","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Czarnecki","year":"2019"},{"key":"ref24","first-page":"166","article-title":"Modular multitask reinforcement learning with policy sketches","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Andreas","year":"2017"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273607"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref27","article-title":"Learning invariant feature spaces to trans-fer skills with reinforcement learning","author":"Gupta","year":"2017"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref29","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref32","first-page":"2829","article-title":"Continuous deep Q-learning with model-based acceleration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gu","year":"2016"},{"key":"ref33","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih","year":"2016"},{"key":"ref34","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"ref37","first-page":"1008","article-title":"Actor-critic algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Konda","year":"2000"},{"key":"ref38","article-title":"Off-policy actor-critic","author":"Degris","year":"2012"},{"key":"ref39","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman","year":"2015"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-016-5547-y"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref42","article-title":"Policy distillation","author":"Rusu","year":"2015"},{"key":"ref43","article-title":"Actor-mimic: Deep multitask and transfer reinforcement learning","author":"Parisotto","year":"2015"},{"key":"ref44","first-page":"4499","article-title":"Distral: Robust multitask reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.31st Int. Conf. Neural Inf. Process. Syst.","author":"Teh","year":"2017"},{"key":"ref45","first-page":"2469","article-title":"Policy optimization with demonstrations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kang","year":"2018"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462977"},{"key":"ref48","article-title":"Attend, adapt and transfer: Attentive deep architecture for adaptive transfer from multiple sources in the same domain","author":"Rajendran","year":"2015"},{"key":"ref49","article-title":"Context-aware policy reuse","author":"Li","year":"2018"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11718"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/428"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3271445"},{"key":"ref53","first-page":"31","article-title":"An automated measure of MDP similarity for transfer in reinforcement learning","volume-title":"Proc. Workshops 28th AAAI Conf. Artif. Intell.","author":"Ammar","year":"2014"},{"key":"ref54","article-title":"A taxonomy of similarity metrics for Markov decision processes","author":"Viss","year":"2021"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2005.1555955"},{"key":"ref56","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"}],"container-title":["IEEE Transactions on Emerging Topics in Computational Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7433297\/10538459\/10444921.pdf?arnumber=10444921","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T18:56:50Z","timestamp":1732647410000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10444921\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":56,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tetci.2024.3361860","relation":{},"ISSN":["2471-285X"],"issn-type":[{"value":"2471-285X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]}}}