{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:55:43Z","timestamp":1759334143476,"version":"build-2065373602"},"reference-count":28,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276028","U20A20167"],"award-info":[{"award-number":["62276028","U20A20167"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Hebei Province Central Leading Local Science and Technology Development","award":["236Z1811G","246Z1817G"],"award-info":[{"award-number":["236Z1811G","246Z1817G"]}]},{"name":"Innovation Capability Improvement Plan Project of Hebei Province","award":["22567626H"],"award-info":[{"award-number":["22567626H"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Emerg. Top. Comput. Intell."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tetci.2025.3548787","type":"journal-article","created":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T13:45:24Z","timestamp":1742305524000},"page":"3560-3571","source":"Crossref","is-referenced-by-count":0,"title":["TUCA-HER: An Improved HER for Robot Manipulation Skill Learning via Trajectory Utility and Conservative Advantage"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1228-2757","authenticated-orcid":false,"given":"Peiliang","family":"Wu","sequence":"first","affiliation":[{"name":"Key Laboratory for Computer Virtual Technology and System Integration of Hebei Province, School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3849-1649","authenticated-orcid":false,"given":"Zhaoqi","family":"Wang","sequence":"additional","affiliation":[{"name":"Key Laboratory for Computer Virtual Technology and System Integration of Hebei Province, School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7631-2459","authenticated-orcid":false,"given":"Yao","family":"Li","sequence":"additional","affiliation":[{"name":"Key Laboratory for Computer Virtual Technology and System Integration of Hebei Province, School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7683-2776","authenticated-orcid":false,"given":"Wenbai","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Information Science and Technology University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3437-4265","authenticated-orcid":false,"given":"Guowei","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Information Science and Technology University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2023.3271113"},{"article-title":"Behavior proximal policy optimization","year":"2023","author":"Zhuang","key":"ref2"},{"key":"ref3","first-page":"8280","article-title":"Off-policy reinforcement learning with delayed rewards","volume-title":"Proc. 39th Int. Conf. Mach. Learn.","author":"Han","year":"2022"},{"key":"ref4","first-page":"1","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Andrychowicz","year":"2017"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63820-7_94"},{"issue":"1","key":"ref6","first-page":"99","article-title":"A learning method for robot manipulation skills oriented to sparse rewards","volume":"41","author":"Wu","year":"2024","journal-title":"Control Theory Appl."},{"key":"ref7","first-page":"1","article-title":"Hindsight foresight relabeling for meta-reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wan","year":"2022"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISIC.1992.225046"},{"key":"ref9","first-page":"158","article-title":"Implicit behavioral cloning","volume-title":"Proc. 5th Conf. Robot Learn.","author":"Florence","year":"2022"},{"key":"ref10","first-page":"4028","article-title":"IQ-learn: Inverse soft-Q learning for imitation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Garg","year":"2021"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3144867"},{"issue":"05","key":"ref12","first-page":"590","article-title":"Summarize of hierarchical reinforcement learning","volume":"12","author":"Zhou","year":"2017","journal-title":"CAAI Trans. Intell. Syst."},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3049555"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.119192"},{"article-title":"Soft actor-critic algorithms and applications","year":"2018","author":"Haarnoja","key":"ref15"},{"article-title":"Simultaneous double Q-learning with conservative advantage learning for actor-critic methods","year":"2022","author":"Li","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126620"},{"article-title":"MHER: Model-based hindsight experience replay","year":"2021","author":"Yang","key":"ref18"},{"key":"ref19","article-title":"Curriculum-guided hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Fang","year":"2019"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3174258"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197421"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161119"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2022.3172754"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2990722"},{"key":"ref25","first-page":"1","article-title":"Bilinear value networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hong","year":"2022"},{"key":"ref26","first-page":"14783","article-title":"Rewriting history with inverse RL: Hindsight inference for policy improvement","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Eysenbach","year":"2020"},{"key":"ref27","first-page":"11767","article-title":"Softmax deep double deterministic policy gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Pan","year":"2020"},{"key":"ref28","first-page":"1","article-title":"Pessimistic model-based offline reinforcement learning under partial coverage","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Uehara","year":"2021"}],"container-title":["IEEE Transactions on Emerging Topics in Computational Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7433297\/11177632\/10930738.pdf?arnumber=10930738","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T13:04:43Z","timestamp":1759237483000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10930738\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":28,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tetci.2025.3548787","relation":{},"ISSN":["2471-285X"],"issn-type":[{"type":"electronic","value":"2471-285X"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}