{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T00:37:47Z","timestamp":1775090267600,"version":"3.50.1"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176225"],"award-info":[{"award-number":["62176225"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276168"],"award-info":[{"award-number":["62276168"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61836005"],"award-info":[{"award-number":["61836005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of Fujian Province, China","award":["2022J05176"],"award-info":[{"award-number":["2022J05176"]}]},{"name":"Guangdong Province, China","award":["2023A1515010869"],"award-info":[{"award-number":["2023A1515010869"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1109\/tai.2023.3328848","type":"journal-article","created":{"date-parts":[[2023,11,3]],"date-time":"2023-11-03T14:19:46Z","timestamp":1699021186000},"page":"2139-2150","source":"Crossref","is-referenced-by-count":3,"title":["Improved Demonstration-Knowledge Utilization in Reinforcement Learning"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8924-592X","authenticated-orcid":false,"given":"Yanyu","family":"Liu","sequence":"first","affiliation":[{"name":"Xiamen Key Laboratory of Big Data Intelligent Analysis and Decision, Department of Automation, Xiamen University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5246-403X","authenticated-orcid":false,"given":"Yifeng","family":"Zeng","sequence":"additional","affiliation":[{"name":"Department of Computer and Information Sciences, Northumbria University, Newcastle, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1515-6449","authenticated-orcid":false,"given":"Biyang","family":"Ma","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Minnan Normal University, Zhangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5715-2855","authenticated-orcid":false,"given":"Yinghui","family":"Pan","sequence":"additional","affiliation":[{"name":"National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8624-1301","authenticated-orcid":false,"given":"Huifan","family":"Gao","sequence":"additional","affiliation":[{"name":"Xiamen Key Laboratory of Big Data Intelligent Analysis and Decision, Department of Automation, Xiamen University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9675-5987","authenticated-orcid":false,"given":"Yuting","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xiamen Key Laboratory of Big Data Intelligent Analysis and Decision, Department of Automation, Xiamen University, Xiamen, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10061-9"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.26599\/TST.2021.9010012"},{"key":"ref3","first-page":"10905","article-title":"SCC: An efficient deep reinforcement learning agent mastering the game of StarCraft II","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2021"},{"key":"ref4","first-page":"1040","article-title":"Learning from demonstration","volume-title":"Proc. 9th Int. Conf. Neural Inf. Process. Syst.","author":"Schaal","year":"1996"},{"key":"ref5","first-page":"729","article-title":"Guided reinforcement learning with learned skills","volume-title":"Proc. Conf. Robot Learn.","author":"Pertsch","year":"2022"},{"key":"ref6","first-page":"7588221","article-title":"Efficient reinforcement learning from demonstration via Bayesian network-based knowledge extraction","volume-title":"Comput. Intell. Neuroscience","volume":"2021","author":"Zhang","year":"2021"},{"key":"ref7","article-title":"Reinforcement learning with sparse rewards using guidance from offline demonstration","author":"Rengarajan","year":"2022"},{"key":"ref8","first-page":"761","article-title":"Bayesian Q-learning","volume-title":"Proc. AAAI\/IAAI","volume":"1998","author":"Dearden","year":"1998"},{"key":"ref9","first-page":"449","article-title":"A distributional perspective on reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bellemare","year":"2017"},{"key":"ref10","article-title":"Distributed distributional deterministic policy gradients","author":"Barth-Maron","year":"2018"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3082568"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3409501.3409517"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-2939-8"},{"key":"ref14","article-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2018.09.036"},{"key":"ref16","first-page":"4028","article-title":"LQ-learn: Inverse soft-Q learning for imitation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Garg","year":"2021"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2014.07.003"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_36"},{"key":"ref19","first-page":"12686","article-title":"Pretraining representations for data-efficient reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Schwarzer","year":"2021"},{"key":"ref20","first-page":"18459","article-title":"Behavior from the void: Unsupervised active pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Liu","year":"2021"},{"key":"ref21","article-title":"Diversity is all you need: Learning skills without a reward function","author":"Eysenbach","year":"2018"},{"key":"ref22","first-page":"565","article-title":"Reward shaping in episodic reinforcement learning","volume-title":"Proc. 16th Conf. Auton. Agents MultiAgent Syst.","author":"Grze\u015a","year":"2017"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2017.7965896"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636020"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref26","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2017"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.02.008"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/422"},{"key":"ref29","article-title":"Bayesian Q-learning with imperfect expert demonstrations","author":"Che","year":"2022"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1002\/9781118771075"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729694"},{"issue":"4","key":"ref32","first-page":"363","article-title":"The mathematical method of production planning and organization","volume":"6","author":"Kantorovich","year":"1939","journal-title":"Manage. Sci."},{"key":"ref33","article-title":"OpenAI gym","author":"Brockman","year":"2016"},{"key":"ref34","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist.","author":"Ross","year":"2011"},{"key":"ref35","article-title":"Never Give Up: Learning directed exploration strategies","author":"Badia","year":"2020"},{"key":"ref36","first-page":"982","article-title":"Exact solutions to time-dependent MDPS","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"13","author":"Boyan","year":"2000"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2801479"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9078688\/10532210\/10308404.pdf?arnumber=10308404","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:08:53Z","timestamp":1755911333000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10308404\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":37,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tai.2023.3328848","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5]]}}}