{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T04:03:03Z","timestamp":1774497783807,"version":"3.50.1"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Basic Research Program of China","doi-asserted-by":"publisher","award":["2018YFA0701603"],"award-info":[{"award-number":["2018YFA0701603"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003995","name":"Natural Science Foundation of Anhui Province","doi-asserted-by":"publisher","award":["2008085MF213"],"award-info":[{"award-number":["2008085MF213"]}],"id":[{"id":"10.13039\/501100003995","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/access.2020.3009329","type":"journal-article","created":{"date-parts":[[2020,7,15]],"date-time":"2020-07-15T20:57:31Z","timestamp":1594846651000},"page":"129274-129284","source":"Crossref","is-referenced-by-count":23,"title":["Sample Efficient Reinforcement Learning Method via High Efficient Episodic Memory"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9309-9067","authenticated-orcid":false,"given":"Dujia","family":"Yang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7296-3461","authenticated-orcid":false,"given":"Xiaowei","family":"Qin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9041-3826","authenticated-orcid":false,"given":"Xiaodong","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3500-754X","authenticated-orcid":false,"given":"Chensheng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Guo","family":"Wei","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuron.2006.03.036"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.tins.2010.01.006"},{"key":"ref33","article-title":"Episodic curiosity through reachability","author":"savinov","year":"2018","journal-title":"arXiv 1810 02274"},{"key":"ref32","article-title":"Random projection in neural episodic control","author":"nishio","year":"2019","journal-title":"arXiv 1904 01790"},{"key":"ref31","article-title":"Integrating episodic memory into a reinforcement learning agent using reservoir sampling","author":"young","year":"2018","journal-title":"arXiv 1806 00540"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref37","first-page":"2753","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref36","first-page":"1","article-title":"Extensions of Lipschitz mappings into a Hilbert space","volume":"26","author":"johnson","year":"1984","journal-title":"Contemp Math"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/COMPSAC.2018.00075"},{"key":"ref34","article-title":"Asynchronous episodic deep deterministic policy gradient: Toward continuous control in computationally complex environments","author":"zhang","year":"2019","journal-title":"IEEE Trans Cybern"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143955"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.3758\/PBR.15.1.96"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2019.02.006"},{"key":"ref13","first-page":"1","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"Proc 30th AAAI Conf Artif Intell"},{"key":"ref14","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2015","journal-title":"arXiv 1511 06581"},{"key":"ref15","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"arXiv 1511 05952"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2889999"},{"key":"ref17","first-page":"1","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2017","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref18","first-page":"1054","article-title":"Safe and efficient off-policy reinforcement learning","author":"munos","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1037\/a0023575"},{"key":"ref28","first-page":"2110","article-title":"Sample-efficient deep reinforcement learning via episodic backward update","author":"lee","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-63519-4","volume":"2","author":"yannakakis","year":"2018","journal-title":"Artificial Intelligence for Games"},{"key":"ref27","first-page":"10567","article-title":"Fast deep reinforcement learning using online adjustments from the past","author":"hansen","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref29","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2918703"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2720851"},{"key":"ref9","first-page":"2","article-title":"Alphastar: Mastering the real-time strategy game starcraft II","author":"vinyals","year":"2019","journal-title":"DeepMind blog"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1093\/acprof:oso\/9780195100273.001.0001"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.102.3.419"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4684-6775-8_5"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.tins.2011.08.001"},{"key":"ref24","article-title":"Model-free episodic control","author":"blundell","year":"2016","journal-title":"arXiv 1606 04460"},{"key":"ref41","first-page":"3014","article-title":"Weighted importance sampling for off-policy learning with linear function approximation","author":"mahmood","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref23","first-page":"889","article-title":"Hippocampal contributions to control: The third way","author":"lengyel","year":"2008","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref44","article-title":"OpenAI gym","author":"brockman","year":"2016","journal-title":"arXiv 1606 01540 [cs]"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/337"},{"key":"ref43","article-title":"Learning to play in a day: Faster deep reinforcement learning by optimality tightening","author":"he","year":"2016","journal-title":"arXiv 1611 01606"},{"key":"ref25","first-page":"2827","article-title":"Neural episodic control","volume":"70","author":"pritzel","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8948470\/09141230.pdf?arnumber=9141230","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,2]],"date-time":"2022-11-02T22:06:52Z","timestamp":1667426812000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9141230\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/access.2020.3009329","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}