{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T13:48:41Z","timestamp":1769262521132,"version":"3.49.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12101133"],"award-info":[{"award-number":["12101133"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003399","name":"Shanghai Municipal Science and Technology Major","doi-asserted-by":"publisher","award":["2021 SHZDZX0 103"],"award-info":[{"award-number":["2021 SHZDZX0 103"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9891902","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-7","source":"Crossref","is-referenced-by-count":2,"title":["Deep Reinforcement Learning with Parametric Episodic Memory"],"prefix":"10.1109","author":[{"given":"Kangkang","family":"Chen","sequence":"first","affiliation":[{"name":"Institute of AI and Robotics, Fudan University, Academy for Engineering and Technology,Shanghai,China,200433"}]},{"given":"Zhongxue","family":"Gan","sequence":"additional","affiliation":[{"name":"Institute of AI and Robotics, Fudan University, Academy for Engineering and Technology,Shanghai,China,200433"}]},{"given":"Siyang","family":"Leng","sequence":"additional","affiliation":[{"name":"Institute of AI and Robotics, Fudan University, Academy for Engineering and Technology,Shanghai,China,200433"}]},{"given":"Chun","family":"Guan","sequence":"additional","affiliation":[{"name":"Institute of AI and Robotics, Fudan University, Academy for Engineering and Technology,Shanghai,China,200433"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/365"},{"key":"ref11","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref12","article-title":"Prox-imal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref14","first-page":"1587","article-title":"Addressing function approxi-mation error in actor-critic methods","author":"fujimoto","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref15","first-page":"72","article-title":"Efficient selectivity and backup operators in monte-carlo tree search","author":"coulom","year":"0","journal-title":"International conference on computers and games"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref17","first-page":"690","article-title":"Monte carlo tree search in continuous action spaces with execution uncertainty","author":"yee","year":"0","journal-title":"Proceedings of the 25th International Joint Conference on Artificial Intelligence"},{"key":"ref18","first-page":"2937","article-title":"Deep reinforcement learning in continuous action spaces: a case study in the game of simulated curling","author":"lee","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref19","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref4","article-title":"Model-free episodic control","author":"blundell","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.nlm.2004.06.005"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/337"},{"key":"ref5","first-page":"2827","article-title":"Neural episodic control","author":"pritzel","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref8","first-page":"4380","article-title":"Generalizable episodic memory for deep reinforcement learning","author":"hu","year":"0","journal-title":"Proceedings of the 38th International Conference on Machine Learning"},{"key":"ref7","article-title":"Episodic reinforcement learning with associative memory","author":"zhu","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3758\/BF03337828"},{"key":"ref9","first-page":"3878","article-title":"Self-imitation learning","author":"oh","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tins.2011.08.001"},{"key":"ref20","first-page":"1479","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"0","journal-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems"},{"key":"ref22","first-page":"1","article-title":"Exploration by random network distillation","author":"burda","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref21","first-page":"1","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","volume":"30","author":"tang","year":"0","journal-title":"Neural Information Processing Systems (NIPS) Conference"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref26","author":"brockman","year":"2016","journal-title":"OpenAI Gym"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","location":"Padua, Italy","start":{"date-parts":[[2022,7,18]]},"end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09891902.pdf?arnumber=9891902","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T22:56:49Z","timestamp":1667516209000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9891902\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9891902","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}