{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:53:39Z","timestamp":1775066019698,"version":"3.50.1"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021ZD0113604"],"award-info":[{"award-number":["2021ZD0113604"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"China Agriculture Research System of MOF and MARA","award":["CARS-23-D07"],"award-info":[{"award-number":["CARS-23-D07"]}]},{"name":"Central Guiding Local Science and Technology Development Fund Projects","award":["2023ZY1-CGZY-01"],"award-info":[{"award-number":["2023ZY1-CGZY-01"]}]},{"name":"Major Research Project of National Natural Science Foundation of China","award":["92267110"],"award-info":[{"award-number":["92267110"]}]},{"name":"Major Research Project of National Natural Science Foundation of China","award":["62076202"],"award-info":[{"award-number":["62076202"]}]},{"name":"Open Research Projects of Zhejiang Laboratory","award":["2022NB0AB07"],"award-info":[{"award-number":["2022NB0AB07"]}]},{"name":"Shaanxi Province Key Research and Development Program of China","award":["2022GY-090"],"award-info":[{"award-number":["2022GY-090"]}]},{"name":"CAAI-Huawei MindSpore Open Fund","award":["CAAIXSJLJJ-2021-041A"],"award-info":[{"award-number":["CAAIXSJLJJ-2021-041A"]}]},{"name":"Doctor\u2019s Scientific Research and Innovation Foundation of Northwestern Polytechnical University","award":["CX2022016"],"award-info":[{"award-number":["CX2022016"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1109\/tnnls.2024.3397704","type":"journal-article","created":{"date-parts":[[2024,5,17]],"date-time":"2024-05-17T13:41:15Z","timestamp":1715953275000},"page":"7449-7461","source":"Crossref","is-referenced-by-count":5,"title":["Eliminating Primacy Bias in Online Reinforcement Learning by Self-Distillation"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0905-0816","authenticated-orcid":false,"given":"Jingchen","family":"Li","sequence":"first","affiliation":[{"name":"Information Technology Research Center, Beijing Academy of Agriculture and Forestry Science, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2180-8941","authenticated-orcid":false,"given":"Haobin","family":"Shi","sequence":"additional","affiliation":[{"name":"School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huarui","family":"Wu","sequence":"additional","affiliation":[{"name":"Information Technology Research Center, Beijing Academy of Agriculture and Forestry Science, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunjiang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Information Technology Research Center, Beijing Academy of Agriculture and Forestry Science, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9234-4836","authenticated-orcid":false,"given":"Kao-Shing","family":"Hwang","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, National Sun Yat-sen University, Kaohsiung, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"19773","article-title":"Accelerating reinforcement learning through GPU Atari emulation","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Dalton"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21421"},{"key":"ref4","first-page":"907","article-title":"S4RL: Surprisingly simple self-supervision for offline reinforcement learning in robotics","volume-title":"Proc. 5th Conf. Robot Learn.","volume":"164","author":"Sinha"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2022.3170646"},{"key":"ref6","first-page":"25611","article-title":"How to leverage unlabeled data in offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yu"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00573-6"},{"key":"ref8","first-page":"11501","article-title":"Conservative data sharing for multi-task offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Yu"},{"key":"ref9","first-page":"16828","article-title":"The primacy bias in deep reinforcement learning","volume-title":"Proc. 39th Int. Conf. Mach. Learn.","volume":"162","author":"Nikishin"},{"key":"ref10","first-page":"21810","article-title":"MOReL: Model-based offline reinforcement learning","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kidambi"},{"key":"ref11","first-page":"626","article-title":"Trust region-guided proximal policy optimization","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref12","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref13","article-title":"A study on overfitting in deep reinforcement learning","author":"Zhang","year":"2018","journal-title":"arXiv:1804.06893"},{"key":"ref14","first-page":"512","article-title":"What is being transferred in transfer learning?","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Neyshabur"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197465"},{"key":"ref16","article-title":"Investigating generalisation in continuous deep reinforcement learning","author":"Zhao","year":"2019","journal-title":"arXiv:1902.07015"},{"key":"ref17","article-title":"Observational overfitting in reinforcement learning","author":"Song","year":"2019","journal-title":"arXiv:1912.02975"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.01.016"},{"key":"ref19","first-page":"11920","article-title":"Reinforcement learning with prototypical representations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yarats"},{"key":"ref20","first-page":"9870","article-title":"Decoupling representation learning from reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Stooke"},{"key":"ref21","first-page":"12686","article-title":"Pretraining representations for data-efficient reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Schwarzer"},{"key":"ref22","article-title":"Prioritized experience replay","author":"Schaul","year":"2015","journal-title":"arXiv:1511.05952"},{"key":"ref23","first-page":"3878","article-title":"Self-imitation learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Oh"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/tase.2023.3323307"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref26","first-page":"4994","article-title":"Online reinforcement learning in stochastic games","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Wei"},{"key":"ref27","first-page":"7193","article-title":"Online robust reinforcement learning with model uncertainty","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Wang"},{"key":"ref28","first-page":"15254","article-title":"Meta-gradient reinforcement learning with an objective discovered online","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref29","first-page":"5331","article-title":"Efficient off-policy meta-reinforcement learning via probabilistic context variables","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Rakelly"},{"key":"ref30","first-page":"3061","article-title":"Revisiting fundamentals of experience replay","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fedus"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1037\/a0029550"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107103"},{"key":"ref33","article-title":"Towards understanding ensemble, knowledge distillation and self-distillation in deep learning","author":"Allen-Zhu","year":"2020","journal-title":"arXiv:2012.09816"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389841"},{"key":"ref35","article-title":"Image augmentation is all you need: Regularizing deep reinforcement learning from pixels","author":"Kostrikov","year":"2020","journal-title":"arXiv:2004.13649"},{"key":"ref36","first-page":"29304","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Agarwal"},{"key":"ref37","first-page":"1","article-title":"Data-efficient reinforcement learning with self-predictive representations","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Schwarzer"},{"key":"ref38","first-page":"14345","article-title":"When to use parametric models in reinforcement learning?","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"van Hasselt"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctt4cgngj.10"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10949581\/10533687.pdf?arnumber=10533687","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:39:12Z","timestamp":1764959952000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10533687\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":39,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2024.3397704","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4]]}}}