{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T16:07:18Z","timestamp":1774022838627,"version":"3.50.1"},"reference-count":44,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013076","name":"National Major Science and Technology Projects of China","doi-asserted-by":"publisher","award":["2025ZD1604900"],"award-info":[{"award-number":["2025ZD1604900"]}],"id":[{"id":"10.13039\/501100013076","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.neunet.2026.108667","type":"journal-article","created":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T16:32:57Z","timestamp":1770049977000},"page":"108667","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Improving policy exploitation in online reinforcement learning with instant retrospect action"],"prefix":"10.1016","volume":"199","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0469-6028","authenticated-orcid":false,"given":"Gong","family":"Gao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2091-3900","authenticated-orcid":false,"given":"Weidong","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7048-7148","authenticated-orcid":false,"given":"Xianhui","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2395-3432","authenticated-orcid":false,"given":"Ning","family":"Jia","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.108667_bib0001","first-page":"29304","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume":"34","author":"Agarwal","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0002","first-page":"8229","article-title":"Learning markov state abstractions for deep reinforcement learning","volume":"34","author":"Allen","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0003","first-page":"133007","article-title":"Reinforcement learning under latent dynamics: Toward statistical and algorithmic modularity","volume":"37","author":"Amortila","year":"2025","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0004","doi-asserted-by":"crossref","first-page":"11230","DOI":"10.1609\/aaai.v38i10.29001","article-title":"Offline model-based optimization via policy-guided gradient search","volume":"38","author":"Chemingui","year":"2024","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.neunet.2026.108667_bib0005","series-title":"International conference on machine learning","first-page":"3852","article-title":"Adversarially trained actor critic for offline reinforcement learning","author":"Cheng","year":"2022"},{"key":"10.1016\/j.neunet.2026.108667_bib0006","series-title":"Encyclopedia of distances","author":"Deza","year":"2009"},{"key":"10.1016\/j.neunet.2026.108667_bib0007","series-title":"International conference on machine learning","first-page":"1407","article-title":"IMPALA: Scalable distributed deep-RL with importance weighted actor-learner architectures","author":"Espeholt","year":"2018"},{"key":"10.1016\/j.neunet.2026.108667_bib0008","first-page":"99433","article-title":"How to solve contextual goal-oriented problems with offline datasets?","volume":"37","author":"Fan","year":"2025","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0009","first-page":"61573","article-title":"For sale: State-action representation learning for deep reinforcement learning","volume":"36","author":"Fujimoto","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0010","series-title":"Advances in neural information processing systems","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume":"vol. 34","author":"Fujimoto","year":"2021"},{"key":"10.1016\/j.neunet.2026.108667_bib0011","series-title":"International conference on machine learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.neunet.2026.108667_bib0012","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"20215","article-title":"Frustratingly easy regularization on representation can boost deep reinforcement learning","author":"He","year":"2023"},{"key":"10.1016\/j.neunet.2026.108667_bib0013","doi-asserted-by":"crossref","first-page":"1820","DOI":"10.52202\/068431-0133","article-title":"Reinforcement learning with automated auxiliary loss search","volume":"35","author":"He","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0014","first-page":"12519","article-title":"When to trust your model: Model-based policy optimization","volume":"32","author":"Janner","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0015","first-page":"59477","article-title":"Policy gradient for rectangular robust markov decision processes","volume":"36","author":"Kumar","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0016","unstructured":"Lillicrap, T. P. (2015). Continuous control with deep reinforcement learning. arXiv preprint arXiv: 1509.02971."},{"key":"10.1016\/j.neunet.2026.108667_bib0017","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106588","article-title":"Adaptive pessimism via target Q-value for offline reinforcement learning","volume":"180","author":"Liu","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108667_bib0018","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106367","article-title":"Segmenting medical images with limited data","volume":"177","author":"Liu","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108667_bib0019","first-page":"108167","article-title":"Optimistic critic reconstruction and constrained fine-tuning for general offline-to-online RL","volume":"37","author":"Luo","year":"2025","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0020","unstructured":"Luo, Z., Zhu, M., Liu, F., Li, J., Pan, Y., Zhou, J., & Zhu, T. (2024). DTR-bench: An in silico environment and benchmark platform for reinforcement learning based dynamic treatment regime. arXiv preprint arXiv: 2405.18610."},{"key":"10.1016\/j.neunet.2026.108667_bib0021","first-page":"56215","article-title":"Iteratively refined behavior regularization for offline reinforcement learning","volume":"37","author":"Ma","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0022","series-title":"Advances in neural information processing systems","first-page":"93568","article-title":"Offline reinforcement learning with OOD state correction and OOD action suppression","volume":"vol. 37","author":"Mao","year":"2025"},{"key":"10.1016\/j.neunet.2026.108667_bib0023","unstructured":"Mnih, V. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv: 1312.5602."},{"key":"10.1016\/j.neunet.2026.108667_bib0024","unstructured":"Mnih, V. (2016). Asynchronous methods for deep reinforcement learning. arXiv preprint arXiv: 1602.01783."},{"key":"10.1016\/j.neunet.2026.108667_bib0025","unstructured":"Ni, T., Eysenbach, B., Seyedsalehi, E., Ma, M., Gehring, C., Mahajan, A., & Bacon, P.-L. (2024). Bridging state and history representations: Understanding self-predictive RL. arXiv preprint arXiv: 2401.08898."},{"key":"10.1016\/j.neunet.2026.108667_bib0026","first-page":"50429","article-title":"When do transformers shine in RL? Decoupling memory from credit assignment","volume":"36","author":"Ni","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0027","doi-asserted-by":"crossref","first-page":"489","DOI":"10.1016\/j.neunet.2023.04.043","article-title":"An adaptive reinforcement learning-based multimodal data fusion framework for human\u2013robot confrontation gaming","volume":"164","author":"Qi","year":"2023","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108667_bib0028","series-title":"International conference on machine learning","article-title":"Augmenting decision with hypothesis in reinforcement learning","author":"Quang","year":"2024"},{"issue":"89","key":"10.1016\/j.neunet.2026.108667_bib0029","doi-asserted-by":"crossref","DOI":"10.1126\/scirobotics.adi9579","article-title":"Real-world humanoid locomotion with reinforcement learning","volume":"9","author":"Radosavovic","year":"2024","journal-title":"Science Robotics"},{"key":"10.1016\/j.neunet.2026.108667_bib0030","series-title":"International conference on machine learning","first-page":"28701","article-title":"Policy regularization with dataset constraint for offline reinforcement learning","author":"Ran","year":"2023"},{"key":"10.1016\/j.neunet.2026.108667_bib0031","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv preprint arXiv: 1707.06347."},{"key":"10.1016\/j.neunet.2026.108667_bib0032","first-page":"3115","article-title":"Q-learning with nearest neighbors","volume":"31","author":"Shah","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0033","unstructured":"Shaheen, A., Badr, A., Abohendy, A., Alsaadawy, H., & Alsayad, N. (2025). Reinforcement learning in strategy-based and atari games: A review of google deepminds innovations. arXiv preprint arXiv: 2502.10303."},{"key":"10.1016\/j.neunet.2026.108667_bib0034","doi-asserted-by":"crossref","first-page":"9558","DOI":"10.1609\/aaai.v35i11.17151","article-title":"Theoretically principled deep RL acceleration via nearest neighbor function approximation","volume":"35","author":"Shen","year":"2021","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.neunet.2026.108667_bib0035","first-page":"11592","article-title":"Revisiting the minimalist approach to offline reinforcement learning","volume":"36","author":"Tarasov","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.108667_bib0036","series-title":"Proceedings of the AAAI conference on artificial intelligence","article-title":"Deep reinforcement learning with double q-learning","volume":"vol. 30","author":"Van Hasselt","year":"2016"},{"key":"10.1016\/j.neunet.2026.108667_bib0037","doi-asserted-by":"crossref","first-page":"22410","DOI":"10.1609\/aaai.v38i20.30248","article-title":"Deep reinforcement learning for early diagnosis of lung cancer","volume":"38","author":"Wang","year":"2024","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.neunet.2026.108667_bib0038","series-title":"The twelfth international conference on learning representations","article-title":"Negatively correlated ensemble reinforcement learning for online diverse game level generation","author":"Wang","year":"2024"},{"key":"10.1016\/j.neunet.2026.108667_bib0039","doi-asserted-by":"crossref","first-page":"10674","DOI":"10.1609\/aaai.v35i12.17276","article-title":"Improving sample efficiency in model-free reinforcement learning from images","volume":"35","author":"Yarats","year":"2021","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.neunet.2026.108667_bib0040","doi-asserted-by":"crossref","first-page":"16539","DOI":"10.1609\/aaai.v38i15.29592","article-title":"Cheaper and faster: Distributed deep reinforcement learning with serverless computing","volume":"38","author":"Yu","year":"2024","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.neunet.2026.108667_bib0041","series-title":"International conference on machine learning","first-page":"40452","article-title":"Actor-critic alignment for offline-to-online reinforcement learning","author":"Yu","year":"2023"},{"key":"10.1016\/j.neunet.2026.108667_bib0042","doi-asserted-by":"crossref","first-page":"11372","DOI":"10.1609\/aaai.v37i9.26345","article-title":"Adaptive policy learning for offline-to-online reinforcement learning","volume":"37","author":"Zheng","year":"2023","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.neunet.2026.108667_bib0043","series-title":"2024\u202fIEEE international conference on robotics and automation (ICRA)","first-page":"9176","article-title":"Effective representation learning is more effective in reinforcement learning than you think","author":"Zheng","year":"2024"},{"key":"10.1016\/j.neunet.2026.108667_bib0044","unstructured":"Zhuang, Z., Shi, D., Suo, R., He, X., Zhang, H., Wang, T., Lyu, S., & Wang, D. (2025). TDMPBC: Self-imitative reinforcement learning for humanoid robot control. arXiv preprint arXiv: 2502.17322."}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026001292?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026001292?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:17:29Z","timestamp":1774019849000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026001292"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":44,"alternative-id":["S0893608026001292"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108667","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Improving policy exploitation in online reinforcement learning with instant retrospect action","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108667","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"108667"}}