{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T15:44:06Z","timestamp":1778255046995,"version":"3.51.4"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Hong Kong Research Grant Council through the General Research Fund","award":["11216323"],"award-info":[{"award-number":["11216323"]}]},{"DOI":"10.13039\/501100004787","name":"Research Impact Fund (RIF) Project","doi-asserted-by":"publisher","award":["R5060-19"],"award-info":[{"award-number":["R5060-19"]}],"id":[{"id":"10.13039\/501100004787","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tnnls.2025.3581217","type":"journal-article","created":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T13:43:54Z","timestamp":1750945434000},"page":"18188-18202","source":"Crossref","is-referenced-by-count":1,"title":["Neighboring State-Aware Policy for Deep Reinforcement Learning"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4857-5439","authenticated-orcid":false,"given":"Meng","family":"Xu","sequence":"first","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8563-148X","authenticated-orcid":false,"given":"Xinhong","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanyi","family":"Zhao","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zihao","family":"Wen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weiwei","family":"Fu","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9318-1482","authenticated-orcid":false,"given":"Jianping","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"AlignMamba: Enhancing multimodal mamba with local and global cross-modal alignment","author":"Li","year":"2024","journal-title":"arXiv:2412.00833"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3672398"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2024.3440924"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09997-9"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3579829"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.120261"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2025.3557930"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2022.0495"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-2217(96)00378-5"},{"key":"ref11","volume":"11","author":"Hardman","year":"2009","journal-title":"Judgment and Decision Making: Psychological Perspectives"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.conb.2013.12.004"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref14","article-title":"Neighboring state-based RL exploration","author":"Cheng","year":"2022","journal-title":"arXiv:2212.10712"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.10.085"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-024-11461-y"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2024.3368055"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2024.3359039"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3623405"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3385570"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3325633"},{"key":"ref22","first-page":"1","article-title":"Mitigating off-policy bias in actor-critic methods with one-step Q-learning: A novel correction approach","volume":"1","author":"Saglam","year":"2024","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref23","first-page":"1","article-title":"Learning to infer unseen contexts in causal contextual reinforcement learning","volume-title":"Proc. Self-Supervision Reinforcement Learn. (SSL-RL) Workshop","author":"Eghbal-zadeh"},{"key":"ref24","article-title":"Contextualize me\u2014The case for context in reinforcement learning","author":"Benjamins","year":"2022","journal-title":"arXiv:2202.04500"},{"key":"ref25","first-page":"9767","article-title":"Multi-task reinforcement learning with context-based representations","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Sodhani"},{"key":"ref26","first-page":"2948","article-title":"Self-paced context evaluation for contextual reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Eimer"},{"key":"ref27","article-title":"CARL: A benchmark for contextual and adaptive reinforcement learning","author":"Benjamins","year":"2021","journal-title":"arXiv:2110.02102"},{"key":"ref28","first-page":"75491","article-title":"CEIL: Generalized contextual imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref29","first-page":"43057","article-title":"Supervised pretraining can learn in-context reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lee"},{"key":"ref30","first-page":"1","article-title":"Structured state space models for in-context reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lu"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561593"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26146"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20309-1_31"},{"key":"ref34","article-title":"Contextual transformer for offline meta reinforcement learning","author":"Lin","year":"2022","journal-title":"arXiv:2211.08016"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013739"},{"key":"ref36","first-page":"1","article-title":"A long N-step surrogate stage reward for deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Zhong"},{"key":"ref37","article-title":"RLx2: Training a sparse deep reinforcement learning model from scratch","author":"Tan","year":"2022","journal-title":"arXiv:2205.15043"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.108033"},{"key":"ref39","article-title":"Combining information-seeking exploration and reward maximization: Unified inference on continuous state and action spaces under partial observability","author":"Malekzadeh","year":"2022","journal-title":"arXiv:2212.07946"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-47705-8_14"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2022.11.065"},{"key":"ref42","first-page":"1","article-title":"ODE-based recurrent model-free reinforcement learning for POMDPs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhao"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636140"},{"key":"ref44","first-page":"8707","article-title":"Deep reinforcement learning with robust and smooth policy","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Shen"},{"key":"ref45","first-page":"9797","article-title":"Safe reinforcement learning in constrained Markov decision processes","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Wachi"},{"key":"ref46","article-title":"Recurrent off-policy baselines for memory-based continuous control","author":"Yang","year":"2021","journal-title":"arXiv:2110.12628"},{"key":"ref47","first-page":"25817","article-title":"Scalable multi-agent reinforcement learning through intelligent information aggregation","volume-title":"Proc. 40th Int. Conf. Mach. Learn. (ICML)","author":"Nayak"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6212"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2024.3446996"},{"key":"ref50","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023","journal-title":"arXiv:2312.00752"},{"key":"ref51","first-page":"2159","article-title":"Deterministic policy gradient: Convergence analysis","volume-title":"Proc. Uncertainty Artif. Intell.","author":"Xiong"},{"key":"ref52","first-page":"28701","article-title":"Policy regularization with dataset constraint for offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ran"},{"key":"ref53","article-title":"Adaptive regularization of representation rank as an implicit constraint of Bellman equation","author":"He","year":"2024","journal-title":"arXiv:2404.12754"},{"key":"ref54","first-page":"17662","article-title":"Online meta-critic learning for off-policy actor-critic methods","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhou"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2024.3401014"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3170485"},{"key":"ref57","article-title":"Automatic noise filtering with dynamic sparse training in deep reinforcement learning","author":"Grooten","year":"2023","journal-title":"arXiv:2302.06548"},{"key":"ref58","first-page":"1627","article-title":"Where to pay attention in sparse training for feature selection?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sokar"},{"key":"ref59","first-page":"3952","article-title":"Supervised feature selection via ensemble gradient information from sparse neural networks","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Liu"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/11195929\/11052833.pdf?arnumber=11052833","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T17:39:31Z","timestamp":1759945171000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11052833\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":59,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2025.3581217","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}