{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T18:19:40Z","timestamp":1772302780129,"version":"3.50.1"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council through the Laureate Project","doi-asserted-by":"publisher","award":["FL190100149"],"award-info":[{"award-number":["FL190100149"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000923","name":"Discovery Early Career Researcher Award","doi-asserted-by":"publisher","award":["DE200100245"],"award-info":[{"award-number":["DE200100245"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1109\/tcyb.2024.3356981","type":"journal-article","created":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T23:54:04Z","timestamp":1707868444000},"page":"5191-5204","source":"Crossref","is-referenced-by-count":9,"title":["Deep Reinforcement Learning in Nonstationary Environments With Unknown Change Points"],"prefix":"10.1109","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3624-7753","authenticated-orcid":false,"given":"Zihe","family":"Liu","sequence":"first","affiliation":[{"name":"Australian Artificial Intelligence Institute, University of Technology Sydney, Ultimo, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0690-4732","authenticated-orcid":false,"given":"Jie","family":"Lu","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute, University of Technology Sydney, Ultimo, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8367-6908","authenticated-orcid":false,"given":"Junyu","family":"Xuan","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute, University of Technology Sydney, Ultimo, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3960-0583","authenticated-orcid":false,"given":"Guangquan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute, University of Technology Sydney, Ultimo, NSW, Australia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.3015811"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2878977"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2921057"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3111082"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2901897"},{"key":"ref7","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref8","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Mnih"},{"key":"ref9","first-page":"1414","article-title":"Optimizing for the future in non-stationary MDPs","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Chandak"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"ref11","article-title":"Progressive neural networks","author":"Rusu","year":"2016","journal-title":"arXiv:1606.04671"},{"key":"ref12","first-page":"4528","article-title":"Progress & compress: A scalable framework for continual learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Schwarz"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143872"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2017.7962986"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00132"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-020-01758-5"},{"issue":"1","key":"ref17","first-page":"723","article-title":"A kernel two-sample test","volume":"13","author":"Gretton","year":"2012","journal-title":"J. Mach. Learn. Res."},{"key":"ref18","first-page":"21370","article-title":"On the distance between two neural networks and the stability of learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Bernstein"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.13673"},{"key":"ref20","first-page":"5331","article-title":"Efficient off-policy meta-reinforcement learning via probabilistic context variables","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Rakelly"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3292075"},{"key":"ref22","first-page":"3987","article-title":"Continual learning through synaptic intelligence","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Zenke"},{"key":"ref23","first-page":"2990","article-title":"Continual learning with deep generative replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Shin"},{"key":"ref24","first-page":"6467","article-title":"Gradient episodic memory for continual learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Lopez-Paz"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.753"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561219"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.3028378"},{"key":"ref28","first-page":"11393","article-title":"Deep reinforcement learning amidst continual structured non-stationarity","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Xie"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3185549"},{"key":"ref30","first-page":"1","article-title":"Off-dynamics reinforcement learning: Training for transfer with domain classifiers","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Eysenbach"},{"key":"ref31","first-page":"838","article-title":"Lifelong robotic reinforcement learning by retaining experiences","volume-title":"Proc. Conf. Lifelong Learn. Agents","author":"Xie"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3200726"},{"key":"ref33","first-page":"14784","article-title":"You only live once: Single-life reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","volume":"35","author":"Chen"},{"key":"ref34","article-title":"SMiRL: Surprise minimizing RL in dynamic environments","author":"Berseth","year":"2020","journal-title":"arXiv:1912.05510"},{"key":"ref35","first-page":"3242","article-title":"Policy consolidation for continual reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"97","author":"Kaplanis"},{"key":"ref36","first-page":"1","article-title":"Sequential decision-making under non-stationary environments via sequential change-point detection","volume-title":"Proc. Learn. Multiple Contexts (LMCE)","author":"Hadoux"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10015-013-0106-0"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"1930","DOI":"10.1007\/s10489-021-02321-6","article-title":"Change point detection for compositional multivariate data","volume":"52","author":"Prabuchandran","year":"2022","journal-title":"Appl. Intell."},{"key":"ref39","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Sutton"},{"issue":"54","key":"ref40","first-page":"1679","article-title":"Value function based reinforcement learning in changing Markovian environments","volume":"9","author":"Cs\u00e1ji","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref41","first-page":"16305","article-title":"Rethinking the pruning criteria for convolutional neural network","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Huang"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1080\/00031305.1994.10476030"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1080\/00401706.1998.10485211"},{"key":"ref44","first-page":"8580","article-title":"Neural tangent kernel: Convergence and generalization in neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Jacot"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.11.004"},{"key":"ref46","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref47","volume-title":"Minimalistic Gridworld environment for OpenAI gym","author":"Chevalier-Boisvert","year":"2018"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2018.2877047"},{"key":"ref49","first-page":"18878","article-title":"Conflict-averse gradient descent for multi-task learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Liu"},{"key":"ref50","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Yu"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/10646531\/10433932.pdf?arnumber=10433932","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T18:55:44Z","timestamp":1732906544000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10433932\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":50,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2024.3356981","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9]]}}}