{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T13:18:07Z","timestamp":1777036687042,"version":"3.51.4"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Australian Research Council through the Laureate Project","award":["FL190100149"],"award-info":[{"award-number":["FL190100149"]}]},{"name":"Discovery Early Career Researcher Award","award":["DE200100245"],"award-info":[{"award-number":["DE200100245"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1109\/tkde.2025.3535961","type":"journal-article","created":{"date-parts":[[2025,1,29]],"date-time":"2025-01-29T19:08:50Z","timestamp":1738177730000},"page":"1930-1942","source":"Crossref","is-referenced-by-count":5,"title":["Learning Latent and Changing Dynamics in Real Non-Stationary Environments"],"prefix":"10.1109","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3624-7753","authenticated-orcid":false,"given":"Zihe","family":"Liu","sequence":"first","affiliation":[{"name":"Australian Artificial Intelligence Institute (AAII), University of Technology Sydney, Ultimo, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0690-4732","authenticated-orcid":false,"given":"Jie","family":"Lu","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute (AAII), University of Technology Sydney, Ultimo, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8367-6908","authenticated-orcid":false,"given":"Junyu","family":"Xuan","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute (AAII), University of Technology Sydney, Ultimo, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3960-0583","authenticated-orcid":false,"given":"Guangquan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute (AAII), University of Technology Sydney, Ultimo, NSW, Australia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3130265"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3127077"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3176753"},{"key":"ref4","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref5","article-title":"DeepMind control suite","author":"Tassa","year":"2018"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1561\/2300000021"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.3006084"},{"key":"ref8","article-title":"Model-based reinforcement learning for biological sequence design","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Angermueller"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-017-0468-y"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358027"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611974331.ch101"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5744"},{"key":"ref13","first-page":"767","article-title":"Interference and generalization in temporal difference learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bengio"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143872"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2017.7962986"},{"key":"ref16","first-page":"7214","article-title":"Non-stationary Markov decision processes, a worst-case approach using model-based reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lecarpentier"},{"key":"ref17","first-page":"9190","article-title":"Model-based reinforcement learning via latent-space collocation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rybkin"},{"key":"ref18","article-title":"Bayesian online changepoint detection","volume":"1050","author":"Adams","year":"2007","journal-title":"Stat"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00132"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"ref21","first-page":"3242","article-title":"Policy consolidation for continual reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kaplanis","year":"2019"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-020-01758-5"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"1930","DOI":"10.1007\/s10489-021-02321-6","article-title":"Change point detection for compositional multivariate data","volume":"52","author":"Prabuchandran","year":"2022","journal-title":"Appl. Intell."},{"key":"ref24","article-title":"Sequential decision-making under non-stationary environments via sequential change-point detection","volume-title":"Proc. Int. Workshop Learn. Over Mult. Contexts","author":"Hadoux"},{"key":"ref25","first-page":"5757","article-title":"Context-aware dynamics model for generalization in model-based reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref26","first-page":"11393","article-title":"Deep reinforcement learning amidst continual structured non-stationarity","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Xie"},{"key":"ref27","article-title":"Contextual Markov decision processes","author":"Hallak","year":"2015"},{"key":"ref28","first-page":"81","article-title":"Variational regret bounds for reinforcement learning","volume-title":"Proc. 35th Conf. Uncertainty Artif. Intell.","author":"Gajane","year":"2019"},{"issue":"4","key":"ref29","first-page":"1563","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"11","author":"Jaksch","year":"2010","journal-title":"J. Mach. Learn. Res."},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref31","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hafner"},{"key":"ref32","article-title":"Variational recurrent models for solving partially observable control tasks","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Han"},{"key":"ref33","article-title":"Hidden-mode Markov decision processes","volume-title":"Proc. 16th Int. Joint Conf. Artif. Intell., Workshop Neural, Symbolic, Reinforcement Methods Sequence Learn.","author":"Choi"},{"issue":"1","key":"ref34","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487156"},{"key":"ref36","article-title":"Model based reinforcement learning for Atari","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Kaiser"},{"key":"ref37","first-page":"10734","article-title":"A model-based reinforcement learning with adversarial training for online recommendation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Bai"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref40","article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Hafner","year":"2020"},{"key":"ref41","article-title":"RL2: Fast reinforcement learning via slow reinforcement learning","author":"Duan","year":"2016"},{"key":"ref42","article-title":"Continuous adaptation via meta-learning in nonstationary and competitive environments","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Al-Shedivat"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3185549"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i15.29590"},{"key":"ref45","first-page":"642","article-title":"Meta reinforcement learning with latent variable Gaussian processes","volume-title":"Proc. 34th Conf. Uncertainty Artif. Intell.","author":"S\u00e6mundsson"},{"key":"ref46","article-title":"Learning to adapt in dynamic, real-world environments through meta-reinforcement learning","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Nagabandi"},{"key":"ref47","article-title":"Deep online learning via meta-learning: Continual adaptation for model-based RL","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Nagabandi"},{"key":"ref48","article-title":"Deep variational Bayes filters: Unsupervised learning of state space models from raw data","volume-title":"Proc. 5th Int. Conf. Learn. Representations","author":"Karl"},{"key":"ref49","first-page":"1280","article-title":"Probabilistic recurrent state-space models","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Doerr"},{"key":"ref50","article-title":"Learning and querying fast generative models for reinforcement learning","author":"Buesing","year":"2018"},{"key":"ref51","first-page":"1154","article-title":"Offline reinforcement learning from images with latent space models","volume-title":"Proc. 3rd Annu. Conf. Learn. Dyn. Control","author":"Rafailov"},{"key":"ref52","first-page":"211","article-title":"Restarted Bayesian online change-point detector achieves optimal detection delay","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Alami"},{"key":"ref53","first-page":"857","article-title":"Stochastic neighbor embedding","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Hinton"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.2307\/2684253"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-2217(96)00385-2"},{"key":"ref56","first-page":"277","article-title":"Model-predictive control via cross-entropy and gradient-based optimization","volume-title":"Proc. 2nd Annu. Conf. Learn. Dyn. Control","author":"Bharadhwaj"},{"key":"ref57","first-page":"1049","article-title":"Sample-efficient cross-entropy method for real-time planning","volume-title":"Proc. Conf. Robot Learn.","author":"Pinneri"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"key":"ref59","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1406.1078"},{"key":"ref61","article-title":"An environment for autonomous driving decision-making","author":"Leurent","year":"2018"}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/69\/10918320\/10857660.pdf?arnumber=10857660","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,8]],"date-time":"2025-03-08T07:52:45Z","timestamp":1741420365000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10857660\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":61,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tkde.2025.3535961","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"value":"1041-4347","type":"print"},{"value":"1558-2191","type":"electronic"},{"value":"2326-3865","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4]]}}}