{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T12:57:01Z","timestamp":1777467421507,"version":"3.51.4"},"reference-count":38,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T00:00:00Z","timestamp":1749772800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["PTDC\/CCI-COM\/5060\/2021"],"award-info":[{"award-number":["PTDC\/CCI-COM\/5060\/2021"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["UIDB\/50021\/2020"],"award-info":[{"award-number":["UIDB\/50021\/2020"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["2022.04180"],"award-info":[{"award-number":["2022.04180"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["2020.05360"],"award-info":[{"award-number":["2020.05360"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1016\/j.artint.2025.104382","type":"journal-article","created":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T19:25:03Z","timestamp":1749842703000},"page":"104382","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":3,"special_numbering":"C","title":["Reinforcement learning in convergently non-stationary environments: Feudal hierarchies and learned representations"],"prefix":"10.1016","volume":"347","author":[{"given":"Diogo S.","family":"Carvalho","sequence":"first","affiliation":[]},{"given":"Pedro A.","family":"Santos","sequence":"additional","affiliation":[]},{"given":"Francisco S.","family":"Melo","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.artint.2025.104382_br0010","series-title":"Modern Trends in Controlled Stochastic Processes","first-page":"192","article-title":"Full gradient DQN reinforcement learning: a provably convergent scheme","author":"Avrachenkov","year":"2021"},{"key":"10.1016\/j.artint.2025.104382_br0020","series-title":"Proceedings of the International Conference on Machine Learning","first-page":"30","article-title":"Residual algorithms: reinforcement learning with function approximation","author":"Baird","year":"1995"},{"key":"10.1016\/j.artint.2025.104382_br0030","series-title":"Stochastic Approximation: A Dynamical Systems Viewpoint","author":"Borkar","year":"2008"},{"key":"10.1016\/j.artint.2025.104382_br0040","series-title":"Advances in Neural Information Processing Systems","article-title":"Neural temporal-difference learning converges to global optima","author":"Cai","year":"2019"},{"key":"10.1016\/j.artint.2025.104382_br0050","series-title":"Advances in Neural Information Processing Systems","first-page":"271","article-title":"Feudal reinforcement learning","author":"Dayan","year":"1993"},{"key":"10.1016\/j.artint.2025.104382_br0060","series-title":"International Conference on Machine Learning","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"Duan","year":"2016"},{"key":"10.1016\/j.artint.2025.104382_br0070","series-title":"Uncertainty in Artificial Intelligence","first-page":"162","article-title":"Metrics for finite Markov decision processes","author":"Ferns","year":"2004"},{"key":"10.1016\/j.artint.2025.104382_br0080","doi-asserted-by":"crossref","first-page":"1662","DOI":"10.1137\/10080484X","article-title":"Bisimulation metrics for continuous Markov decision processes","volume":"40","author":"Ferns","year":"2011","journal-title":"SIAM J. Comput."},{"key":"10.1016\/j.artint.2025.104382_br0090","series-title":"International Conference on Machine Learning","first-page":"3556","article-title":"Representations for stable off-policy reinforcement learning","author":"Ghosh","year":"2020"},{"key":"10.1016\/j.artint.2025.104382_br0100","series-title":"International Conference on Machine Learning","first-page":"1480","article-title":"DARLA: improving zero-shot transfer in reinforcement learning","author":"Higgins","year":"2017"},{"key":"10.1016\/j.artint.2025.104382_br0110","doi-asserted-by":"crossref","first-page":"172","DOI":"10.3390\/make4010009","article-title":"Hierarchical reinforcement learning: a survey and open research challenges","volume":"4","author":"Hutsebaut-Buysse","year":"2022","journal-title":"Mach. Learn. Knowl. Extr."},{"key":"10.1016\/j.artint.2025.104382_br0120","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1016\/j.tcs.2016.07.032","article-title":"Extreme state aggregation beyond Markov decision processes","volume":"650","author":"Hutter","year":"2016","journal-title":"Theor. Comput. Sci."},{"key":"10.1016\/j.artint.2025.104382_br0130","author":"Hutter"},{"key":"10.1016\/j.artint.2025.104382_br0140","series-title":"Advances in Neural Information Processing Systems","article-title":"Language as an abstraction for hierarchical deep reinforcement learning","author":"Jiang","year":"2019"},{"key":"10.1016\/j.artint.2025.104382_br0150","series-title":"Second International Conference on Learning Representations","first-page":"121","article-title":"Stochastic gradient VB and the variational auto-encoder","author":"Kingma","year":"2014"},{"key":"10.1016\/j.artint.2025.104382_br0160","doi-asserted-by":"crossref","first-page":"108","DOI":"10.1016\/j.automatica.2016.12.014","article-title":"A stability criterion for two timescale stochastic approximation schemes","volume":"79","author":"Lakshminarayanan","year":"2017","journal-title":"Automatica"},{"key":"10.1016\/j.artint.2025.104382_br0170","author":"Lan"},{"key":"10.1016\/j.artint.2025.104382_br0180","series-title":"The 2012 International Joint Conference on Neural Networks (IJCNN)","first-page":"1","article-title":"Autonomous reinforcement learning on raw visual input data in a real world application","author":"Lange","year":"2012"},{"key":"10.1016\/j.artint.2025.104382_br0190","series-title":"International Conference on Machine Learning","first-page":"5639","article-title":"CURL: contrastive unsupervised representations for reinforcement learning","author":"Laskin","year":"2020"},{"key":"10.1016\/j.artint.2025.104382_br0200","series-title":"Proceedings of the 7th International Conference on Learning Representations","article-title":"Learning multi-level hierarchies with hindsight","author":"Levy","year":"2017"},{"key":"10.1016\/j.artint.2025.104382_br0210","first-page":"5","article-title":"Towards a unified theory of state abstraction for MDPs","volume":"4","author":"Li","year":"2006","journal-title":"ISAIM"},{"key":"10.1016\/j.artint.2025.104382_br0220","series-title":"International Conference on Machine Learning (ICML)","article-title":"RLlib: abstractions for distributed reinforcement learning","author":"Liang","year":"2018"},{"key":"10.1016\/j.artint.2025.104382_br0230","author":"Lim"},{"key":"10.1016\/j.artint.2025.104382_br0240","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1016\/j.neucom.2013.09.055","article-title":"Autoencoder for words","volume":"139","author":"Liou","year":"2014","journal-title":"Neurocomputing"},{"key":"10.1016\/j.artint.2025.104382_br0250","doi-asserted-by":"crossref","first-page":"531","DOI":"10.1080\/00207179208934253","article-title":"The general problem of the stability of motion","volume":"55","author":"Lyapunov","year":"1992","journal-title":"Int. J. Control"},{"key":"10.1016\/j.artint.2025.104382_br0260","series-title":"International Joint Conference on Artificial Intelligence (IJCAI)","first-page":"2546","article-title":"On Q-learning convergence for non-Markov decision processes","author":"Majeed","year":"2018"},{"key":"10.1016\/j.artint.2025.104382_br0270","series-title":"Advances in Neural Information Processing Systems","first-page":"1117","article-title":"On the almost sure convergence of stochastic gradient descent in non-convex problems","author":"Mertikopoulos","year":"2020"},{"key":"10.1016\/j.artint.2025.104382_br0280","series-title":"Advances in Neural Information Processing Systems","article-title":"Data-efficient hierarchical reinforcement learning","author":"Nachum","year":"2018"},{"key":"10.1016\/j.artint.2025.104382_br0290","series-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman","year":"2005"},{"key":"10.1016\/j.artint.2025.104382_br0300","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","article-title":"A stochastic approximation method","volume":"22","author":"Robbins","year":"1951","journal-title":"Ann. Math. Stat."},{"key":"10.1016\/j.artint.2025.104382_br0310","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1023\/A:1007678930559","article-title":"Convergence results for single-step on-policy reinforcement-learning algorithms","volume":"38","author":"Singh","year":"2000","journal-title":"Mach. Learn."},{"key":"10.1016\/j.artint.2025.104382_br0320","series-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.artint.2025.104382_br0330","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1023\/A:1018008221616","article-title":"Feature-based methods for large scale dynamic programming","volume":"22","author":"Tsitsiklis","year":"1996","journal-title":"Mach. Learn."},{"key":"10.1016\/j.artint.2025.104382_br0340","author":"Wang"},{"key":"10.1016\/j.artint.2025.104382_br0350","first-page":"279","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/j.artint.2025.104382_br0360","series-title":"International Conference on Machine Learning","first-page":"10555","article-title":"A finite-time analysis of Q-learning with neural network function approximation","author":"Xu","year":"2020"},{"key":"10.1016\/j.artint.2025.104382_br0370","author":"Zhang"},{"key":"10.1016\/j.artint.2025.104382_br0380","series-title":"International Conference on Machine Learning","first-page":"12621","article-title":"Breaking the deadly triad with a target network","author":"Zhang","year":"2021"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370225001018?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370225001018?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:30:46Z","timestamp":1777383046000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0004370225001018"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":38,"alternative-id":["S0004370225001018"],"URL":"https:\/\/doi.org\/10.1016\/j.artint.2025.104382","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[2025,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Reinforcement learning in convergently non-stationary environments: Feudal hierarchies and learned representations","name":"articletitle","label":"Article Title"},{"value":"Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.artint.2025.104382","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"104382"}}