{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T18:31:18Z","timestamp":1773167478438,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2020,6,18]],"date-time":"2020-06-18T00:00:00Z","timestamp":1592438400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,6,18]],"date-time":"2020-06-18T00:00:00Z","timestamp":1592438400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1007\/s10489-020-01758-5","type":"journal-article","created":{"date-parts":[[2020,6,18]],"date-time":"2020-06-18T09:02:36Z","timestamp":1592470956000},"page":"3590-3606","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":92,"title":["Reinforcement learning algorithm for non-stationary environments"],"prefix":"10.1007","volume":"50","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3385-294X","authenticated-orcid":false,"given":"Sindhu","family":"Padakandla","sequence":"first","affiliation":[]},{"given":"Prabuchandran","family":"K. J.","sequence":"additional","affiliation":[]},{"given":"Shalabh","family":"Bhatnagar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,6,18]]},"reference":[{"issue":"46","key":"1758_CR1","first-page":"1","volume":"17","author":"S Abdallah","year":"2016","unstructured":"Abdallah S, Kaisers M (2016) Addressing environment non-stationarity by repeating q-learning updates. 
J Mach Learn Res 17(46):1\u201331","journal-title":"J Mach Learn Res"},{"issue":"3","key":"1758_CR2","doi-asserted-by":"publisher","first-page":"681","DOI":"10.1137\/S0363012999361974","volume":"40","author":"J Abounadi","year":"2001","unstructured":"Abounadi J, Bertsekas D, Borkar V (2001) Learning algorithms for markov decision processes with average cost. SIAM J Control Optim 40(3):681\u2013698. https:\/\/doi.org\/10.1137\/S0363012999361974","journal-title":"SIAM J Control Optim"},{"key":"1758_CR3","doi-asserted-by":"publisher","unstructured":"Andrychowicz M et al (2019). Learning dexterous in-hand manipulation. Int J Robot Res https:\/\/doi.org\/10.1177\/0278364919887447","DOI":"10.1177\/0278364919887447"},{"key":"1758_CR4","doi-asserted-by":"publisher","unstructured":"Banerjee T, Miao Liu, How JP (2017) Quickest change detection approach to optimal control in markov decision processes with model changes. In: 2017 American Control Conference (ACC). https:\/\/doi.org\/10.23919\/ACC.2017.7962986, pp 399\u2013405","DOI":"10.23919\/ACC.2017.7962986"},{"key":"1758_CR5","volume-title":"Dynamic programming and optimal control vol 2","author":"D Bertsekas","year":"2013","unstructured":"Bertsekas D (2013) Dynamic programming and optimal control vol 2, 4th edn. Athena Scientific, Belmont","edition":"4th edn."},{"key":"1758_CR6","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1016\/j.patcog.2018.10.024","volume":"87","author":"A Cano","year":"2019","unstructured":"Cano A, Krawczyk B (2019) Evolving rule-based classifiers with genetic programming on gpus for drifting data streams. Pattern Recogn 87:248\u2013268. https:\/\/doi.org\/10.1016\/j.patcog.2018.10.024","journal-title":"Pattern Recogn"},{"key":"1758_CR7","doi-asserted-by":"crossref","unstructured":"Choi SP, Yeung DY, Zhang NL (2000a) Hidden-mode markov decision processes for nonstationary sequential decision making. In: Sequence Learning. 
Springer, pp 264\u2013287","DOI":"10.1007\/3-540-44565-X_12"},{"key":"1758_CR8","unstructured":"Choi S P M, Yeung D Y, Zhang N L (2000b) An environment model for nonstationary reinforcement learning. In: Solla S A, Leen T K, M\u00fcller K (eds) Advances in neural information processing systems, vol 12. MIT Press, pp 987\u2013993"},{"key":"1758_CR9","first-page":"1679","volume":"9","author":"BC Cs\u00e1ji","year":"2008","unstructured":"Cs\u00e1ji B C, Monostori L (2008) Value function based reinforcement learning in changing markovian environments. J Mach Learn Res 9:1679\u20131709","journal-title":"J Mach Learn Res"},{"key":"1758_CR10","unstructured":"Dick T, Gy\u00f6rgy A, Szepesv\u00e1ri C (2014) Online learning in markov decision processes with changing cost sequences. In: Proceedings of the 31st international conference on International Conference on Machine Learning - vol 32, JMLR.org, ICML\u201914, pp I\u2013512\u2013I\u2013520"},{"issue":"12","key":"1758_CR11","doi-asserted-by":"publisher","first-page":"4211","DOI":"10.1007\/s10489-019-01487-4","volume":"49","author":"S Ding","year":"2019","unstructured":"Ding S, Du W, Zhao X, Wang L, Jia W (2019) A new asynchronous reinforcement learning algorithm based on improved parallel PSO. Appl Intell 49(12):4211\u20134222. https:\/\/doi.org\/10.1007\/s10489-019-01487-4","journal-title":"Appl Intell"},{"key":"1758_CR12","unstructured":"Everett R (2018) Learning against non-stationary agents with opponent modelling and deep reinforcement learning. In: 2018 AAAI spring symposium series"},{"key":"1758_CR13","unstructured":"Hadoux E, Beynier A, Weng P (2014) Sequential decision-making under non-stationary environments via sequential change-point detection. In: Learning over Multiple Contexts (LMCE), Nancy, France"},{"key":"1758_CR14","unstructured":"Hallak A, Castro D D, Mannor S (2015) Contextual markov decision processes. 
In: Proceedings of the 12th European Workshop on Reinforcement Learning (EWRL)"},{"key":"1758_CR15","unstructured":"Harel M, Mannor S, El-Yaniv R, Crammer K (2014) Concept drift detection through resampling, pp 1009\u20131017"},{"key":"1758_CR16","doi-asserted-by":"publisher","first-page":"1532","DOI":"10.1109\/ACCESS.2018.2886026","volume":"7","author":"AS Iwashita","year":"2019","unstructured":"Iwashita A S, Papa J P (2019) An overview on concept drift learning. IEEE Access 7:1532\u20131547. https:\/\/doi.org\/10.1109\/ACCESS.2018.2886026","journal-title":"IEEE Access"},{"key":"1758_CR17","first-page":"1563","volume":"11","author":"T Jaksch","year":"2010","unstructured":"Jaksch T, Ortner R, Auer P (2010) Near-optimal regret bounds for reinforcement learning. J Mach Learn Res 11:1563\u20131600","journal-title":"J Mach Learn Res"},{"key":"1758_CR18","unstructured":"Kaplanis C et al (2019) Policy consolidation for continual reinforcement learning. In: Proceedings of the 36th international conference on machine learning, PMLR, vol 97, pp 3242\u20133251"},{"key":"1758_CR19","doi-asserted-by":"crossref","unstructured":"Kemker R et al (2018) Measuring catastrophic forgetting in neural networks. In: Thirty-second AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.11651"},{"key":"1758_CR20","doi-asserted-by":"crossref","unstructured":"Kolomvatsos K, Anagnostopoulos C (2017) Reinforcement learning for predictive analytics in smart cities. In: Informatics, multidisciplinary digital publishing institute, vol 4, p 16","DOI":"10.3390\/informatics4030016"},{"issue":"4","key":"1758_CR21","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"VR Konda","year":"2003","unstructured":"Konda V R, Tsitsiklis J N (2003) On actor-critic algorithms. 
SIAM J Control Optim 42(4):1143\u20131166","journal-title":"SIAM J Control Optim"},{"key":"1758_CR22","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1016\/j.asoc.2017.12.008","volume":"68","author":"B Krawczyk","year":"2018","unstructured":"Krawczyk B, Cano A (2018) Online ensemble learning with abstaining classifiers for drifting and noisy data streams. Appl Soft Comput 68:677\u201369. https:\/\/doi.org\/10.1016\/j.asoc.2017.12.008","journal-title":"Appl Soft Comput"},{"key":"1758_CR23","unstructured":"Levin, David A, Peres Y, Wilmer EL, Elizabeth L (2006) Markov Chains and Mixing Times. American Mathematical Soc."},{"key":"1758_CR24","unstructured":"Liebman E, Zavesky E, Stone P (2018) A stitch in time - autonomous model management via reinforcement learning. In: Proceedings of the 17th international conference on autonomous agents and multiagent systems, international foundation for Autonomous Agents and Multiagent Systems, AAMAS \u201918, pp 990\u2013998"},{"issue":"505","key":"1758_CR25","doi-asserted-by":"crossref","first-page":"334","DOI":"10.1080\/01621459.2013.849605","volume":"109","author":"DS Matteson","year":"2014","unstructured":"Matteson D S, James N A (2014) A nonparametric approach for multiple change point analysis of multivariate data. J Am Stat Assoc 109(505):334\u2013345","journal-title":"J Am Stat Assoc"},{"key":"1758_CR26","unstructured":"Minka T (2000) Estimating a Dirichlet distribution"},{"issue":"2","key":"1758_CR27","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1109\/MCOM.2018.1700298","volume":"56","author":"M Mohammadi","year":"2018","unstructured":"Mohammadi M, Al-Fuqaha A (2018) Enabling cognitive smart cities using big data and machine learning: approaches and challenges. IEEE Commun Mag 56(2):94\u2013101. https:\/\/doi.org\/10.1109\/MCOM.2018.1700298","journal-title":"IEEE Commun Mag"},{"key":"1758_CR28","unstructured":"Nagabandi A et al (2018) Learning to adapt: meta-learning for model-based control. 
arXiv:1803.11347"},{"issue":"2","key":"1758_CR29","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1109\/LRA.2019.2891991","volume":"4","author":"F Niroui","year":"2019","unstructured":"Niroui F, Zhang K, Kashino Z, Nejat G (2019) Deep reinforcement learning robot for search and rescue applications: exploration in unknown cluttered environments. IEEE Robot Autom Lett 4(2):610\u2013617. https:\/\/doi.org\/10.1109\/LRA.2019.2891991","journal-title":"IEEE Robot Autom Lett"},{"key":"1758_CR30","unstructured":"Ortner R, Gajane P, Auer P (2019) Variational regret bounds for reinforcement learning. In: Proceedings of the 35th conference on uncertainty in artificial intelligence"},{"issue":"1\/2","key":"1758_CR31","doi-asserted-by":"crossref","first-page":"100","DOI":"10.2307\/2333009","volume":"41","author":"ES Page","year":"1954","unstructured":"Page E S (1954) Continuous inspection schemes. Biometrika 41(1\/2):100\u2013115","journal-title":"Biometrika"},{"issue":"1","key":"1758_CR32","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/WCL.2012.112012.120754","volume":"2","author":"KJ Prabuchandran","year":"2013","unstructured":"Prabuchandran K J, Meena S K, Bhatnagar S (2013) Q-learning based energy management policies for a single sensor node with finite buffer. IEEE Wirel Commun Lett 2(1):82\u201385. https:\/\/doi.org\/10.1109\/WCL.2012.112012.120754","journal-title":"IEEE Wirel Commun Lett"},{"key":"1758_CR33","unstructured":"Prabuchandran KJ, Singh N, Dayama P, Pandit V (2019). Change Point Detection for Compositional Multivariate Data. arXiv:1901.04935"},{"key":"1758_CR34","doi-asserted-by":"publisher","unstructured":"Prashanth LA, Bhatnagar S (2011) Reinforcement learning with average cost for adaptive control of traffic lights at intersections. 
https:\/\/doi.org\/10.1109\/ITSC.2011.6082823, pp 1640\u20131645","DOI":"10.1109\/ITSC.2011.6082823"},{"key":"1758_CR35","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"ML Puterman","year":"2005","unstructured":"Puterman M L (2005) Markov decision processes: discrete stochastic dynamic programming, 2nd edn. Wiley, New York","edition":"2nd edn."},{"issue":"9","key":"1758_CR36","doi-asserted-by":"publisher","first-page":"2570","DOI":"10.1109\/TNNLS.2018.2886956","volume":"30","author":"M Roveri","year":"2019","unstructured":"Roveri M (2019) Learning discrete-time markov chains under concept drift. IEEE Trans Neural Netw Learn Syst 30(9):2570\u20132582. https:\/\/doi.org\/10.1109\/TNNLS.2018.2886956","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1758_CR37","doi-asserted-by":"publisher","unstructured":"Salkham A, Cahill V (2010) Soilse: a decentralized approach to optimization of fluctuating urban traffic using reinforcement learning. In: 13th international IEEE conference on intelligent transportation systems. https:\/\/doi.org\/10.1109\/ITSC.2010.5625145, pp 531\u2013538","DOI":"10.1109\/ITSC.2010.5625145"},{"issue":"1","key":"1758_CR38","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1137\/1108002","volume":"8","author":"A Shiryaev","year":"1963","unstructured":"Shiryaev A (1963) On Optimum Methods in Quickest Detection Problems. Theory Probab Appl 8(1):22\u201346","journal-title":"Theory Probab Appl"},{"key":"1758_CR39","doi-asserted-by":"publisher","unstructured":"da Silva BC, Basso EW, Bazzan ALC, Engel PM (2006) Dealing with non-stationary environments using context detection. In: Proceedings of the 23rd International Conference on Machine Learning, Association for Computing Machinery, ICML \u201906. 
https:\/\/doi.org\/10.1145\/1143844.1143872, pp 217\u2013224","DOI":"10.1145\/1143844.1143872"},{"key":"1758_CR40","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton R S, Barto A G (2018) Reinforcement learning: an introduction, 2nd. MIT Press, Cambridge","edition":"2nd"},{"key":"1758_CR41","unstructured":"Sutton RS, McAllester D, Singh S, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. In: Proceedings of the 12th international conference on neural information processing systems, pp 1057\u20131063"},{"key":"1758_CR42","unstructured":"Tatbul N, Lee TJ, Zdonik S, Alam M, Gottschlich J (2018) Precision and recall for time series. In: Advances in neural information processing systems, pp 1920\u20131930"},{"key":"1758_CR43","doi-asserted-by":"publisher","unstructured":"Tijsma AD, Drugan MM, Wiering MA (2016) Comparing exploration strategies for q-learning in random stochastic mazes. In: 2016 IEEE Symposium Series on Computational Intelligence (SSCI). https:\/\/doi.org\/10.1109\/SSCI.2016.7849366, pp 1\u20138","DOI":"10.1109\/SSCI.2016.7849366"},{"issue":"3-4","key":"1758_CR44","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins C J, Dayan P (1992) Q-learning. Mach Learn 8(3-4):279\u2013292","journal-title":"Mach Learn"},{"key":"1758_CR45","doi-asserted-by":"publisher","unstructured":"Yu JY, Mannor S (2009) Online learning in markov decision processes with arbitrarily changing rewards and transitions. 
In: 2009 international conference on game theory for networks, pp 314\u2013322, DOI https:\/\/doi.org\/10.1109\/GAMENETS.2009.5137416, (to appear in print)","DOI":"10.1109\/GAMENETS.2009.5137416"},{"issue":"2","key":"1758_CR46","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/s10489-018-1296-x","volume":"49","author":"X Zhao","year":"2019","unstructured":"Zhao X et al (2019) Applications of asynchronous deep reinforcement learning based on dynamic updating weights. Appl Intell 49(2):581\u2013591. https:\/\/doi.org\/10.1007\/s10489-018-1296-x","journal-title":"Appl Intell"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-020-01758-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-020-01758-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-020-01758-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,29]],"date-time":"2022-10-29T05:35:17Z","timestamp":1667021717000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-020-01758-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,18]]},"references-count":46,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2020,11]]}},"alternative-id":["1758"],"URL":"https:\/\/doi.org\/10.1007\/s10489-020-01758-5","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,6,18]]},"assertion":[{"value":"18 June 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}