{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T19:12:21Z","timestamp":1775934741867,"version":"3.50.1"},"reference-count":70,"publisher":"Elsevier BV","issue":"1-2","license":[{"start":{"date-parts":[[1995,2,1]],"date-time":"1995-02-01T00:00:00Z","timestamp":791596800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2013,7,25]],"date-time":"2013-07-25T00:00:00Z","timestamp":1374710400000},"content-version":"vor","delay-in-days":6749,"URL":"https:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[1995,2]]},"DOI":"10.1016\/0004-3702(94)00012-p","type":"journal-article","created":{"date-parts":[[2002,7,25]],"date-time":"2002-07-25T09:49:34Z","timestamp":1027590574000},"page":"271-306","source":"Crossref","is-referenced-by-count":78,"title":["Reinforcement learning of non-Markov decision processes"],"prefix":"10.1016","volume":"73","author":[{"given":"Steven D.","family":"Whitehead","sequence":"first","affiliation":[]},{"given":"Long-Ji","family":"Lin","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/0004-3702(94)00012-P_BIB1","series-title":"Tech. Report No. 1085","article-title":"The dynamic structure of everyday life","author":"Agre","year":"1988"},{"key":"10.1016\/0004-3702(94)00012-P_BIB2","series-title":"Proceedings IJCAI-89","first-page":"794","article-title":"Noise-tolerant instance-based learning algorithms","author":"Aha","year":"1989"},{"issue":"4","key":"10.1016\/0004-3702(94)00012-P_BIB3","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1007\/BF00133571","article-title":"Active vision","volume":"1","author":"Aloimonons","year":"1988","journal-title":"Int. J. Comput. Vision"},{"key":"10.1016\/0004-3702(94)00012-P_BIB4","article-title":"Connectionist modeling and control of finite state environments","author":"Bachrach","year":"1992"},{"key":"10.1016\/0004-3702(94)00012-P_BIB5","series-title":"Proceedings U.S.-France Robotics Workshop","article-title":"Sensing strategies","author":"Bajcsy","year":"1984"},{"key":"10.1016\/0004-3702(94)00012-P_BIB6","article-title":"Animate vision","author":"Ballard","year":"1990"},{"key":"10.1016\/0004-3702(94)00012-P_BIB7","article-title":"Real-time learning and control using asynchronous dynamic programming","author":"Barto","year":"1991"},{"issue":"5","key":"10.1016\/0004-3702(94)00012-P_BIB8","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","article-title":"Neuron-like elements that can solve difficult learning control problems","volume":"13","author":"Barto","year":"1983","journal-title":"IEEE Trans. Syst. Man Cybern."},{"key":"10.1016\/0004-3702(94)00012-P_BIB9","author":"Bellman","year":"1957"},{"key":"10.1016\/0004-3702(94)00012-P_BIB10","author":"Bertsekas","year":"1987"},{"key":"10.1016\/0004-3702(94)00012-P_BIB11","article-title":"The Tempo 2 algorithm: adjusting time-delays by supervised learning","volume":"3","author":"Bodenhausen","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB12","author":"Chapman","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB13_1","series-title":"Proceedings IJCAI-9I","article-title":"Learning from delayed reinforcement in a complex domain","author":"Chapman","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB13_2","author":"Chapman","year":"1990","journal-title":"Teleos Technical Report TR-90-11"},{"key":"10.1016\/0004-3702(94)00012-P_BIB14","series-title":"Proceedings AAA1-92","first-page":"183","article-title":"Reinforcement learning with perceptual aliasing: the predictive distinctions approach","author":"Chrisman","year":"1992"},{"key":"10.1016\/0004-3702(94)00012-P_BIB15","series-title":"Proceedings Ninth International Conference on Machine Learning","article-title":"A teaching method for reinforcement learning","author":"Clouse","year":"1992"},{"key":"10.1016\/0004-3702(94)00012-P_BIB16","article-title":"Feudal reinforcement learning","volume":"5","author":"Dayan","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB17","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","article-title":"Finding structure in time","volume":"14","author":"Elman","year":"1990","journal-title":"Cogn. Sci."},{"key":"10.1016\/0004-3702(94)00012-P_BIB18","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1007\/BF00113898","article-title":"Credit assignment in rule discovery systems based on genetic algorithms","volume":"3","author":"Grefenstette","year":"1988","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB19","article-title":"Escaping brittleness: the possibilities of general-purpose learning algortihms applied to parallel rule-based systems","volume":"II","author":"Holland","year":"1986"},{"key":"10.1016\/0004-3702(94)00012-P_BIB20","series-title":"Technical Report CMU-CS-91-208","article-title":"A connectionist learning architecture for parsing spoken language","author":"Jain","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB21","article-title":"Learning in embedded systems","author":"Kaelbling","year":"1990"},{"key":"10.1016\/0004-3702(94)00012-P_BIB22","series-title":"Proceedings Tenth International Conference on Machine Learning","article-title":"Hierarchical learning in stochastic domains: preliminary results","author":"Kaelbling","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB23","article-title":"Complexity analysis of real-time reinforcement learning applied to finding shortest paths in deterministic domains","author":"Koenig","year":"1992"},{"key":"10.1016\/0004-3702(94)00012-P_BIB24","series-title":"Proceedings AAAI-91","first-page":"781","article-title":"Programming robots using reinforcement learning and teaching","author":"Lin","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB25","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1007\/BF00992699","article-title":"Self-improving reactive agents based on reinforcement learning, planning and teaching","volume":"8","author":"Lin","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB26","series-title":"Proceedings 1993 IEEE International Conference on Neural Networks","article-title":"Hierarchical learning of robot skills by reinforcement","author":"Lin","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB27","series-title":"Technical Report CMU-CS-93-103","article-title":"Reinforcement learning for robots using neural networks","author":"Lin","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB28","series-title":"Proceedings Second International Conference on Simulation of Adaptive Behavior: From Animals to Animats","article-title":"Reinforcement learning with hidden states","author":"Lin","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB29","series-title":"Proceedings Eighth International Workshop on Machine Learning","article-title":"Scaling reinforcement learning to robotics by exploiting the subsumption architecture","author":"Mahadevan","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB30","series-title":"Prolegomena","first-page":"206","article-title":"\u2018Boxes\u2019 as a model of pattern-formation","volume":"1","author":"Michie","year":"1968"},{"key":"10.1016\/0004-3702(94)00012-P_BIB31","article-title":"Explanation-based neural network learning for robot control","volume":"5","author":"Mitchell","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB32","series-title":"Proceedings Eighth International Conference on Machine Learning","first-page":"333","article-title":"Variable resolution dynamic programming: efficiently learning action maps in multivariate real-values state spaces","author":"Moore","year":"1991"},{"issue":"1","key":"10.1016\/0004-3702(94)00012-P_BIB33","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1007\/BF00993104","article-title":"Prioritized sweeping: reinforcement learning with less data and less real time","volume":"13","author":"Moore","year":"1993","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB34","series-title":"Proceedings Second International Conference on Simulation of Adaptive Behavior: From Animals to Animats","article-title":"Efficient learning and planning within the Dyna framework","author":"Peng","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB35","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1007\/BF00116251","article-title":"Induction of decision trees","volume":"1","author":"Quinlan","year":"1986","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB36","author":"Ross","year":"1983"},{"key":"10.1016\/0004-3702(94)00012-P_BIB37","article-title":"Learning internal representations by error propagation","volume":"1","author":"Rumelhart","year":"1986"},{"key":"10.1016\/0004-3702(94)00012-P_BIB38","series-title":"Computers and Thought","first-page":"71","article-title":"Some studies in machine learning using the game of checkers","author":"Samuel","year":"1963"},{"key":"10.1016\/0004-3702(94)00012-P_BIB39","first-page":"500","article-title":"Reinforcement learning in Markovian and non-Markovian environments","volume":"3","author":"Schmidhuber","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB40","series-title":"Proceedings Eighth International Workshop on Machine Learning","first-page":"348","article-title":"Transfer of learning across compositions of sequential tasks","author":"Singh","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB41","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1007\/BF00992700","article-title":"Transfer of learning by composing solutions of elemental sequential tasks","volume":"8","author":"Singh","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB42","author":"Snedecor","year":"1989"},{"key":"10.1016\/0004-3702(94)00012-P_BIB43","article-title":"Temporal credit assignment in reinforcement learning","author":"Sutton","year":"1984"},{"key":"10.1016\/0004-3702(94)00012-P_BIB65","unstructured":"also: COINS Tech. Report 84-02."},{"issue":"1","key":"10.1016\/0004-3702(94)00012-P_BIB44","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to predict by the method of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB45","series-title":"Proceedings Seventh International Conference on Machine Learning","article-title":"Integrating architectures for learning, planning, and reacting based on approximating dynamic programming","author":"Sutton","year":"1990"},{"key":"10.1016\/0004-3702(94)00012-P_BIB46","author":"Sutton","year":"1992"},{"key":"10.1016\/0004-3702(94)00012-P_BIB47","series-title":"Proceedings IJCAI-91","article-title":"Cost sensitive reinforcement learning tor adaptive classification and control","author":"Tan","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB48","article-title":"Cost sensitive robot learning","author":"Tan","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB49","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1007\/BF00992697","article-title":"Practical issues in temporal difference learning","volume":"8","author":"Tesauro","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB50","article-title":"Planning with an adaptive world model","volume":"3","author":"Thrun","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB51","article-title":"Efficient exploration in reinforcement learning","author":"Thrun","year":"1992"},{"key":"10.1016\/0004-3702(94)00012-P_BIB52_1","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1016\/0010-0277(84)90023-4","article-title":"Visual routines","volume":"18","author":"Ullman","year":"1984","journal-title":"Cognition"},{"key":"10.1016\/0004-3702(94)00012-P_BIB52_2","author":"Ullman","year":"1985"},{"key":"10.1016\/0004-3702(94)00012-P_BIB53","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1162\/neco.1989.1.1.39","article-title":"Modular construction of time-delay neural networks for speech recognition","volume":"1","author":"Waibel","year":"1989","journal-title":"Neural Comput."},{"key":"10.1016\/0004-3702(94)00012-P_BIB54","article-title":"Learning from delayed rewards","author":"Watkins","year":"1989"},{"key":"10.1016\/0004-3702(94)00012-P_BIB55","first-page":"39","article-title":"Technical note: Q-learnmg","volume":"82","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB56_1","series-title":"Proceedings AAAI-91","article-title":"Complexity and cooperation in reinforcement learning","author":"Whitehead","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB56_2","series-title":"Proceedings Eighth International Workshop on Machine Learning","author":"Whitehead","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB57","article-title":"Reinforcement learning for the adaptive control of perception and action","author":"Whitehead","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB58","series-title":"Proceedings Sixth International Workshop on Machine Learning","article-title":"A role for anticipation in reactive systems that learn","author":"Whitehead","year":"1989"},{"issue":"4","key":"10.1016\/0004-3702(94)00012-P_BIB59_1","doi-asserted-by":"crossref","DOI":"10.1162\/neco.1990.2.4.409","article-title":"Active perception and reinforcement learning","volume":"2","author":"Whitehead","year":"1990","journal-title":"Neural Comput."},{"key":"10.1016\/0004-3702(94)00012-P_BIB59_2","series-title":"Proceedings Seventh International Conference on Machine Learning","author":"Whitehead","year":"1990"},{"issue":"1","key":"10.1016\/0004-3702(94)00012-P_BIB60_1","doi-asserted-by":"crossref","DOI":"10.1007\/BF00058926","article-title":"Learning to perceive and act by trial and error","volume":"7","author":"Whitehead","year":"1991","journal-title":"Mach. Learn."},{"key":"10.1016\/0004-3702(94)00012-P_BIB60_2","author":"Whitehead","year":"1990"},{"key":"10.1016\/0004-3702(94)00012-P_BIB61","article-title":"A study of cooperative mechanisms for faster reinforcement learning","author":"Whitehead","year":"1991"},{"key":"10.1016\/0004-3702(94)00012-P_BIB62","series-title":"Robot Learning","article-title":"Learning multiple goal behavior via task decomposition and dynamic policy merging","author":"Whitehead","year":"1993"},{"key":"10.1016\/0004-3702(94)00012-P_BIB63","article-title":"Reinforcement learning in connectionist networks","author":"Williams","year":"1986"},{"key":"10.1016\/0004-3702(94)00012-P_BIB64","series-title":"Proceedings AAAI-90","article-title":"Explaining temporal-differences to create useful concepts for evaluating states","author":"Yee","year":"1990"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:000437029400012P?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:000437029400012P?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,4,28]],"date-time":"2019-04-28T22:20:05Z","timestamp":1556490005000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/000437029400012P"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1995,2]]},"references-count":70,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[1995,2]]}},"alternative-id":["000437029400012P"],"URL":"https:\/\/doi.org\/10.1016\/0004-3702(94)00012-p","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[1995,2]]}}}