{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T12:10:08Z","timestamp":1752149408138,"version":"3.41.2"},"reference-count":10,"publisher":"Springer Science and Business Media LLC","issue":"2-3","license":[{"start":{"date-parts":[[2002,11,1]],"date-time":"2002-11-01T00:00:00Z","timestamp":1036108800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2002,11,1]],"date-time":"2002-11-01T00:00:00Z","timestamp":1036108800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Machine Learning"],"published-print":{"date-parts":[[2002,11]]},"DOI":"10.1023\/a:1017917511082","type":"journal-article","created":{"date-parts":[[2002,12,30]],"date-time":"2002-12-30T09:36:44Z","timestamp":1041241004000},"page":"107-109","source":"Crossref","is-referenced-by-count":0,"title":["Introduction"],"prefix":"10.1007","volume":"49","author":[{"given":"Satinder","family":"Singh","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"395103_CR1","volume-title":"Neuro-dynamic programming","author":"D. P. Bertsekas","year":"1996","unstructured":"Bertsekas, D. P., & Tsitsiklis, J. N. (1996). Neuro-dynamic programming. Belmont, MA: Athena Scientific."},{"key":"395103_CR2","first-page":"118","volume-title":"Machine Learning: Proceedings of the Fifteenth International Conference","author":"T. G. Dietterich","year":"1998","unstructured":"Dietterich, T. G. (1998). The MAXQ method for hierarchical reinforcement learning. In Machine Learning: Proceedings of the Fifteenth International Conference (pp. 118\u2013126). San Mateo, CA: Morgan Kaufman."},{"key":"395103_CR3","doi-asserted-by":"crossref","first-page":"1371","DOI":"10.1162\/089976600300015411","volume":"12","author":"H. Jaeger","year":"2000","unstructured":"Jaeger, H. (2000). Observable operator models for discrete stochastic time series. Neural Computation, 12, 1371\u20131398.","journal-title":"Neural Computation"},{"key":"395103_CR4","unstructured":"McCallum, A. K. (1995). Reinforcement learning with selective perception and hidden state. Doctoral dissertation, Department of Computer Science, University of Rochester."},{"key":"395103_CR5","volume-title":"Advances in neural information processing systems 11","author":"R. Parr","year":"1998","unstructured":"Parr, R., & Russell, S. (1998). Reinforcement learning with hierarchies of machines. In Advances in neural information processing systems 11. Cambridge. MA: MIT Press."},{"key":"395103_CR6","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1145\/176584.176589","volume":"41","author":"R. L. Rivest","year":"1994","unstructured":"Rivest, R. L., & Schapire, R. E. (1994). Diversity-based inference of finite automata. Journal of the ACM, 41, 555\u2013589.","journal-title":"Journal of the ACM"},{"key":"395103_CR7","volume-title":"Reinforcement learning: An introduction","author":"R. S. Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction. Cambridge, MA: MIT Press."},{"key":"395103_CR8","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R. S. Sutton","year":"1999","unstructured":"Sutton, R. S., Precup, D., & Singh, S. (1999). Between MDPs and Semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence, 112, 181\u2013211","journal-title":"Artificial Intelligence"},{"key":"395103_CR9","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G. J. Tesauro","year":"1995","unstructured":"Tesauro, G. J. (1995). Temporal difference learning and TD-Gammon. Communications of the ACM, 38, 58\u201368.","journal-title":"Communications of the ACM"},{"issue":"1","key":"395103_CR10","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1023\/A:1022619109594","volume":"7","author":"S. D. Whitehead","year":"1991","unstructured":"Whitehead, S. D., & Ballard, D. H. (1991). Learning to perceive and act by trial and error. Machine Learning, 7:1, 45\u201383.","journal-title":"Machine Learning"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1017917511082.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1017917511082\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1017917511082.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T11:45:02Z","timestamp":1752147902000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1017917511082"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002,11]]},"references-count":10,"journal-issue":{"issue":"2-3","published-print":{"date-parts":[[2002,11]]}},"alternative-id":["395103"],"URL":"https:\/\/doi.org\/10.1023\/a:1017917511082","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2002,11]]}}}