{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T04:46:33Z","timestamp":1777697193917,"version":"3.51.4"},"reference-count":20,"publisher":"SAGE Publications","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IDT"],"published-print":{"date-parts":[[2017,6,22]]},"DOI":"10.3233\/idt-170285","type":"journal-article","created":{"date-parts":[[2017,3,6]],"date-time":"2017-03-06T07:15:38Z","timestamp":1488784538000},"page":"167-175","source":"Crossref","is-referenced-by-count":2,"title":["Small-sample reinforcement learning: Improving policies using synthetic data1"],"prefix":"10.1177","volume":"11","author":[{"given":"Stephen W.","family":"Carden","sequence":"first","affiliation":[{"name":"Department of Mathematical Sciences, Georgia Southern University, Statesboro, GA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"James","family":"Livsey","sequence":"additional","affiliation":[{"name":"Center for Statistical Research and Methodology, U.S. Census Bureau, Washington, DC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"179","reference":[{"key":"10.3233\/IDT-170285_ref1","doi-asserted-by":"crossref","first-page":"767","DOI":"10.1093\/biomet\/81.4.767","article-title":"Logistic regression for autocorrelated data with applications to repeated measures","volume":"10","author":"Azzalini","year":"1994","journal-title":"Biometrika"},{"key":"10.3233\/IDT-170285_ref2","first-page":"1","article-title":"Practical kernel-based reinforcement learning","volume":"17","author":"Barreto","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"10.3233\/IDT-170285_ref3","doi-asserted-by":"crossref","first-page":"705","DOI":"10.1613\/jair.4271","article-title":"Convergence of a q-learning variant for continuous states and actions","volume":"49","author":"Carden","year":"2014","journal-title":"Journal of Artificial Intelligence Research"},{"key":"10.3233\/IDT-170285_ref4","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"Ernst","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"10.3233\/IDT-170285_ref5","unstructured":"Gaskett C., Wettergreen D. and Zelinsky A., Q-learning in Continuous State and Action Spaces, in: Proceedings of 12th Australian Joint Conference on Artificial Intelligence, Springer-Verlag, (1999)."},{"key":"10.3233\/IDT-170285_ref6","doi-asserted-by":"crossref","unstructured":"Hans A. and Udluft S., Efficient uncertainty propagation for reinforcement learning with limited data, in: Artificial Neural Networks - ICANN 2009, Allippi C. et al., ed., Springer, Berlin, 2009, pp. 70-79.","DOI":"10.1007\/978-3-642-04274-4_8"},{"key":"10.3233\/IDT-170285_ref7","doi-asserted-by":"crossref","first-page":"342","DOI":"10.1111\/j.0006-341X.2002.00342.x","article-title":"Marginalized transition models and likelihood inference for longitudinal categorical data","volume":"58","author":"Heagerty","year":"2002","journal-title":"Biometrics"},{"key":"10.3233\/IDT-170285_ref8","unstructured":"Jiang N., Kulesza A. and Singh S., Abstraction selection in model-based reinforcement learning, in: Proceedings of the 32nd International Conference on Machine Learning, (2015), 179-188."},{"key":"10.3233\/IDT-170285_ref9","volume-title":"On the Sample Complexity of Reinforcement Learning","author":"Kakade","year":"2003"},{"key":"10.3233\/IDT-170285_ref10","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1023\/A:1017932429737","article-title":"A sparse sampling algorithm for near-optimal planning in large markov decision processes","volume":"49","author":"Kearns","year":"2002","journal-title":"Machine Learning"},{"key":"10.3233\/IDT-170285_ref11","doi-asserted-by":"crossref","first-page":"1455","DOI":"10.1002\/sim.2022","article-title":"An experimental design for the development of adaptive treatment strategies","volume":"24","author":"Murphy","year":"2005","journal-title":"Statistics in Medicine"},{"key":"10.3233\/IDT-170285_ref12","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1137\/1109020","article-title":"On estimating regression","volume":"9","author":"Nadaraya","year":"1964","journal-title":"Theory of Probability and its Applications"},{"issue":"Suppl 2","key":"10.3233\/IDT-170285_ref13","doi-asserted-by":"crossref","first-page":"S52","DOI":"10.1016\/j.drugalcdep.2007.01.005","article-title":"Constructing evidence-based treatment strategies using methods from computer science","volume":"88","author":"Pineau","year":"2007","journal-title":"Drug and Alcohol Dependence"},{"key":"10.3233\/IDT-170285_ref14","doi-asserted-by":"crossref","unstructured":"Puterman M.L., Markov Decision Processes: Discrete Sto- chastic Dynamic Programming, Wiley-Interscience, Hoboken, New Jersey, 1994.","DOI":"10.1002\/9780470316887"},{"key":"10.3233\/IDT-170285_ref15","unstructured":"Rachelson E., Schnitzler F., Wehenkel L. and Ernst D., Optimal sample selection for batch-mode reinforcement learning, 3rd International Conference on Agents and Artificial Intelligence, (2011)."},{"key":"10.3233\/IDT-170285_ref16","volume-title":"Applied Probability Models with Optimization Applications","author":"Ross","year":"1992"},{"key":"10.3233\/IDT-170285_ref17","doi-asserted-by":"crossref","first-page":"57","DOI":"10.1093\/oxfordjournals.schbul.a006991","article-title":"Clinical antipsychotic trials of intervention effectiveness (CATIE): Alzheimer's disease trial","volume":"29","author":"Schneider","year":"2003","journal-title":"Schizophrenia Bulletin"},{"key":"10.3233\/IDT-170285_ref18","doi-asserted-by":"crossref","DOI":"10.1109\/TNN.1998.712192","volume-title":"Introduction to Reinforcement Learning","author":"Sutton","year":"1998"},{"key":"10.3233\/IDT-170285_ref19","volume-title":"Learning from Delayed Rewards","author":"Watkins","year":"1989"},{"key":"10.3233\/IDT-170285_ref20","first-page":"359","article-title":"Smooth regression analysis","volume":"26","author":"Watson","year":"1964","journal-title":"Sankhya: The Indian Journal of Statistics"}],"container-title":["Intelligent Decision Technologies"],"original-title":[],"link":[{"URL":"https:\/\/content.iospress.com\/download?id=10.3233\/IDT-170285","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T09:22:13Z","timestamp":1777454533000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/full\/10.3233\/IDT-170285"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,6,22]]},"references-count":20,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.3233\/idt-170285","relation":{},"ISSN":["1872-4981","1875-8843"],"issn-type":[{"value":"1872-4981","type":"print"},{"value":"1875-8843","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,6,22]]}}}