{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T04:00:26Z","timestamp":1778904026729,"version":"3.51.4"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2010,12,22]],"date-time":"2010-12-22T00:00:00Z","timestamp":1292976000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2011,7]]},"DOI":"10.1007\/s10994-010-5229-0","type":"journal-article","created":{"date-parts":[[2010,12,21]],"date-time":"2010-12-21T18:27:32Z","timestamp":1292956052000},"page":"109-136","source":"Crossref","is-referenced-by-count":112,"title":["Informing sequential clinical decision-making through\u00a0reinforcement learning: an empirical study"],"prefix":"10.1007","volume":"84","author":[{"given":"Susan M.","family":"Shortreed","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric","family":"Laber","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel J.","family":"Lizotte","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"T. Scott","family":"Stroup","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joelle","family":"Pineau","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Susan A.","family":"Murphy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,12,22]]},"reference":[{"issue":"13","key":"5229_CR1","doi-asserted-by":"crossref","first-page":"144","DOI":"10.1017\/S1121189X00005649","volume":"11","author":"C. E. Adams","year":"2002","unstructured":"Adams, C. E. (2002). Schizophrenia trials: past, present and future. Epidemiologia E Psichiatria Sociale, 11(13), 144\u2013151.","journal-title":"Epidemiologia E Psichiatria Sociale"},{"issue":"2","key":"5229_CR2","doi-asserted-by":"crossref","first-page":"399","DOI":"10.1111\/1468-0262.00114","volume":"68","author":"D. W. K. Andrews","year":"2000","unstructured":"Andrews, D. W. K. (2000). Inconsistency of the bootstrap when a parameter is on the boundary of the parameter space. Econometrica, 68(2), 399\u2013405.","journal-title":"Econometrica"},{"key":"5229_CR3","unstructured":"Bagnell, A., Ng, A., & Schneider, J. (2001). Solving uncertain Markov decision problems (Tech. Rep. CMU-RI-TR-01-25). Robotics Institute, Carnegie Mellon University."},{"key":"5229_CR4","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1038\/nrd1927","volume":"5","author":"D. A. Berry","year":"2006","unstructured":"Berry, D. A. (2006). A guide to drug discovery: Bayesian clinical trials. Nature Reviews. Drug Discovery, 5, 27\u201336.","journal-title":"Nature Reviews. Drug Discovery"},{"key":"5229_CR5","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1177\/1740774509104992","volume":"6","author":"S. Biswas","year":"2009","unstructured":"Biswas, S., Liu, D. D., Lee, J. J., & Berry, D. A. (2009). Bayesian clinical trials at the University of Texas M.\u00a0D.\u00a0Anderson cancer center. Clinical Trials, 6, 205\u2013216.","journal-title":"Clinical Trials"},{"issue":"2","key":"5229_CR6","first-page":"123","volume":"24","author":"L. Breiman","year":"1996","unstructured":"Breiman, L. (1996). Bagging predictors. Machine Learning, 24(2), 123\u2013140.","journal-title":"Machine Learning"},{"key":"5229_CR7","first-page":"53","volume-title":"Proceedings of 24th conference on uncertainty in artificial intelligence (UAI 2008)","author":"E. Brunskill","year":"2008","unstructured":"Brunskill, E., Leffler, B. R., Li, L., Littman, M., & Roy, N. (2008). A continuous-state offset-dynamics reinforcement learner. In D. A. McAllester & P. Myllym\u00e4ki (Eds.), Proceedings of 24th conference on uncertainty in artificial intelligence (UAI 2008) (pp. 53\u201361)."},{"issue":"3","key":"5229_CR8","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1177\/0962280206075303","volume":"16","author":"J. R. Carpenter","year":"2007","unstructured":"Carpenter, J. R. , Kenward, M. G., & White, I. R. (2007). Sensitivity analysis after multiple imputation under missing at random: a weighting approach. Statistical Methods in Medical Research, 16(3), 259\u2013275.","journal-title":"Statistical Methods in Medical Research"},{"key":"5229_CR9","doi-asserted-by":"crossref","first-page":"3249","DOI":"10.1002\/sim.1920","volume":"23","author":"R. Dawson","year":"2004","unstructured":"Dawson, R., & Lavori, P. W. (2004). Placebo-free designs for evaluating new mental health treatments: the use of adaptive strategies. Statistics in Medicine, 23, 3249\u20133262.","journal-title":"Statistics in Medicine"},{"key":"5229_CR10","first-page":"150","volume-title":"Proceedings of 5th conference on uncertainty in artificial intelligence (UAI 1999)","author":"R. Dearden","year":"1999","unstructured":"Dearden, R., Friedman, N., & Andre, D. (1999). Model based Bayesian exploration. In B. Kathryn, & H.\u00a0P.\u00a0Laskey (Eds.), Proceedings of 5th conference on uncertainty in artificial intelligence (UAI 1999) (pp. 150\u2013159). San Mateo: Morgan Kaufmann."},{"key":"5229_CR11","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780198524847.001.0001","volume-title":"Analysis of longitudinal data","author":"P. Diggle","year":"2002","unstructured":"Diggle, P., Heagerty, P., Liang, K. Y., & Zeger, S. (2002). Analysis of longitudinal data. Oxford: Oxford University Press."},{"key":"5229_CR12","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1145\/1390156.1390189","volume-title":"Proceedings of the 25th annual international conference on machine learning (ICML 2008)","author":"F. Doshi","year":"2008","unstructured":"Doshi, F., Pineau, J., & Roy, N. (2008). Reinforcement learning with limited reinforcement: using Bayes risk for active learning in POMDPs. In A. McCallum & S. Roweis (Eds.), Proceedings of the 25th annual international conference on machine learning (ICML 2008) (pp. 256\u2013263). New York: Omnipress."},{"issue":"1","key":"5229_CR13","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1214\/aos\/1176344552","volume":"7","author":"B. Efron","year":"1979","unstructured":"Efron, B. (1979). Bootstrap methods: another look at the jackknife. The Annals of Statistics, 7(1), 1\u201326.","journal-title":"The Annals of Statistics"},{"key":"5229_CR14","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4899-4541-9","volume-title":"An introduction to the bootstrap","author":"B. Efron","year":"1993","unstructured":"Efron, B., & Tibshirani, R. J. (1993). An introduction to the bootstrap. New York: Chapman & Hall."},{"key":"5229_CR15","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1145\/1102351.1102377","volume-title":"Proceedings of the 22nd international conference on machine learning (ICML 2005)","author":"Y. Engel","year":"2005","unstructured":"Engel, Y., Mannor, S., & Meir, R. (2005). Reinforcement learning with Gaussian processes. In L. D. Raedt & S. Wrobel (Eds.), Proceedings of the 22nd international conference on machine learning (ICML 2005) (pp. 201\u2013208). New York: ACM. 10.1145\/1102351.1102377 ."},{"key":"5229_CR16","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., & Wehenkel, L. (2005). Tree-based batch mode reinforcement learning. Journal of Machine Learning Research, 6, 503\u2013556.","journal-title":"Journal of Machine Learning Research"},{"key":"5229_CR17","first-page":"65","volume-title":"Proceedings of the machine learning conference of Belgium and The Netherlands (Benelearn)","author":"D. Ernst","year":"2006","unstructured":"Ernst, D., Stan, G. B., Goncalves, J., & Wehenkel, L. (2006). Clinical data based optimal STI strategies for HIV: a reinforcement learning approach. In Proceedings of the machine learning conference of Belgium and The Netherlands (Benelearn) (pp. 65\u201372)."},{"key":"5229_CR18","first-page":"1065","volume-title":"Advances in neural information processing systems","author":"M. M. Fard","year":"2009","unstructured":"Fard, M. M., Pineau, J. (2009). MDPs with non-deterministic policies. In D. Koller, D. Schuurmans, Y. Bengio, & L. Bottou (Eds.), Advances in neural information processing systems (pp. 1065\u20131072). Cambridge: MIT Press."},{"key":"5229_CR19","doi-asserted-by":"crossref","DOI":"10.1201\/9780429258411","volume-title":"Bayesian Data Analysis","author":"A. Gelman","year":"1995","unstructured":"Gelman, A., Carlin, J. B., Stern, H., & Rubin, D. B. (1995). Bayesian Data Analysis. New York: Chapman & Hall."},{"key":"5229_CR20","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1111\/j.0006-341X.2005.031010.x","volume":"61","author":"A. Gelman","year":"2005","unstructured":"Gelman, A., Mechelen, I. V., Verbeke, G., Heitjan, D. F., & Meulders, M. (2005). Multiple imputation for model checking: completed-data plots with missing and latent data. Biometrics, 61, 74\u201385.","journal-title":"Biometrics"},{"key":"5229_CR21","volume-title":"Proceedings of the innovative applications of artificial intelligence (IAAI)","author":"A. Guez","year":"2008","unstructured":"Guez, A., Vincent, R., Avoli, M., & Pineau, J. (2008). Adaptive treatment of epilepsy via batch-mode reinforcement learning. In Proceedings of the innovative applications of artificial intelligence (IAAI)."},{"key":"5229_CR22","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-21606-5","volume-title":"Elements of statistical learning","author":"T. Hastie","year":"2001","unstructured":"Hastie, T., Tibshirani, R., & Friedman, J. (2001). Elements of statistical learning. Berlin: Springer."},{"key":"5229_CR23","first-page":"2005","volume-title":"Proceeding of the twentieth national conference on artificial intelligence (AAAI)","author":"M. Irodova","year":"2005","unstructured":"Irodova, M., & Sloan, R. H. (2005). Reinforcement learning and function approximation. In Proceeding of the twentieth national conference on artificial intelligence (AAAI) (p. 2005). American Association for Artificial Intelligence, Menlo Park."},{"key":"5229_CR24","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L. P. Kaelbling","year":"1996","unstructured":"Kaelbling, L. P., Littman, M. L., & Moore, A. (1996). Reinforcement learning: a survey. The Journal of Artificial Intelligence Research, 4, 237\u2013385.","journal-title":"The Journal of Artificial Intelligence Research"},{"key":"5229_CR25","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L. P. Kaelbling","year":"1998","unstructured":"Kaelbling, L. P., Littman, M. L., & Cassandra, A. R. (1998). Planning and acting in partially observable stochastic domains. Artificial Intelligence, 101, 99\u2013134.","journal-title":"Artificial Intelligence"},{"key":"5229_CR26","volume-title":"Proceedings of the 20th Annual International Conference on Machine Learning (ICML 2003)","author":"S. Kakade","year":"2003","unstructured":"Kakade, S., Kearns, J., & Langford, J. (2003). Exploration in metric state spaces. In Proceedings of the 20th Annual International Conference on Machine Learning (ICML 2003)."},{"issue":"2","key":"5229_CR27","doi-asserted-by":"crossref","first-page":"261","DOI":"10.1093\/schbul\/13.2.261","volume":"13","author":"S. R. Kay","year":"1987","unstructured":"Kay, S. R., Flazbein, A., & Opler, L. A. (1987). The positive and negative syndrome scale (PANSS) for schizophrenia. Schizophrenia Bulletin, 13(2), 261\u2013276.","journal-title":"Schizophrenia Bulletin"},{"key":"5229_CR28","unstructured":"Laber, E. B., Qian, M., & Murphy, S. A. (2010). Statistical inference in dynamic treatment regimes (Tech. Rep. 506). Dept. of Statistics, University of Michigan"},{"key":"5229_CR29","doi-asserted-by":"crossref","first-page":"1107","DOI":"10.1162\/jmlr.2003.4.6.1107","volume":"4","author":"M. G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M. G., & Parr, R. (2003). Least-squares policy iteration. Journal of Machine Learning Research, 4, 1107\u20131149.","journal-title":"Journal of Machine Learning Research"},{"key":"5229_CR30","volume-title":"Statistical analysis with missing data","author":"R. J. A. Little","year":"1987","unstructured":"Little, R. J. A., & Rubin, D. B. (1987). Statistical analysis with missing data. New York: Wiley."},{"key":"5229_CR31","unstructured":"Lizotte, D. J., Laber, E., & Murphy, S. A. (2009) Assessing confidence in policies learned from sequential randomized trials (Tech. Rep. 481). Department of Statistics, University of Michigan."},{"key":"5229_CR32","first-page":"695","volume-title":"Proceedings of the twenty-seventh international conference on machine learning (ICML 2010)","author":"D. Lizotte","year":"2010","unstructured":"Lizotte, D., Bowling, M., & Murphy, S. (2010). Efficient reinforcement learning with multiple reward functions for randomized controlled trial analysis. In Proceedings of the twenty-seventh international conference on machine learning (ICML 2010). (pp. 695\u2013702). New York: Omnipress."},{"key":"5229_CR33","doi-asserted-by":"crossref","unstructured":"Mannor, S., Simester, D., Sun, P., & Tsitsiklis, J. (2007) Biases and variance in value function estimates. Management Science 53(1).","DOI":"10.1287\/mnsc.1060.0614"},{"key":"5229_CR34","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1287\/mnsc.28.1.1","volume":"28","author":"G. Monahan","year":"1982","unstructured":"Monahan, G. (1982). A survey of partially observable Markov decision processes. Management Science, 28, 1\u201316.","journal-title":"Management Science"},{"issue":"2","key":"5229_CR35","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1111\/1467-9868.00389","volume":"65","author":"S. M. Murphy","year":"2003","unstructured":"Murphy, S. M. (2003). Optimal dynamic treatment regimes. Journal of the Royal Statistical Society, Series B, 65(2), 331\u2013366.","journal-title":"Journal of the Royal Statistical Society, Series B"},{"key":"5229_CR36","doi-asserted-by":"crossref","first-page":"1455","DOI":"10.1002\/sim.2022","volume":"24","author":"S. A. Murphy","year":"2005","unstructured":"Murphy, S. A. (2005). An experimental design for the development of adaptive treatment strategies. Statistics in Medicine, 24, 1455\u20131481.","journal-title":"Statistics in Medicine"},{"issue":"2","key":"5229_CR37","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1038\/sj.npp.1301241","volume":"32","author":"S. A. Murphy","year":"2007","unstructured":"Murphy, S. A., Oslin, D., & Rush, A. J. (2007). Methodological challenges in constructing effective treatment sequences for chronic disorders. Neuropsychopharmacology, 32(2), 257\u2013262.","journal-title":"Neuropsychopharmacology"},{"key":"5229_CR38","unstructured":"NAP (2010). The prevention and treatment of missing data in clinical trials. The National Academies Press, Panel on Handling Missing Data in Clinical Trials. Committee on National Statistics, Division of Behavioral, Social Sciences and Education."},{"key":"5229_CR39","volume-title":"Applied linear statistical models","author":"J. Neter","year":"1996","unstructured":"Neter, J., Kutner, M. H., Nachtsheim, C. J., & Wasserman, W. (1996). Applied linear statistical models. New York: McGraw-Hill."},{"key":"5229_CR40","doi-asserted-by":"crossref","first-page":"752","DOI":"10.1145\/1390156.1390251","volume-title":"Proceedings of the 25th annual international conference on machine learning","author":"R. Parr","year":"2008","unstructured":"Parr, R., Li, L., Taylor, G., Painter-Wakefield, C., & Littman, M. (2008). An analysis of linear models, linear value-function approximation, and feature selection for reinforcement learning. In A. McCallum, & S. Roweis (Eds.), Proceedings of the 25th annual international conference on machine learning (pp. 752\u2013759). New York: Omnipress."},{"key":"5229_CR41","doi-asserted-by":"crossref","unstructured":"Pineau, J., Bellemare, M. G., Rush, A. J., Ghizaru, A., & Murphy, S. A. (2007). Constructing evidence-based treatment strategies using methods from computer science. Drug and Alcohol Dependence S52\u2013S60.","DOI":"10.1016\/j.drugalcdep.2007.01.005"},{"key":"5229_CR42","first-page":"1","volume-title":"Statistical models in epidemiology: the environment and clinical trials","author":"J. M. Robins","year":"1999","unstructured":"Robins, J. M., Rotnitzky, A., & Scharfstein, D. (1999). Sensitivity analysis for selection bias and unmeasured confounding in missing data and causal inference models. In M. E. Halloran & D. Berry (Eds.), Statistical models in epidemiology: the environment and clinical trials (pp. 1\u201392). Berlin: Springer."},{"key":"5229_CR43","doi-asserted-by":"crossref","first-page":"473","DOI":"10.2307\/2291635","volume":"91","author":"D. B. Rubin","year":"1996","unstructured":"Rubin, D. B. (1996). Multiple imputation after 18+ years (with discussion). Journal of the American Statistical Association, 91, 473\u2013489.","journal-title":"Journal of the American Statistical Association"},{"issue":"1","key":"5229_CR44","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1016\/S0197-2456(03)00112-0","volume":"25","author":"A. J. Rush","year":"2004","unstructured":"Rush, A. J., Fava, M., Wisniewski, S. R., & Lavori, P. W. (2004). Sequenced treatment alternatives to relieve depression (STAR*D): rational and design. Controlled Clinical Trials, 25(1), 119\u2013142.","journal-title":"Controlled Clinical Trials"},{"key":"5229_CR45","unstructured":"Schafer, J. L. (1997). Imputation of missing covariates under a multivariate linear mixed model (Tech. rep.). Dept. of Statistics, The Pennsylvania State University."},{"issue":"1","key":"5229_CR46","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1191\/096228099671525676","volume":"8","author":"J. L. Schafer","year":"1999","unstructured":"Schafer, J. L. (1999). Multiple imputation: a primer. Statistical Methods in Medical Research, 8(1), 3\u201315.","journal-title":"Statistical Methods in Medical Research"},{"key":"5229_CR47","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1198\/106186002760180608","volume":"11","author":"J. L. Schafer","year":"2002","unstructured":"Schafer, J. L., & Yucel, R. M. (2002). Computational strategies for multivariate linear mixed models with missing values. Journal of Computational and Graphical Statistics, 11, 421\u2013442.","journal-title":"Journal of Computational and Graphical Statistics"},{"issue":"448","key":"5229_CR48","doi-asserted-by":"crossref","first-page":"1096","DOI":"10.2307\/2669923","volume":"94","author":"D. O. Scharfstein","year":"1999","unstructured":"Scharfstein, D. O., Rotnitzky, A., & Robins, J. M. (1999). Adjusting for nonignorable drop-out using semiparametric nonresponse models. Journal of the American Statistical Association, 94(448), 1096\u20131120.","journal-title":"Journal of the American Statistical Association"},{"issue":"4","key":"5229_CR49","doi-asserted-by":"crossref","first-page":"1251","DOI":"10.1090\/S0002-9939-1994-1227529-8","volume":"122","author":"J. Shao","year":"1994","unstructured":"Shao, J. (1994). Bootstrap sample size in nonregular cases. Proceedings of the American Mathematical Society, 122(4), 1251\u20131262.","journal-title":"Proceedings of the American Mathematical Society"},{"key":"5229_CR50","first-page":"1082","volume-title":"Advances in neural information processing systems (NIPS 2000)","author":"C. R. Shelton","year":"2001","unstructured":"Shelton, C. R. (2001). Balancing multiple sources of reward in reinforcement learning. In Advances in neural information processing systems (NIPS 2000) (pp. 1082\u20131088)."},{"key":"5229_CR51","doi-asserted-by":"crossref","first-page":"1070","DOI":"10.1287\/opre.21.5.1071","volume":"21","author":"R. D. Smallwood","year":"1973","unstructured":"Smallwood, R. D., & Sondik, E. J. (1973). The optimal control of partially observable Markov processes over a finite horizon. Operations Research, 21, 1070\u20131088.","journal-title":"Operations Research"},{"key":"5229_CR52","first-page":"128","volume-title":"ICTAI","author":"A. L. Strehl","year":"2004","unstructured":"Strehl, A. L., & Littman, M. L. (2004). An empirical evaluation of interval Estimation for Markov decision processes. In ICTAI (pp. 128\u2013135). Los Alamitos: IEEE Computer Society."},{"key":"5229_CR53","doi-asserted-by":"crossref","first-page":"856","DOI":"10.1145\/1102351.1102459","volume-title":"Proceedings of the 22nd international conference on Machine learning (ICML 2005 )","author":"A. L. Strehl","year":"2005","unstructured":"Strehl, A. L., & Littman, M. L. (2005). A theoretical analysis of model-based interval Estimation. In L. D. Raedt & S. Wrobel (Eds.), Proceedings of the 22nd international conference on Machine learning (ICML 2005 ) (pp. 856\u2013863). New York: ACM. 10.1145\/1102351.1102459 ."},{"key":"5229_CR54","doi-asserted-by":"crossref","first-page":"881","DOI":"10.1145\/1143844.1143955","volume-title":"Proceedings of the 23rd annual international conference on machine learning (ICML 2006)","author":"A. Strehl","year":"2006","unstructured":"Strehl, A., Li, L., Wiewiora, E., Langford, J., & Littman, M. (2006). PAC model-free reinforcement learning. In W. W. Cohen & A. Moore (Eds.), Proceedings of the 23rd annual international conference on machine learning (ICML 2006) (pp. 881\u2013888)."},{"key":"5229_CR55","first-page":"943","volume-title":"Proceedings of the seventeenth international conference on machine learning (ICML 2000)","author":"M. J. A. Strens","year":"2000","unstructured":"Strens, M. J. A. (2000). A Bayesian framework for reinforcement learning. In P. Langley (Ed.), Proceedings of the seventeenth international conference on machine learning (ICML 2000) (p. 943\u2013950). San Francisco: Morgan Kaufmann."},{"issue":"1","key":"5229_CR56","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1093\/oxfordjournals.schbul.a006986","volume":"29","author":"T. S. Stroup","year":"2003","unstructured":"Stroup, T. S., McEvoy, J. P., Swartz, M. S., Byerly, M. J., Glick, I. D., Canive, J. M., McGee, M., Simpson, G. M., Stevens, M. D., & Lieberman, J. A. (2003). The National Institute of Mental Health clinical antipschotic trials of intervention effectiveness (CATIE) project: schizophrenia trial design and protocol development. Schizophrenia Bulletin, 29(1), 15\u201331.","journal-title":"Schizophrenia Bulletin"},{"key":"5229_CR57","volume-title":"Reinforcement learning: an introduction","author":"R. S. Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Off-policy bootstrapping. In Reinforcement learning: an introduction Cambridge: MIT Press."},{"issue":"1","key":"5229_CR58","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1093\/oxfordjournals.schbul.a006989","volume":"29","author":"M. S. Swartz","year":"2003","unstructured":"Swartz, M. S., Perkins, D. O., Stroup, T. S., McEvoy, J. P., Nieri, J. M., & Haal, D. D. (2003). Assessing clinical and functional outcomes in the clinical antipsychotic of intervention effectiveness (CATIE) schizophrenia trial. Schizophrenia Bulletin, 29(1), 33\u201343.","journal-title":"Schizophrenia Bulletin"},{"key":"5229_CR59","first-page":"276","volume-title":"Proceedings of the human language technology conference","author":"J. Tetreault","year":"2007","unstructured":"Tetreault, J., Bohus, D., & Litman, D. (2007). Estimating the reliability of MDP policies: a confidence interval approach. In Proceedings of the human language technology conference (pp. 276\u2013283)."},{"issue":"5","key":"5229_CR60","doi-asserted-by":"crossref","first-page":"859","DOI":"10.1016\/j.ejca.2007.01.006","volume":"43","author":"P. Thall","year":"2007","unstructured":"Thall, P., & Wathen, J. (2007). Practical Bayesian adaptive randomisation in clinical trials. European Journal of Cancer, 43(5), 859\u2013866.","journal-title":"European Journal of Cancer"},{"key":"5229_CR61","doi-asserted-by":"crossref","first-page":"1011","DOI":"10.1002\/(SICI)1097-0258(20000430)19:8<1011::AID-SIM414>3.0.CO;2-M","volume":"19","author":"P. F. Thall","year":"2000","unstructured":"Thall, P. F., & Wathan, J. K. (2000). Covariate-adjusted adaptive randomization in a sarcoma trial with multistate treatments. Statistics in Medicine, 19, 1011\u20131028.","journal-title":"Statistics in Medicine"},{"issue":"3","key":"5229_CR62","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1177\/0962280206074463","volume":"16","author":"S. Buuren van","year":"2007","unstructured":"van Buuren, S. (2007). Multiple imputation of discrete and continuous data by fully conditional specification. Statistical Methods in Medical Research, 16(3), 219\u2013242.","journal-title":"Statistical Methods in Medical Research"},{"issue":"12","key":"5229_CR63","doi-asserted-by":"crossref","first-page":"1049","DOI":"10.1080\/10629360600810434","volume":"76","author":"S. Buuren van","year":"2006","unstructured":"van Buuren, S., Brand, J. P. L., Groothuis-Oudshoorn, C. G. M., & Rubin, D. B. (2006). Fully conditional specification in multivariate imputation. Journal of Statistical Computation and Simulation, 76(12), 1049\u20131064.","journal-title":"Journal of Statistical Computation and Simulation"},{"key":"5229_CR64","doi-asserted-by":"crossref","first-page":"956","DOI":"10.1145\/1102351.1102472","volume-title":"Proceedings of the 22nd international conference on machine learning (ICML 2005)","author":"T. Wang","year":"2005","unstructured":"Wang, T., Lizotte, D., Bowling, M., & Schuurmans, D. (2005). Bayesian sparse sampling for on-line reward optimization. In L. D. Raedt & S. Wrobel (Eds.), Proceedings of the 22nd international conference on machine learning (ICML 2005) (pp. 956\u2013963). New York: ACM. 10.1145\/1102351.1102472 ."},{"key":"5229_CR65","doi-asserted-by":"crossref","first-page":"3294","DOI":"10.1002\/sim.3720","volume":"28","author":"Y. Zhao","year":"2009","unstructured":"Zhao, Y., Kosorok, M. R., & Zeng, D. (2009). Reinforcement learning design for cancer clinical trials. Statistics in Medicine, 28, 3294\u20133315.","journal-title":"Statistics in Medicine"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-010-5229-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-010-5229-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-010-5229-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,2]],"date-time":"2024-04-02T05:47:24Z","timestamp":1712036844000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-010-5229-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,12,22]]},"references-count":65,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2011,7]]}},"alternative-id":["5229"],"URL":"https:\/\/doi.org\/10.1007\/s10994-010-5229-0","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,12,22]]}}}