{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T22:10:33Z","timestamp":1778019033948,"version":"3.51.4"},"reference-count":85,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100006108","name":"U.S. Department of Health & Human Services | NIH | National Center for Advancing Translational Sciences","doi-asserted-by":"publisher","award":["UL1TR004419"],"award-info":[{"award-number":["UL1TR004419"]}],"id":[{"id":"10.13039\/100006108","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006955","name":"U.S. Department of Health & Human Services | NIH | Office of Extramural Research, National Institutes of Health","doi-asserted-by":"publisher","award":["K08DK131286"],"award-info":[{"award-number":["K08DK131286"]}],"id":[{"id":"10.13039\/100006955","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000062","name":"U.S. Department of Health & Human Services | NIH | National Institute of Diabetes and Digestive and Kidney Diseases","doi-asserted-by":"publisher","award":["K08DK131286"],"award-info":[{"award-number":["K08DK131286"]}],"id":[{"id":"10.13039\/100000062","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-024-01316-0","type":"journal-article","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T22:52:27Z","timestamp":1732661547000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":75,"title":["A Primer on Reinforcement Learning in Medicine for Clinicians"],"prefix":"10.1038","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1159-8354","authenticated-orcid":false,"given":"Pushkala","family":"Jayaraman","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5411-6637","authenticated-orcid":false,"given":"Jacob","family":"Desman","sequence":"additional","affiliation":[]},{"given":"Moein","family":"Sabounchi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6319-4314","authenticated-orcid":false,"given":"Girish N.","family":"Nadkarni","sequence":"additional","affiliation":[]},{"given":"Ankit","family":"Sakhuja","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,26]]},"reference":[{"key":"1316_CR1","unstructured":"Sutton, R. S. & Barto, A. G. Reinforcement learning: An introduction, 2nd ed, (The MIT Press, Cambridge, MA, US, 2018)."},{"key":"1316_CR2","first-page":"679","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman, R. A Markovian decision process. J. Math. Mech. 6, 679\u2013684 (1957).","journal-title":"J. Math. Mech."},{"key":"1316_CR3","unstructured":"Szepesv\u00e1ri, C. Algorithms for Reinforcement Learning, (Springer International Publishing, 2022)."},{"key":"1316_CR4","unstructured":"Thomas, M. M., Joost, B., Aske, P. & Catholijn, M. J. 
Model-based Reinforcement Learning: A Survey, (Now Publishers, 2023)."},{"key":"1316_CR5","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D. et al. A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play. Science 362, 1140\u20131144 (2018).","journal-title":"Science"},{"key":"1316_CR6","doi-asserted-by":"crossref","unstructured":"Huang, Q. Model-Based or Model-Free, a Review of Approaches in Reinforcement Learning. In 2020 International Conference on Computing and Data Science (CDS) 219-221 (2020).","DOI":"10.1109\/CDS49703.2020.00051"},{"key":"1316_CR7","doi-asserted-by":"publisher","first-page":"134704","DOI":"10.1109\/ACCESS.2022.3228647","volume":"10","author":"MC McKenzie","year":"2022","unstructured":"McKenzie, M. C. & McDonnell, M. D. Modern value based reinforcement learning: a chronological review. IEEE Access 10, 134704\u2013134725 (2022).","journal-title":"IEEE Access"},{"key":"1316_CR8","doi-asserted-by":"crossref","unstructured":"Poole, D. L. & Mackworth, A. K. Artificial Intelligence: Foundations of Computational Agents, (Cambridge University Press, Cambridge, 2017).","DOI":"10.1017\/9781108164085"},{"key":"1316_CR9","unstructured":"Rummery, G. A. & Niranjan, M. On-line Q-learning using connectionist systems. (1994)."},{"key":"1316_CR10","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/BF00114726","volume":"22","author":"SP Singh","year":"1996","unstructured":"Singh, S. P. & Sutton, R. S. Reinforcement learning with replacing eligibility traces. Mach. Learn. 22, 123\u2013158 (1996).","journal-title":"Mach. Learn."},{"key":"1316_CR11","unstructured":"Prudencio, R. F., Maximo, M. & Colombini, E. L. A Survey on Offline Reinforcement Learning: Taxonomy, Review, and Open Problems. IEEE Trans Neural Netw Learn Syst PP(2023)."},{"key":"1316_CR12","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C. J. C. H. & Dayan, P. Q-learning. Mach. Learn. 8, 279\u2013292 (1992).","journal-title":"Mach. Learn."},{"key":"1316_CR13","unstructured":"Uehara, M., Shi, C. & Kallus, N. A Review of Off-Policy Evaluation in Reinforcement Learning. (2022)."},{"key":"1316_CR14","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R. J. Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8, 229\u2013256 (1992).","journal-title":"Mach. Learn."},{"key":"1316_CR15","unstructured":"Konda, V. & Tsitsiklis, J. Actor-critic algorithms. Advances in neural information processing systems 12 (1999)."},{"key":"1316_CR16","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V. et al. Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015).","journal-title":"Nature"},{"key":"1316_CR17","unstructured":"Lai, M. Giraffe: Using Deep Reinforcement Learning to Play Chess. (2015)."},{"key":"1316_CR18","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2022","unstructured":"Kiran, B. R. et al. Deep reinforcement learning for autonomous driving: a survey. IEEE Trans. Intell. Transportation Syst. 
23, 4909\u20134926 (2022).","journal-title":"IEEE Trans. Intell. Transportation Syst."},{"key":"1316_CR19","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober, J., Bagnell, J. A. & Peters, J. Reinforcement learning in robotics: a survey. Int. J. Robot. Res. 32, 1238\u20131274 (2013).","journal-title":"Int. J. Robot. Res."},{"key":"1316_CR20","doi-asserted-by":"publisher","first-page":"1122","DOI":"10.1109\/JAS.2023.123618","volume":"10","author":"T Wu","year":"2023","unstructured":"Wu, T. et al. A brief overview of ChatGPT: The history, status quo and potential future development. IEEE\/CAA J. Autom. Sin. 10, 1122\u20131136 (2023).","journal-title":"IEEE\/CAA J. Autom. Sin."},{"key":"1316_CR21","unstructured":"Li, L., Chu, W., Langford, J. & Schapire, R. E. A contextual-bandit approach to personalized news article recommendation. (ACM)."},{"key":"1316_CR22","unstructured":"Gauci, J. et al. Horizon: Facebook\u2019s open source applied reinforcement learning platform. https:\/\/openreview.net\/forum?id=SylQKinLi4 (2019)."},{"key":"1316_CR23","doi-asserted-by":"publisher","first-page":"362","DOI":"10.17775\/CSEEJPES.2018.00520","volume":"4","author":"D Zhang","year":"2018","unstructured":"Zhang, D., Han, X. & Deng, C. Review on the research and practice of deep learning and reinforcement learning in smart grids. CSEE J. Power Energy Syst. 4, 362\u2013370 (2018).","journal-title":"CSEE J. Power Energy Syst."},{"key":"1316_CR24","unstructured":"Trella, A. L., et al. Monitoring Fidelity of Online Reinforcement Learning Algorithms in Clinical Trials. arXiv preprint arXiv:2402.17003 (2024)."},{"key":"1316_CR25","unstructured":"Prudencio, R. F., Maximo, M. R. O. A. & Colombini, E. L. A Survey on Offline Reinforcement Learning: Taxonomy, Review, and Open Problems. IEEE Transactions on Neural Networks and Learning Systems, 1-0 (2024)."},{"key":"1316_CR26","doi-asserted-by":"crossref","unstructured":"Jakobi, N, Husbands, P & Harvey, I. Noise and the reality gap: the use of simulation in evolutionary robotics. Springer, Berlin, p 704\u2013720 (1995).","DOI":"10.1007\/3-540-59496-5_337"},{"key":"1316_CR27","unstructured":"Levine, S., Kumar, A., Tucker, G. & Fu, J. Offline reinforcement learning: Tutorial, review, and perspectives on open problems. abs\/2005.01643. https:\/\/arxiv.org\/abs\/2005.01643 (2020)."},{"key":"1316_CR28","doi-asserted-by":"crossref","unstructured":"Kuhn, D., Esfahani, P. M., Nguyen, V. A. & Shafieezadeh-Abadeh, S. Wasserstein distributionally robust optimization: Theory and applications in machine learning. In Operations research & management science in the age of analytics 130-166 (Informs, 2019).","DOI":"10.1287\/educ.2019.0198"},{"key":"1316_CR29","unstructured":"Koh, P. W. et al. WILDS: A Benchmark of in-the-Wild Distribution Shifts. In Proceedings of the 38th International Conference on Machine Learning, 139 (eds. Marina, M. & Tong, Z.) 5637-5664 (PMLR, Proceedings of Machine Learning Research, 2021)."},{"key":"1316_CR30","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G. & Levine, S. Datasets for deep data-driven reinforcement learning. https:\/\/openreview.net\/forum?id=px0-N3_KjA (2021)."},{"key":"1316_CR31","unstructured":"Bain, M. & Sammut, C. A Framework for Behavioural Cloning. in Machine Intelligence 15 (1995)."},{"key":"1316_CR32","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G. & Levine, S. 
Conservative q-learning for offline reinforcement learning. Adv. Neural Inf. Process. Syst. 33, 1179\u20131191 (2020).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1316_CR33","unstructured":"Fujimoto, S., Meger, D. & Precup, D. Off-policy deep reinforcement learning without exploration. In International conference on machine learning 2052-2062 (PMLR, 2019)."},{"key":"1316_CR34","unstructured":"Kostrikov, I., Nair, A. & Levine, S. Offline reinforcement learning with implicit q-learning. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=68n2s9ZJWF8 (2022)."},{"key":"1316_CR35","unstructured":"Wu, Y., Tucker, G. & Nachum, O. Behavior regularized offline reinforcement learning. https:\/\/openreview.net\/forum?id=BJg9hTNKPH (2020)."},{"key":"1316_CR36","unstructured":"Kumar, A., Fu, J., Soh, M., Tucker, G. & Levine, S. Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in neural information processing systems 32 (2019)."},{"key":"1316_CR37","doi-asserted-by":"crossref","unstructured":"Vieillard, N., Pietquin, O. & Geist, M. Deep conservative policy iteration. in Proceedings of the AAAI Conference on Artificial Intelligence, 34 6070-6077 (2020).","DOI":"10.1609\/aaai.v34i04.6070"},{"key":"1316_CR38","unstructured":"Wang, Z., et al. Dueling network architectures for deep reinforcement learning. in International conference on machine learning 1995-2003 (PMLR, 2016)."},{"key":"1316_CR39","unstructured":"Xu, H., Zhan, X., Li, J. & Yin, H. Offline reinforcement learning with soft behavior regularization. abs\/2110.07395. https:\/\/arxiv.org\/abs\/2110.07395 (2021)."},{"key":"1316_CR40","unstructured":"Cheng, C.-A., Xie, T., Jiang, N. & Agarwal, A. Adversarially trained actor critic for offline reinforcement learning. In International Conference on Machine Learning 3852-3878 (PMLR, 2022)."},{"key":"1316_CR41","unstructured":"Le, H., Voloshin, C. & Yue, Y. Batch policy learning under constraints. in International Conference on Machine Learning 3703-3712 (PMLR, 2019)."},{"key":"1316_CR42","unstructured":"Jiang, N. & Li, L. Doubly robust off-policy value evaluation for reinforcement learning. in International conference on machine learning 652-661 (PMLR, 2016)."},{"key":"1316_CR43","unstructured":"Nachum, O., Chow, Y., Dai, B. & Li, L. Dualdice: Behavior-agnostic estimation of discounted stationary distribution corrections. Advances in neural information processing systems 32 (2019)."},{"key":"1316_CR44","unstructured":"Zhang, R., Dai, B., Li, L. & Schuurmans, D. Gendice: Generalized offline estimation of stationary values. In international Conference on Learning Representations. https:\/\/openreview.net\/forum?id=HkxlcnVFwB (2020)."},{"key":"1316_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3477600","volume":"55","author":"C Yu","year":"2021","unstructured":"Yu, C., Liu, J., Nemati, S. & Yin, G. Reinforcement learning in healthcare: a survey. ACM Comput. Surv. (CSUR) 55, 1\u201336 (2021).","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"1316_CR46","doi-asserted-by":"crossref","unstructured":"Borera, E. C., Moore, B. L., Doufas, A. G. & Pyeatt, L. D. An Adaptive Neural Network Filter for Improved Patient State Estimation in Closed-Loop Anesthesia Control. 
in 2011 IEEE 23rd International Conference on Tools with Artificial Intelligence 41-46 (2011).","DOI":"10.1109\/ICTAI.2011.15"},{"key":"1316_CR47","doi-asserted-by":"publisher","first-page":"3294","DOI":"10.1002\/sim.3720","volume":"28","author":"Y Zhao","year":"2009","unstructured":"Zhao, Y., Kosorok, M. R. & Zeng, D. Reinforcement learning design for cancer clinical trials. Stat. Med 28, 3294\u20133315 (2009).","journal-title":"Stat. Med"},{"key":"1316_CR48","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1016\/j.biosystems.2011.07.005","volume":"106","author":"I Ahn","year":"2011","unstructured":"Ahn, I. & Park, J. Drug scheduling of cancer chemotherapy based on natural actor-critic approach. Biosystems 106, 121\u2013129 (2011).","journal-title":"Biosystems"},{"key":"1316_CR49","doi-asserted-by":"publisher","first-page":"105443","DOI":"10.1016\/j.cmpb.2020.105443","volume":"193","author":"A Ebrahimi Zade","year":"2020","unstructured":"Ebrahimi Zade, A., Shahabi Haghighi, S. & Soltani, M. Reinforcement learning for optimal scheduling of glioblastoma treatment with temozolomide. Computer Methods Prog. Biomedicine 193, 105443 (2020).","journal-title":"Computer Methods Prog. Biomedicine"},{"key":"1316_CR50","doi-asserted-by":"publisher","first-page":"107280","DOI":"10.1016\/j.cmpb.2022.107280","volume":"229","author":"CY Yang","year":"2023","unstructured":"Yang, C. Y., Shiranthika, C., Wang, C. Y., Chen, K. W. & Sumathipala, S. Reinforcement learning strategies in cancer chemotherapy treatments: A review. Comput Methods Prog. Biomed. 229, 107280 (2023).","journal-title":"Comput Methods Prog. Biomed."},{"key":"1316_CR51","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1089\/dia.2013.0377","volume":"16","author":"R Visentin","year":"2014","unstructured":"Visentin, R., Dalla Man, C., Kovatchev, B. & Cobelli, C. The university of Virginia\/Padova type 1 diabetes simulator matches the glucose traces of a clinical trial. Diabetes Technol. Ther. 16, 428\u2013434 (2014).","journal-title":"Diabetes Technol. Ther."},{"key":"1316_CR52","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-021-88619-4","volume":"11","author":"Y Kim","year":"2021","unstructured":"Kim, Y., Suescun, J., Schiess, M. C. & Jiang, X. Computational medication regimen for Parkinson\u2019s disease using reinforcement learning. Sci. Rep. 11, 9313 (2021).","journal-title":"Sci. Rep."},{"key":"1316_CR53","doi-asserted-by":"publisher","first-page":"136","DOI":"10.1038\/s41591-021-01599-w","volume":"28","author":"A Yala","year":"2022","unstructured":"Yala, A. et al. Optimizing risk-based breast cancer screening policies with reinforcement learning. Nat. Med. 28, 136\u2013143 (2022).","journal-title":"Nat. Med"},{"key":"1316_CR54","doi-asserted-by":"publisher","first-page":"1941","DOI":"10.1038\/s41591-023-02475-5","volume":"29","author":"C Barata","year":"2023","unstructured":"Barata, C. et al. A reinforcement learning model for AI-based decision support in skin cancer. Nat. Med. 29, 1941\u20131946 (2023).","journal-title":"Nat. Med"},{"key":"1316_CR55","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2016.35","volume":"3","author":"AE Johnson","year":"2016","unstructured":"Johnson, A. E. et al. MIMIC-III, a freely accessible critical care database. Sci. Data 3, 160035 (2016).","journal-title":"Sci. Data"},{"key":"1316_CR56","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01899-x","volume":"10","author":"AEW Johnson","year":"2023","unstructured":"Johnson, A. E. W. et al. 
MIMIC-IV, a freely accessible electronic health record dataset. Sci. Data 10, 1 (2023).","journal-title":"Sci. Data"},{"key":"1316_CR57","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2018.178","volume":"5","author":"TJ Pollard","year":"2018","unstructured":"Pollard, T. J. et al. The eICU Collaborative Research Database, a freely available multi-center database for critical care research. Sci. Data 5, 180178 (2018).","journal-title":"Sci. Data"},{"key":"1316_CR58","first-page":"2978","volume":"2016","author":"S Nemati","year":"2016","unstructured":"Nemati, S., Ghassemi, M. M. & Clifford, G. D. Optimal medication dosing from suboptimal clinical examples: a deep reinforcement learning approach. Annu Int Conf. IEEE Eng. Med Biol. Soc. 2016, 2978\u20132981 (2016).","journal-title":"Annu Int Conf. IEEE Eng. Med Biol. Soc."},{"key":"1316_CR59","first-page":"4927","volume":"2018","author":"R Lin","year":"2018","unstructured":"Lin, R., Stanley, M. D., Ghassemi, M. M. & Nemati, S. A deep deterministic policy gradient approach to medication dosing and surveillance in the ICU. Annu Int Conf. IEEE Eng. Med Biol. Soc. 2018, 4927\u20134931 (2018).","journal-title":"Annu Int Conf. IEEE Eng. Med Biol. Soc."},{"key":"1316_CR60","unstructured":"Raghu, A. et al. Deep reinforcement learning for sepsis treatment. abs\/1711.09602. http:\/\/arxiv.org\/abs\/1711.09602 (2017)."},{"key":"1316_CR61","doi-asserted-by":"publisher","first-page":"11034","DOI":"10.1007\/s10489-022-04099-7","volume":"53","author":"D Liang","year":"2023","unstructured":"Liang, D., Deng, H. & Liu, Y. The treatment of sepsis: an episodic memory-assisted deep reinforcement learning approach. Appl. Intell. 53, 11034\u201311044 (2023).","journal-title":"Appl. Intell."},{"key":"1316_CR62","doi-asserted-by":"publisher","first-page":"1716","DOI":"10.1038\/s41591-018-0213-5","volume":"24","author":"M Komorowski","year":"2018","unstructured":"Komorowski, M., Celi, L. A., Badawi, O., Gordon, A. C. & Faisal, A. A. The artificial intelligence clinician learns optimal treatment strategies for sepsis in intensive care. Nat. Med. 24, 1716\u20131720 (2018).","journal-title":"Nat. Med"},{"key":"1316_CR63","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1038\/s41746-023-00755-5","volume":"6","author":"X Wu","year":"2023","unstructured":"Wu, X., Li, R., He, Z., Yu, T. & Cheng, C. A value-based deep reinforcement learning model with human expertise in optimal treatment of sepsis. NPJ Digit Med. 6, 15 (2023).","journal-title":"NPJ Digit Med"},{"key":"1316_CR64","first-page":"15696","volume":"37","author":"F Kondrup","year":"2023","unstructured":"Kondrup, F. et al. Towards safe mechanical ventilation treatment using deep offline reinforcement learning. Proc. AAAI Conf. Artif. Intell. 37, 15696\u201315702 (2023).","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"1316_CR65","unstructured":"Prasad, N., Cheng, L.F., Chivers, C., Draugelis, M. & Engelhardt, B. E. A reinforcement learning approach to weaning of mechanical ventilation in intensive care units. abs\/1704.06300. http:\/\/arxiv.org\/abs\/1704.06300 (2017)."},{"key":"1316_CR66","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1038\/s41746-021-00388-6","volume":"4","author":"A Peine","year":"2021","unstructured":"Peine, A. et al. Development and validation of a reinforcement learning algorithm to dynamically optimize mechanical ventilation in critical care. NPJ Digit Med. 4, 32 (2021).","journal-title":"NPJ Digit Med"},{"key":"1316_CR67","unstructured":"Hasselt, H. 
Double Q-learning. Advances in neural information processing systems 23 (2010)."},{"key":"1316_CR68","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2023.102742","volume":"147","author":"F den Hengst","year":"2024","unstructured":"den Hengst, F. et al. Guideline-informed reinforcement learning for mechanical ventilation in critical care. Artif. Intell. Med. 147, 102742 (2024).","journal-title":"Artif. Intell. Med."},{"key":"1316_CR69","doi-asserted-by":"crossref","unstructured":"Saghafian, S. Ambiguous Dynamic Treatment Regimes: A Reinforcement Learning Approach. Management Science (2023).","DOI":"10.1287\/mnsc.2022.00883"},{"key":"1316_CR70","unstructured":"Luo, Z., Pan, Y., Watkinson, P. & Zhu, T. Position: Reinforcement Learning in Dynamic Treatment Regimes Needs Critical Reexamination. In Forty-first International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=xtKWwB6lzT (2024)."},{"key":"1316_CR71","unstructured":"Luo, Z. et al. DTR-Bench: An in silico Environment and Benchmark Platform for Reinforcement Learning Based Dynamic Treatment Regime. arXiv preprint arXiv:2405.18610 (2024)."},{"key":"1316_CR72","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1038\/s41591-018-0316-z","volume":"25","author":"A Esteva","year":"2019","unstructured":"Esteva, A. et al. A guide to deep learning in healthcare. Nat. Med 25, 24\u201329 (2019).","journal-title":"Nat. Med"},{"key":"1316_CR73","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1136\/bmjqs-2018-008370","volume":"28","author":"R Challen","year":"2019","unstructured":"Challen, R. et al. Artificial intelligence, bias and clinical safety. BMJ Qual. Saf. 28, 231\u2013237 (2019).","journal-title":"BMJ Qual. Saf."},{"key":"1316_CR74","doi-asserted-by":"publisher","first-page":"e18477","DOI":"10.2196\/18477","volume":"22","author":"S Liu","year":"2020","unstructured":"Liu, S. et al. Reinforcement learning for clinical decision support in critical care: comprehensive review. J. Med Internet Res 22, e18477 (2020).","journal-title":"J. Med Internet Res"},{"key":"1316_CR75","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-019-0763-6","volume":"19","author":"C Yu","year":"2019","unstructured":"Yu, C., Liu, J. & Zhao, H. Inverse reinforcement learning for intelligent mechanical ventilation and sedative dosing in intensive care units. BMC Med Inf. Decis. Mak. 19, 57 (2019).","journal-title":"BMC Med Inf. Decis. Mak."},{"key":"1316_CR76","unstructured":"Nachum, O. et al. Algaedice: Policy gradient from arbitrary experience. abs\/1912.02074. http:\/\/arxiv.org\/abs\/1912.02074 (2019)."},{"key":"1316_CR77","first-page":"6551","volume":"33","author":"M Yang","year":"2020","unstructured":"Yang, M., Nachum, O., Dai, B., Li, L. & Schuurmans, D. Off-policy evaluation via the regularized lagrangian. Adv. Neural Inf. Process. Syst. 33, 6551\u20136561 (2020).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1316_CR78","unstructured":"Voloshin, C., Le, H. M., Jiang, N. & Yue, Y. Empirical study of off-policy policy evaluation for reinforcement learning. abs\/1911.06854. http:\/\/arxiv.org\/abs\/1911.06854 (2019)."},{"key":"1316_CR79","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1038\/s41746-024-01028-5","volume":"7","author":"JC Lauffenburger","year":"2024","unstructured":"Lauffenburger, J. C. et al. The impact of using reinforcement learning to personalize communication on medication adherence: findings from the REINFORCE trial. NPJ Digit Med. 
7, 39 (2024).","journal-title":"NPJ Digit Med"},{"key":"1316_CR80","doi-asserted-by":"publisher","first-page":"2633","DOI":"10.1038\/s41591-023-02552-9","volume":"29","author":"G Wang","year":"2023","unstructured":"Wang, G. et al. Optimized glycemic control of type 2 diabetes with reinforcement learning: a proof-of-concept trial. Nat. Med. 29, 2633\u20132642 (2023).","journal-title":"Nat. Med"},{"key":"1316_CR81","first-page":"e39305","volume":"15","author":"M Karabacak","year":"2023","unstructured":"Karabacak, M. & Margetis, K. Embracing large language models for medical applications: opportunities and challenges. Cureus 15, e39305 (2023).","journal-title":"Cureus"},{"key":"1316_CR82","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1038\/s43856-023-00370-1","volume":"3","author":"J Clusmann","year":"2023","unstructured":"Clusmann, J. et al. The future landscape of large language models in medicine. Commun. Med (Lond.) 3, 141 (2023).","journal-title":"Commun. Med (Lond.)"},{"key":"1316_CR83","doi-asserted-by":"publisher","first-page":"19117","DOI":"10.1109\/JIOT.2023.3281347","volume":"10","author":"W Gong","year":"2023","unstructured":"Gong, W. et al. Federated inverse reinforcement learning for smart icus with differential privacy. IEEE Internet Things J. 10, 19117\u201319124 (2023).","journal-title":"IEEE Internet Things J."},{"key":"1316_CR84","doi-asserted-by":"publisher","DOI":"10.1186\/s40635-024-00614-x","volume":"12","author":"LF Roggeveen","year":"2024","unstructured":"Roggeveen, L. F. et al. Reinforcement learning for intensive care medicine: actionable clinical insights from novel approaches to reward shaping and off-policy model evaluation. Intensive Care Med Exp. 12, 32 (2024).","journal-title":"Intensive Care Med Exp."},{"key":"1316_CR85","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A. & Silver, D. Deep reinforcement learning with double q-learning. 
In Proceedings of the AAAI conference on artificial intelligence, 30 (2016).","DOI":"10.1609\/aaai.v30i1.10295"}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01316-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01316-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01316-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T11:08:49Z","timestamp":1732705729000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01316-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,26]]},"references-count":85,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["1316"],"URL":"https:\/\/doi.org\/10.1038\/s41746-024-01316-0","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,26]]},"assertion":[{"value":"8 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"GNN reports grants, personal fees, and non-financial support from Renalytix. GNN reports non-financial support from Pensieve Health, personal fees from AstraZeneca, personal fees from BioVie, personal fees from GLG Consulting, and personal fees from Siemens Healthineers, outside the submitted work. GNN also serves as the Associate Editor for NPJDM. None of the other authors have any other competing interests to declare.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"337"}}