{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T11:16:56Z","timestamp":1778152616694,"version":"3.51.4"},"reference-count":87,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T00:00:00Z","timestamp":1778112000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T00:00:00Z","timestamp":1778112000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100018085","name":"Data Science Institute, Columbia University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100018085","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-026-02625-2","type":"journal-article","created":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T11:05:31Z","timestamp":1778151931000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Off by a beat: the effects of temporal misalignment in reinforcement learning for sepsis treatment"],"prefix":"10.1038","volume":"9","author":[{"given":"Shengpu","family":"Tang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiayu","family":"Yao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jenna","family":"Wiens","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sonali","family":"Parbhoo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,5,7]]},"reference":[{"key":"2625_CR1","unstructured":"Sutton, R. S. & Barto, A. G. Reinforcement Learning: An Introduction (MIT Press, 2018)."},{"key":"2625_CR2","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1038\/s41746-024-01316-0","volume":"7","author":"P Jayaraman","year":"2024","unstructured":"Jayaraman, P., Desman, J., Sabounchi, M., Nadkarni, G. N. & Sakhuja, A. A primer on reinforcement learning in medicine for clinicians. npj Digit. Med. 7, 337 (2024).","journal-title":"npj Digit. Med."},{"key":"2625_CR3","doi-asserted-by":"publisher","unstructured":"Nemati, S., Ghassemi, M. M. & Clifford, G. D. Optimal medication dosing from suboptimal clinical examples: A deep reinforcement learning approach. In Proc. 38th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC), 2978\u20132981. https:\/\/doi.org\/10.1109\/embc.2016.7591355 (IEEE, 2016).","DOI":"10.1109\/embc.2016.7591355"},{"key":"2625_CR4","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1038\/s41746-021-00388-6","volume":"4","author":"A Peine","year":"2021","unstructured":"Peine, A. et al. Development and validation of a reinforcement learning algorithm to dynamically optimize mechanical ventilation in critical care. npj Digit. Med. 4, 32 (2021).","journal-title":"npj Digit. Med."},{"key":"2625_CR5","doi-asserted-by":"publisher","first-page":"1716","DOI":"10.1038\/s41591-018-0213-5","volume":"24","author":"M Komorowski","year":"2018","unstructured":"Komorowski, M., Celi, L. A., Badawi, O., Gordon, A. C. & Faisal, A. A. The Artificial Intelligence Clinician learns optimal treatment strategies for sepsis in intensive care. Nat. Med. 24, 1716\u20131720 (2018).","journal-title":"Nat. Med."},{"key":"2625_CR6","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D. et al. A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play. Science 362, 1140\u20131144 (2018).","journal-title":"Science"},{"key":"2625_CR7","unstructured":"Towers, M. et al. Gymnasium: a standard interface for reinforcement learning environments. In The Thirty-ninth Annual Conference on Neural Information Processing Systems Datasets and Benchmarks Track. https:\/\/openreview.net\/forum?id=qPMLvJxtPK (2025)."},{"key":"2625_CR8","unstructured":"Raghu, A., Komorowski, M., Celi, L. A., Szolovits, P. & Ghassemi, M. Continuous state-space models for optimal sepsis treatment: a deep reinforcement learning approach. In Proc. 2nd Machine Learning for Healthcare Conference Vol. 68 of Proceedings of Machine Learning Research (eds Doshi-Velez, F. et al.) 147\u2013163. https:\/\/proceedings.mlr.press\/v68\/raghu17a.html (PMLR, 2017)."},{"key":"2625_CR9","doi-asserted-by":"publisher","first-page":"4069","DOI":"10.1002\/sim.8710","volume":"39","author":"S Ferreira Guerra","year":"2020","unstructured":"Ferreira Guerra, S., Schnitzer, M. E., Forget, A. & Blais, L. Impact of discretization of the timeline for longitudinal causal inference methods. Stat. Med. 39, 4069\u20134085 (2020).","journal-title":"Stat. Med."},{"key":"2625_CR10","unstructured":"Adams, R., Saria, S. & Rosenblum, M. The impact of time series length and discretization on longitudinal causal estimation methods. Preprint at https:\/\/arxiv.org\/abs\/2011.15099 (2020)."},{"key":"2625_CR11","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/S0140-6736(74)91639-0","volume":"304","author":"G Teasdale","year":"1974","unstructured":"Teasdale, G. & Jennett, B. Assessment of coma and impaired consciousness: a practical scale. Lancet 304, 81\u201384 (1974).","journal-title":"Lancet"},{"key":"2625_CR12","doi-asserted-by":"publisher","DOI":"10.1186\/s13054-022-04290-9","volume":"27","author":"R Moreno","year":"2023","unstructured":"Moreno, R. et al. The sequential organ failure assessment (SOFA) score: has the time come for an update?. Crit. Care 27, 15 (2023).","journal-title":"Crit. Care"},{"key":"2625_CR13","unstructured":"Gottesman, O. et al. Evaluating reinforcement learning algorithms in observational health settings. Preprint at https:\/\/arxiv.org\/abs\/1805.12298 (2018)."},{"key":"2625_CR14","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1038\/s41591-018-0310-5","volume":"25","author":"O Gottesman","year":"2019","unstructured":"Gottesman, O. et al. Guidelines for reinforcement learning in healthcare. Nat. Med. 25, 16\u201318 (2019).","journal-title":"Nat. Med."},{"key":"2625_CR15","unstructured":"Jeter, R., Josef, C., Shashikumar, S. & Nemati, S. Does the \u201cArtificial Intelligence Clinician\u201d learn optimal treatment strategies for sepsis in intensive care?. Preprint at https:\/\/arxiv.org\/abs\/1902.03271 (2019)."},{"key":"2625_CR16","unstructured":"Luo, Z., Pan, Y., Watkinson, P. & Zhu, T. Position: Reinforcement learning in dynamic treatment regimes needs critical reexamination. In Proc. 41st International Conference on Machine Learning Vol. 235 of Proceedings of Machine Learning Research (eds Salakhutdinov, R. et al.) 33432\u201333465. https:\/\/proceedings.mlr.press\/v235\/luo24f.html (PMLR, 2024)."},{"key":"2625_CR17","doi-asserted-by":"publisher","unstructured":"Liao, P., Greenewald, K., Klasnja, P. & Murphy, S. Personalized HeartSteps: a reinforcement learning algorithm for optimizing physical activity. Proc. ACM Interact. Mob. Wearable Ubiquitous Technol. 4. https:\/\/doi.org\/10.1145\/3381007 (2020).","DOI":"10.1145\/3381007"},{"key":"2625_CR18","unstructured":"Fatemi, M. et al. Semi-Markov offline reinforcement learning for healthcare. In Proc. Conference on Health, Inference, and Learning Vol. 174 of Proceedings of Machine Learning Research, (eds Flores, G., Chen, G. H., Pollard, T., Ho, J. C. & Naumann, T.) 119\u2013137. https:\/\/proceedings.mlr.press\/v174\/fatemi22a.html (PMLR, 2022)."},{"key":"2625_CR19","unstructured":"Zhang, Z., Mei, H. & Xu, Y. Continuous-time decision transformer for healthcare applications. In Proc. 26th International Conference on Artificial Intelligence and Statistics Vol. 206 of Proceedings of Machine Learning Research (eds Ruiz, F., Dy, J. & van de Meent, J.-W.) 6245\u20136262. https:\/\/proceedings.mlr.press\/v206\/zhang23i.html (PMLR, 2023)."},{"key":"2625_CR20","unstructured":"Sun, Y. & Tang, S. Exploring time-step size in reinforcement learning for sepsis treatment. In Proc. Machine Learning for Health Symposium Vol. 297 of Proceedings of Machine Learning Research (PMLR, 2025)."},{"key":"2625_CR21","doi-asserted-by":"publisher","first-page":"e69145","DOI":"10.2196\/69145","volume":"13","author":"JM Lee","year":"2025","unstructured":"Lee, J. M., Tang, S., Sjoding, M. & Wiens, J. Optimizing loop diuretic treatment for mortality reduction in patients with acute dyspnea using a practical offline reinforcement learning pipeline for health care: Retrospective single-center simulation study. JMIR Med. Inform. 13, e69145 (2025).","journal-title":"JMIR Med. Inform."},{"key":"2625_CR22","unstructured":"Tang, S. Towards Clinically Applicable Reinforcement Learning. Ph.D. thesis, University of Michigan (2024)."},{"key":"2625_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2016.35","volume":"3","author":"AE Johnson","year":"2016","unstructured":"Johnson, A. E. et al. MIMIC-III, a freely accessible critical care database. Sci. Data 3, 1\u20139 (2016).","journal-title":"Sci. Data"},{"key":"2625_CR24","unstructured":"Johnson, A., Pollard, T. & Mark, R. MIMIC-III clinical database. PhysioNet. https:\/\/physionet.org\/content\/mimiciii\/1.4\/ (2016)."},{"key":"2625_CR25","unstructured":"Killian, T. W., Zhang, H., Subramanian, J., Fatemi, M. & Ghassemi, M. An empirical study of representation learning for reinforcement learning in healthcare. In Proc. Machine Learning for Health NeurIPS Workshop, 139\u2013160. https:\/\/proceedings.mlr.press\/v136\/killian20a.html (PMLR, 2020)."},{"key":"2625_CR26","doi-asserted-by":"crossref","unstructured":"Tang, S., Makar, M., Sjoding, M., Doshi-Velez, F. & Wiens, J. Leveraging factored action spaces for efficient offline reinforcement learning in healthcare. In Advances in Neural Information Processing Systems Vol. 35 (eds Koyejo, S. et al.) 34272\u201334286. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/hash\/dda7f9378a210c25e470e19304cce85d-Abstract-Conference.html (Curran Associates, Inc., 2022).","DOI":"10.52202\/068431-2484"},{"key":"2625_CR27","doi-asserted-by":"publisher","first-page":"801","DOI":"10.1001\/jama.2016.0287","volume":"315","author":"M Singer","year":"2016","unstructured":"Singer, M. et al. The third international consensus definitions for sepsis and septic shock (Sepsis-3). JAMA 315, 801\u2013810 (2016).","journal-title":"JAMA"},{"key":"2625_CR28","doi-asserted-by":"publisher","first-page":"1921","DOI":"10.1093\/jamia\/ocaa139","volume":"27","author":"S Tang","year":"2020","unstructured":"Tang, S. et al. Democratizing EHR analyses with FIDDLE: a flexible data-driven preprocessing pipeline for structured clinical data. J. Am. Med. Inform. Assoc. 27, 1921\u20131934 (2020).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"2625_CR29","doi-asserted-by":"publisher","unstructured":"Arthur, D. & Vassilvitskii, S. k-means++: the advantages of careful seeding. In Proc. Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA \u201907, 1027\u20131035. https:\/\/doi.org\/10.5555\/1283383.1283494 (Society for Industrial and Applied Mathematics, USA, 2007).","DOI":"10.5555\/1283383.1283494"},{"key":"2625_CR30","unstructured":"Tang, S., Modi, A., Sjoding, M. & Wiens, J. Clinician-in-the-loop decision making: Reinforcement learning with near-optimal set-valued policies. In International Conference on Machine Learning 9387\u20139396. https:\/\/proceedings.mlr.press\/v119\/tang20c.html (PMLR, 2020)."},{"key":"2625_CR31","unstructured":"Voloshin, C., Le, H., Jiang, N. & Yue, Y. Empirical study of off-policy policy evaluation for reinforcement learning. In Proc. Neural Information Processing Systems Track on Datasets and Benchmarks Vol. 1 (eds Vanschoren, J. & Yeung, S.) https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper_files\/paper\/2021\/hash\/a5e00132373a7031000fd987a3c9f87b-Abstract-round1.html (Curran Associates, Inc., 2021)."},{"key":"2625_CR32","doi-asserted-by":"publisher","first-page":"485 \u2013 511","DOI":"10.1214\/14-STS500","volume":"29","author":"M Dud\u00edk","year":"2014","unstructured":"Dud\u00edk, M., Erhan, D., Langford, J. & Li, L. Doubly robust policy evaluation and optimization. Stat. Sci. 29, 485 \u2013 511 (2014).","journal-title":"Stat. Sci."},{"key":"2625_CR33","unstructured":"Precup, D., Sutton, R. S. & Singh, S. P. Eligibility traces for off-policy policy evaluation. In Proc. Seventeenth International Conference on Machine Learning, ICML \u201900, 759\u2013766. https:\/\/dl.acm.org\/doi\/10.5555\/645529.658134 (Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 2000)."},{"key":"2625_CR34","unstructured":"Jiang, N. & Li, L. Doubly robust off-policy value evaluation for reinforcement learning. In Proc. 33rd International Conference on Machine Learning Vol. 48 of Proceedings of Machine Learning Research (eds Balcan, M. F. & Weinberger, K. Q.) 652\u2013661. https:\/\/proceedings.mlr.press\/v48\/jiang16.html (PMLR, New York, New York, USA, 2016)."},{"key":"2625_CR35","unstructured":"Thomas, P. & Brunskill, E. Data-efficient off-policy policy evaluation for reinforcement learning. In Proc. 33rd International Conference on Machine Learning Vol. 48 of Proceedings of Machine Learning Research, 2139\u20132148. https:\/\/proceedings.mlr.press\/v48\/thomasa16.html (PMLR, New York, New York, USA, 2016)."},{"key":"2625_CR36","unstructured":"Hanna, J., Niekum, S. & Stone, P. Importance sampling policy evaluation with an estimated behavior policy. In Proc. 36th International Conference on Machine Learning Vol. 97 of Proceedings of Machine Learning Research (eds Chaudhuri, K. & Salakhutdinov, R.) 2605\u20132613. https:\/\/proceedings.mlr.press\/v97\/hanna19a.html (PMLR, 2019)."},{"key":"2625_CR37","unstructured":"Chen, X., Wang, L., Hang, Y., Ge, H. & Zha, H. Infinite-horizon off-policy policy evaluation with multiple behavior policies. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=rkgU1gHtvr (OpenReview.net, 2019)."},{"key":"2625_CR38","unstructured":"Tang, S. & Wiens, J. Model selection for offline reinforcement learning: Practical considerations for healthcare settings. In Proc. 6th Machine Learning for Healthcare Conference Vol. 149 of Proceedings of Machine Learning Research (eds Jung, K., Yeung, S., Sendak, M., Sjoding, M. & Ranganath, R.) 2\u201335. https:\/\/proceedings.mlr.press\/v149\/tang21a.html (PMLR, 2021)."},{"key":"2625_CR39","unstructured":"Raghu, A. et al. Deep reinforcement learning for sepsis treatment. NeurIPS workshop on Machine Learning for Health (ML4H). Preprint at https:\/\/arxiv.org\/abs\/1711.09602 (2017)."},{"key":"2625_CR40","unstructured":"Peng, X. et al. Improving sepsis treatment strategies by combining deep and kernel-based reinforcement learning. AMIA annual symposium proceedings Vol. 2018, 887 (American Medical Informatics Association, 2018)."},{"key":"2625_CR41","unstructured":"Korsunsky, P., Belogolovsky, S., Zahavy, T., Tessler, C. & Mannor, S. Contextual inverse reinforcement learning. Preprint at https:\/\/openreview.net\/forum?id=S1gqraNKwB (2020)."},{"key":"2625_CR42","doi-asserted-by":"publisher","unstructured":"Masood, M. & Doshi-Velez, F. Diversity-inducing policy gradient: using maximum mean discrepancy to find a set of diverse policies. In Proc. Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI-19, 5923\u20135929. https:\/\/doi.org\/10.24963\/ijcai.2019\/821 (International Joint Conferences on Artificial Intelligence Organization, 2019).","DOI":"10.24963\/ijcai.2019\/821"},{"key":"2625_CR43","unstructured":"Oberst, M. K.Counterfactual Policy Introspection Using Structural Causal Models. Ph.D. thesis, Massachusetts Institute of Technology (2019)."},{"key":"2625_CR44","doi-asserted-by":"crossref","unstructured":"Jia, Y., Burden, J., Lawton, T. & Habli, I. Safe reinforcement learning for sepsis treatment. In Proc. IEEE International Conference on Healthcare Informatics (ICHI) 1\u20137. https:\/\/ieeexplore.ieee.org\/document\/9374367 (IEEE, 2020).","DOI":"10.1109\/ICHI48887.2020.9374367"},{"key":"2625_CR45","doi-asserted-by":"crossref","unstructured":"Wu, Z. et al. Learning individualized treatment rules with estimated translated inverse propensity score. In Proc. IEEE International Conference on Healthcare Informatics (ICHI) 1\u201311. https:\/\/ieeexplore.ieee.org\/document\/9374397 (IEEE, 2020).","DOI":"10.1109\/ICHI48887.2020.9374397"},{"key":"2625_CR46","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1007\/s10994-021-05984-x","volume":"110","author":"S Belogolovsky","year":"2021","unstructured":"Belogolovsky, S., Korsunsky, P., Mannor, S., Tessler, C. & Zahavy, T. Inverse reinforcement learning in contextual MDPs. Mach. Learn. 110, 2295\u20132334 (2021).","journal-title":"Mach. Learn."},{"key":"2625_CR47","unstructured":"Fatemi, M., Killian, T. W., Subramanian, J. & Ghassemi, M. Medical dead-ends and learning to identify high-risk states and treatments. In Advances in Neural Information Processing Systems Vol. 34 (eds Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P. & Vaughan, J. W.) 4856\u20134870. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/hash\/26405399c51ad7b13b504e74eb7c696c-Abstract.html (Curran Associates, Inc., 2021)."},{"key":"2625_CR48","first-page":"305","volume":"2021","author":"CX Ji","year":"2021","unstructured":"Ji, C. X., Oberst, M., Kanjilal, S. & Sontag, D. Trajectory inspection: a method for iterative clinician-driven design of reinforcement learning studies. AMIA Jt Summits Transl. Sci. Proc. 2021, 305\u2013314 (2021).","journal-title":"AMIA Jt Summits Transl. Sci. Proc."},{"key":"2625_CR49","unstructured":"Jia, Y. Embracing Machine Learning in Safety Assurance in Healthcare. Ph.D. thesis, University of York (2021)."},{"key":"2625_CR50","doi-asserted-by":"publisher","first-page":"103762","DOI":"10.1016\/j.jbi.2021.103762","volume":"117","author":"Y Jia","year":"2021","unstructured":"Jia, Y., Lawton, T., Burden, J., McDermid, J. & Habli, I. Safety-driven design of machine learning for sepsis treatment. J. Biomed. Inform. 117, 103762 (2021).","journal-title":"J. Biomed. Inform."},{"key":"2625_CR51","unstructured":"Lu, M., Shahn, Z., Sow, D., Doshi-Velez, F. & Lehman, L.-w. H. Is deep reinforcement learning ready for practical applications in healthcare? A sensitivity analysis of duel-DDQN for hemodynamic management in sepsis patients. In AMIA annual symposium proceedings Vol. 2020, 773 (American Medical Informatics Association, 2021)."},{"key":"2625_CR52","unstructured":"Liu, T. Fair Policy Learning. Master\u2019s thesis, University of Cambridge (2021)."},{"key":"2625_CR53","unstructured":"Satija, H., Thomas, P. S., Pineau, J. & Laroche, R. Multi-objective SPIBB: Seldonian offline policy improvement with safety constraints in finite MDPs. In Advances in Neural Information Processing Systems Vol. 34 (eds Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P. & Vaughan, J. W.) 2004\u20132017. https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/0f65caf0a7d00afd2b87c028e88fe931-Abstract.html (Curran Associates, Inc., 2021)."},{"key":"2625_CR54","doi-asserted-by":"publisher","unstructured":"Carey, S. et al. Fair reinforcement learning for maternal sepsis treatment. Preprint at https:\/\/doi.org\/10.1101\/2022.08.09.22278582 (2022).","DOI":"10.1101\/2022.08.09.22278582"},{"key":"2625_CR55","doi-asserted-by":"publisher","first-page":"e100549","DOI":"10.1136\/bmjhci-2022-100549","volume":"29","author":"P Festor","year":"2022","unstructured":"Festor, P. et al. Assuring the safety of AI-based clinical decision support systems: a case study of the AI Clinician for sepsis treatment. BMJ Health Care Inform. 29, e100549 (2022).","journal-title":"BMJ Health Care Inform."},{"key":"2625_CR56","unstructured":"Huang, Y., Cao, R. & Rahmani, A. Reinforcement learning for sepsis treatment: A continuous action space solution. In Proc. 7th Machine Learning for Healthcare Conference, vol. 182 of Proceedings of Machine Learning Research (eds Lipton, Z., Ranganath, R., Sendak, M., Sjoding, M. & Yeung, S.) 631\u2013647. https:\/\/proceedings.mlr.press\/v182\/huang22a.html (PMLR, 2022)."},{"key":"2625_CR57","unstructured":"Kaushik, P., Kummetha, S., Moodley, P. & Bapi, R. S. A conservative Q-learning approach for handling distribution shift in sepsis treatment strategies. Preprint at https:\/\/arxiv.org\/abs\/2203.13884 (2022)."},{"key":"2625_CR58","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01784-7","volume":"9","author":"NI-H Kuo","year":"2022","unstructured":"Kuo, N. I.-H. et al. The Health Gym: synthetic health-related datasets for the development of reinforcement learning algorithms. Sci. Data 9, 693 (2022).","journal-title":"Sci. Data"},{"key":"2625_CR59","unstructured":"Liu, Y. & Brunskill, E. Avoiding overfitting to the importance weights in offline policy optimization. Preprint at https:\/\/openreview.net\/forum?id=dLTXoSIcrik (2022)."},{"key":"2625_CR60","unstructured":"Luo, Z., Watkinson, P. & Zhu, T. NurSpecialist: Duel-agent reinforcement learning for dynamic hospitalised intervention regimes using electronic health records. Preprint at https:\/\/amulyayadav.github.io\/AI4SG2023\/images\/34.pdf (2022)."},{"key":"2625_CR61","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1038\/s41746-023-00755-5","volume":"6","author":"X Wu","year":"2023","unstructured":"Wu, X., Li, R., He, Z., Yu, T. & Cheng, C. A value-based deep reinforcement learning model with human expertise in optimal treatment of sepsis. npj Digit. Med. 6, 15 (2023).","journal-title":"npj Digit. Med."},{"key":"2625_CR62","unstructured":"Rahman, A. A., Agarwal, P., Michalski, V., Noumeir, R. & Kahou, S. Empowering clinicians with MeDT: A framework for sepsis treatment. Preprint at https:\/\/openreview.net\/forum?id=ihbEwhPaXe (2023)."},{"key":"2625_CR63","doi-asserted-by":"publisher","first-page":"11034","DOI":"10.1007\/s10489-022-04099-7","volume":"53","author":"D Liang","year":"2023","unstructured":"Liang, D., Deng, H. & Liu, Y. The treatment of sepsis: an episodic memory-assisted deep reinforcement learning approach. Appl. Intell. 53, 11034\u201311044 (2023).","journal-title":"Appl. Intell."},{"key":"2625_CR64","doi-asserted-by":"crossref","unstructured":"Ma, S., Lee, J., Serban, N. & Yang, S. Deep attention Q-network for personalized treatment recommendation. In Proc. IEEE International Conference on Data Mining Workshops (ICDMW), 329\u2013337. https:\/\/ieeexplore.ieee.org\/document\/10411559 (IEEE, 2023).","DOI":"10.1109\/ICDMW60847.2023.00048"},{"key":"2625_CR65","doi-asserted-by":"publisher","unstructured":"Nambiar, M. et al. Deep offline reinforcement learning for real-world treatment optimization applications. In Proc. 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, KDD \u201923, 4673\u20134684. https:\/\/doi.org\/10.1145\/3580305.3599800 (Association for Computing Machinery, New York, NY, USA, 2023).","DOI":"10.1145\/3580305.3599800"},{"key":"2625_CR66","unstructured":"Ma, T. et al. Sequential knockoffs for variable selection in reinforcement learning. Preprint at https:\/\/arxiv.org\/abs\/2303.14281 (2023)."},{"key":"2625_CR67","doi-asserted-by":"crossref","unstructured":"Tsirtsis, S. & Rodriguez, M. Finding counterfactually optimal action sequences in continuous state spaces. In Advances in Neural Information Processing Systems vol. 36 (eds Oh, A. et al.) 3220\u20133247. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/hash\/09ae6beae5f1ff38f05c05979097ea0f-Abstract-Conference.html (Curran Associates, Inc., 2023).","DOI":"10.52202\/075280-0143"},{"key":"2625_CR68","unstructured":"Zhou, Y., Qi, Z., Shi, C. & Li, L. Optimizing pessimism in dynamic treatment regimes: a bayesian learning approach. In Proc. 26th International Conference on Artificial Intelligence and Statistics Vol. 206 of Proceedings of Machine Learning Research (eds Ruiz, F., Dy, J. & van de Meent, J.-W.) 6704\u20136721. https:\/\/proceedings.mlr.press\/v206\/zhou23a.html (PMLR, 2023)."},{"key":"2625_CR69","doi-asserted-by":"crossref","unstructured":"Chaudhari, S., Deshpande, A., da Silva, B. C. & Thomas, P. S. Abstract reward processes: Leveraging state abstraction for consistent off-policy evaluation. In Advances in Neural Information Processing Systems Vol. 37 (eds Globerson, A. et al.) 17069\u201317105. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/hash\/1e6dcc16ffa7ced2228d1f2fdc8b5adf-Abstract-Conference.html (Curran Associates, Inc., 2024).","DOI":"10.52202\/079017-0543"},{"key":"2625_CR70","unstructured":"Chen, X., Zhang, G., Fu, Y., Yang, C. & Li, S. Treatment rule optimization under counterfactual temporal point processes with latent states. Preprint at https:\/\/openreview.net\/forum?id=jZffxvubJ9 (2025)."},{"key":"2625_CR71","first-page":"1546","volume":"4","author":"K Choudhary","year":"2024","unstructured":"Choudhary, K., Gupta, D. & Thomas, P. S. ICU-Sepsis: a benchmark MDP built from real medical data. Reinf. Learn. J. 4, 1546\u20131566 (2024).","journal-title":"Reinf. Learn. J."},{"key":"2625_CR72","doi-asserted-by":"crossref","unstructured":"Jeon, E., Choi, J.-H. & Suk, H.-I. ADT2R: Adaptive decision transformer for dynamic treatment regimes in sepsis. IEEE Trans. Neural Netw. Learn. Syst. 36, 8554\u20138564 (2024).","DOI":"10.1109\/TNNLS.2024.3442243"},{"key":"2625_CR73","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3643856","volume":"15","author":"S Job","year":"2024","unstructured":"Job, S. et al. Optimal treatment strategies for critical patients with deep reinforcement learning. ACM Trans. Intell. Syst. Technol. 15, 1\u201322 (2024).","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"2625_CR74","doi-asserted-by":"publisher","unstructured":"Preuett, L. & Teredesai, A. M. Reproducible evaluation framework for sepsis treatment strategies learned via offline reinforcement learning. In Proc. IEEE International Conference on E-health Networking, Application & Services (HealthCom), 1\u20136. https:\/\/doi.org\/10.1109\/HealthCom60970.2024.10880803 (IEEE, 2024).","DOI":"10.1109\/HealthCom60970.2024.10880803"},{"key":"2625_CR75","doi-asserted-by":"publisher","first-page":"6268","DOI":"10.1109\/JBHI.2024.3415115","volume":"28","author":"A Shirali","year":"2024","unstructured":"Shirali, A., Schubert, A. & Alaa, A. Pruning the way to reliable policies: a multi-objective deep q-learning approach to critical care. IEEE J. Biomed. Health Inform. 28, 6268\u20136279 (2024).","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"2625_CR76","unstructured":"Sukovic, A. & Radanovic, G. Reward design for justifiable sequential decision-making. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=OUkZXbbwQr (OpenReview.net, 2024)."},{"key":"2625_CR77","unstructured":"Valdettaro, F. & Faisal, A. Offline bayesian aleatoric and epistemic uncertainty quantification and posterior value optimisation in finite-state MDPs. In Proc. Fortieth Conference on Uncertainty in Artificial Intelligence Vol. 244 of Proceedings of Machine Learning Research (eds Kiyavash, N. & Mooij, J. M.) 3391\u20133409. https:\/\/proceedings.mlr.press\/v244\/valdettaro24a.html (PMLR, 2024)."},{"key":"2625_CR78","doi-asserted-by":"publisher","first-page":"5289","DOI":"10.1214\/25-EJS2459","volume":"19","author":"E Chen","year":"2025","unstructured":"Chen, E., Li, S. & Jordan, M. I. Transfer Q-learning for finite-horizon Markov decision processes. Electron. J. Stat. 19, 5289\u20135312 (2025).","journal-title":"Electron. J. Stat."},{"key":"2625_CR79","doi-asserted-by":"publisher","unstructured":"Feng, X. et al. Tailored to fit sepsis individuals: medical knowledge aware reinforcement learning model offers optimized therapeutic strategies. IEEE J. Biomed. Health Inform. https:\/\/doi.org\/10.1109\/JBHI.2025.3647877 (2025).","DOI":"10.1109\/JBHI.2025.3647877"},{"key":"2625_CR80","doi-asserted-by":"crossref","unstructured":"Gao, Y. Stable CDE autoencoders with acuity regularization for offline reinforcement learning in sepsis treatment. Preprint at https:\/\/arxiv.org\/abs\/2506.15019 (2025).","DOI":"10.53941\/tai.2025.100021"},{"key":"2625_CR81","unstructured":"Lee, J. et al. Q-function decomposition with intervention semantics for factored action spaces. In Proc. 28th International Conference on Artificial Intelligence and Statistics Vol. 258 of Proceedings of Machine Learning Research (eds Li, Y., Mandt, S., Agrawal, S. & Khan, E.) 1027\u20131035. https:\/\/proceedings.mlr.press\/v258\/lee25c.html (PMLR, 2025)."},{"key":"2625_CR82","unstructured":"Lim, Y. et al. Large language model-augmented offline reinforcement learning framework for sepsis management in critical care. npj Digit. Med. https:\/\/arxiv.org\/abs\/2508.07681https:\/\/www.nature.com\/articles\/s41746-026-02611-8 (2026)."},{"key":"2625_CR83","unstructured":"Luo, Z. Dynamic Treatment Regime for Electronic Health Record. Ph.D. thesis, University of Oxford (2025)."},{"key":"2625_CR84","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/s44230-025-00093-7","volume":"5","author":"R Tu","year":"2025","unstructured":"Tu, R. et al. Offline safe reinforcement learning for sepsis treatment: tackling variable-length episodes with sparse rewards. Hum.-Cent. Intell. Syst. 5, 63\u201376 (2025).","journal-title":"Hum.-Cent. Intell. Syst."},{"key":"2625_CR85","doi-asserted-by":"crossref","unstructured":"Oh, S., Choi, Y., Joo, H.-T. & Kim, K.-J. Federated reinforcement learning for privacy-preserving sepsis patient treatment model. ACM Trans. Intell. Syst. Technol (2025).","DOI":"10.1145\/3744655"},{"key":"2625_CR86","unstructured":"Shen, Y., Ye, Y., Xiong, H. & Chen, Y. SAFER: A calibrated risk-aware multimodal recommendation model for dynamic treatment regimes. In Proc. 42nd International Conference on Machine Learning Vol. 267 of Proceedings of Machine Learning Research (eds Singh, A. et al.) 54611\u201354631. https:\/\/proceedings.mlr.press\/v267\/shen25l.html (PMLR, 2025)."},{"key":"2625_CR87","unstructured":"Taufiq, M. F. Uncertainty quantification and causal considerations for off-policy decision making. Preprint at https:\/\/arxiv.org\/abs\/2502.06011 (2025)."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02625-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02625-2","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02625-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T11:06:18Z","timestamp":1778151978000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02625-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,7]]},"references-count":87,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["2625"],"URL":"https:\/\/doi.org\/10.1038\/s41746-026-02625-2","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5,7]]},"assertion":[{"value":"19 January 2026","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 March 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 May 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"360"}}