{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T09:37:49Z","timestamp":1774085869321,"version":"3.50.1"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:00:00Z","timestamp":1758844800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:00:00Z","timestamp":1758844800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"The Mark and Debra Leslie fund for AI in Healthcare"},{"DOI":"10.13039\/100023581","name":"National Science Foundation Graduate Research Fellowship Program","doi-asserted-by":"publisher","award":["NSF 23-605"],"award-info":[{"award-number":["NSF 23-605"]}],"id":[{"id":"10.13039\/100023581","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-01965-9","type":"journal-article","created":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T12:43:23Z","timestamp":1758890603000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["TIMER: temporal instruction modeling and evaluation for longitudinal clinical records"],"prefix":"10.1038","volume":"8","author":[{"given":"Hejie","family":"Cui","sequence":"first","affiliation":[]},{"given":"Alyssa","family":"Unell","sequence":"additional","affiliation":[]},{"given":"Bowen","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Jason Alan","family":"Fries","sequence":"additional","affiliation":[]},{"given":"Emily","family":"Alsentzer","sequence":"additional","affiliation":[]},{"given":"Sanmi","family":"Koyejo","sequence":"additional","affiliation":[]},{"given":"Nigam H.","family":"Shah","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,26]]},"reference":[{"key":"1965_CR1","unstructured":"Li, T., Zhang, G., Do, Q. D., Yue, X., & Chen, W. Long-context LLMs Struggle with Long In-context Learning. Trans. Mach. Learn Res. https:\/\/openreview.net\/forum?id=Cw2xlg0e46 (2025)."},{"key":"1965_CR2","unstructured":"Kuratov, Y. et al. BABILong: Testing the Limits of LLMs with Long Context Reasoning-in-a-Haystack. In A. Globerson, L. et al. (eds.) Advances in Neural Information Processing Systems (Vol. 37, pp 106519\u2013106554) (2024)."},{"key":"1965_CR3","doi-asserted-by":"publisher","first-page":"S46","DOI":"10.1097\/MLR.0000000000001298","volume":"58","author":"N Huguet","year":"2020","unstructured":"Huguet, N. et al. Using electronic health records in longitudinal studies: estimating patient attrition. Med. Care 58, S46\u2013S52 (2020).","journal-title":"Med. Care"},{"key":"1965_CR4","unstructured":"Wornow, M. et al. Context Clues: Evaluating Long Context Models for Clinical Prediction Tasks on EHR Data. The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zg3ec1TdAP (2025)."},{"key":"1965_CR5","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal, K. et al. Large language models encode clinical knowledge. Nature 620, 172\u2013180 (2023).","journal-title":"Nature"},{"key":"1965_CR6","doi-asserted-by":"publisher","first-page":"1801","DOI":"10.1093\/jamia\/ocae202","volume":"31","author":"Z Lu","year":"2024","unstructured":"Lu, Z. et al. Large language models in biomedicine and health: current research landscape and future directions. J. Am. Med. Inform. Assoc. 31, 1801\u20131811 (2024).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1965_CR7","doi-asserted-by":"publisher","first-page":"1964","DOI":"10.1093\/jamia\/ocae131","volume":"31","author":"MM Lucas","year":"2024","unstructured":"Lucas, M. M., Yang, J., Pomeroy, J. K. & Yang, C. C. Reasoning with large language models for medical question answering. J. Am. Med. Inform. Assoc. 31, 1964\u20131975 (2024).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1965_CR8","doi-asserted-by":"publisher","first-page":"2613","DOI":"10.1038\/s41591-024-03097-1","volume":"30","author":"P Hager","year":"2024","unstructured":"Hager, P. et al. Evaluating and mitigating limitations of large language models in clinical decision making. Nat. Med. 30, 2613\u20132622 (2024).","journal-title":"Nat. Med."},{"key":"1965_CR9","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1001\/jama.2024.21700","volume":"333","author":"S Bedi","year":"2025","unstructured":"Bedi, S. et al. Testing and Evaluation of Health Care Applications of Large Language Models: A Systematic Review. JAMA 333, 319\u2013328, https:\/\/doi.org\/10.1001\/jama.2024.21700 (2025).","journal-title":"JAMA"},{"key":"1965_CR10","doi-asserted-by":"publisher","unstructured":"Wang, Y., & Zhao, Y. TRAM: Benchmarking Temporal Reasoning for Large Language Models. In L.-W. Ku, A. Martins, & V. Srikumar (eds.) Findings of the Association for Computational Linguistics, ACL 2024, Bangkok, Thailand and virtual meeting, 2024 (pp. 6389\u20136415). Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/V1\/2024.FINDINGS-ACL.382 (2024).","DOI":"10.18653\/V1\/2024.FINDINGS-ACL.382"},{"key":"1965_CR11","unstructured":"Fatemi, B. et al. Test of Time: A Benchmark for Evaluating LLMs on Temporal Reasoning. The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, 2025. https:\/\/openreview.net\/forum?id=44CoQe6VCq (2025)."},{"key":"1965_CR12","unstructured":"Herel, D., Bartek, V. & Mikolov, T. Time awareness in large language models: benchmarking fact recall across time. Preprint at https:\/\/arxiv.org\/abs\/2409.13338 (2024)."},{"key":"1965_CR13","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L. et al. Training language models to follow instructions with human feedback. NeurIPS 35, 27730\u201327744 (2022).","journal-title":"NeurIPS"},{"key":"1965_CR14","unstructured":"Zhang, S. et al. Instruction tuning for large language models: a survey. Preprint at https:\/\/arxiv.org\/abs\/2308.10792 (2023)."},{"key":"1965_CR15","unstructured":"Wu, Z., Dadu, A., Nalls, M., Faghri, F. & Sun, J. Instruction tuning large language models to understand electronic health records. In NeurIPS Datasets and Benchmarks Track https:\/\/openreview.net\/forum?id=Dgy5WVgPd2 (2024)."},{"key":"1965_CR16","doi-asserted-by":"publisher","unstructured":"Fleming, S. L. et al. MedAlign: A Clinician-Generated Dataset for Instruction Following with Electronic Medical Records. In M. J. Wooldridge, J. G. Dy, & S. Natarajan (eds.) Thirty-Eighth AAAI Conference on Artificial Intelligence, AAAI 2024, Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence, IAAI 2024, Fourteenth Symposium on Educational Advances in Artificial Intelligence, EAAI 2014, February 20-27, 2024, Vancouver, Canada (pp. 22021\u201322030). https:\/\/doi.org\/10.1609\/AAAI.V38I20.30205 (AAAI Press, 2024).","DOI":"10.1609\/AAAI.V38I20.30205"},{"key":"1965_CR17","unstructured":"Datta, S. et al. A new paradigm for accelerating clinical data science at Stanford medicine. Preprint at https:\/\/arxiv.org\/abs\/2003.10534 (2020)."},{"key":"1965_CR18","doi-asserted-by":"publisher","unstructured":"Xie, Q. et al. Medical foundation large language models for comprehensive text analysis and beyond. npj Digit. Med. 8, 141 https:\/\/doi.org\/10.1038\/s41746-025-01533-1 (2025).","DOI":"10.1038\/s41746-025-01533-1"},{"key":"1965_CR19","unstructured":"Chen, Z. et al. Meditron-70b: scaling medical pretraining for large language models. Preprint at https:\/\/arxiv.org\/abs\/2311.16079 (2023)."},{"key":"1965_CR20","doi-asserted-by":"crossref","unstructured":"Zeng, D., Qin, Y., Sheng, B. & Wong, T. Y. Deepseek\u2019s \u201clow-cost\" adoption across china\u2019s hospital systems: Too fast, too soon? JAMA 333, 1866\u20131869 (2025).","DOI":"10.1001\/jama.2025.6571"},{"key":"1965_CR21","unstructured":"Team, G. et al. Gemini 1.5: unlocking multimodal understanding across millions of tokens of context. Preprint at https:\/\/arxiv.org\/abs\/2403.05530 (2024)."},{"key":"1965_CR22","unstructured":"Llama, M. Llama-3.1-8b-instruct. https:\/\/huggingface.co\/meta-llama\/Llama-3.1-8B-Instruct (2024)."},{"key":"1965_CR23","unstructured":"Hu, E. J. et al. LoRA: Low-Rank Adaptation of Large Language Models. The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, 2022. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9 (2022)."},{"key":"1965_CR24","unstructured":"Qwen et al. Qwen2.5: A party of foundation models (2025)."},{"key":"1965_CR25","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K. Q., & Artzi, Y. BERTScore: Evaluating Text Generation with BERT. 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, 2020. https:\/\/openreview.net\/forum?id=SkeHuCVFDr (2020)."},{"key":"1965_CR26","unstructured":"Chin-Yew Lin. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out, pages 74\u201381, Barcelona, Spain. Association for Computational Linguistics. (2004)."},{"key":"1965_CR27","doi-asserted-by":"crossref","unstructured":"Maja Popovi\u0107. chrF: character n-gram F-score for automatic MT evaluation. In Proceedings of the Tenth Workshop on Statistical Machine Translation, pages 392\u2013395, Lisbon, Portugal. Association for Computational Linguistics. (2015).","DOI":"10.18653\/v1\/W15-3049"},{"key":"1965_CR28","unstructured":"Satanjeev Banerjee and Alon Lavie. METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, pages 65\u201372, Ann Arbor, Michigan. Association for Computational Linguistics. (2005)."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01965-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01965-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01965-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T12:43:32Z","timestamp":1758890612000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01965-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,26]]},"references-count":28,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1965"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-01965-9","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,26]]},"assertion":[{"value":"17 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"577"}}