{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T20:46:35Z","timestamp":1774385195092,"version":"3.50.1"},"reference-count":75,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012639","name":"Prince Sultan University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012639","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Science Review"],"published-print":{"date-parts":[[2026,8]]},"DOI":"10.1016\/j.cosrev.2026.100960","type":"journal-article","created":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T03:59:40Z","timestamp":1773633580000},"page":"100960","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A comprehensive review of temporal reasoning with multimodal LLMs: Foundations, challenges, and healthcare applications"],"prefix":"10.1016","volume":"61","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1503-1083","authenticated-orcid":false,"given":"Marouene","family":"Chaieb","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0331-5499","authenticated-orcid":false,"given":"Nour","family":"Ben Ameur","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2133-0757","authenticated-orcid":false,"given":"Wadii","family":"Boulila","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.cosrev.2026.100960_bib0005","doi-asserted-by":"crossref","first-page":"30235","DOI":"10.1109\/ACCESS.2025.3535782","article-title":"Time-series large language models: a systematic review of state-of-the-art","volume":"13","author":"Abdullahi","year":"2025","journal-title":"IEEE Access"},{"key":"10.1016\/j.cosrev.2026.100960_bib0010","doi-asserted-by":"crossref","unstructured":"X. Xu, Y. Zhao, S.Y. Philip, K. Shu, Beyond numbers: a survey of time series analysis in the era of multimodal LLMs, Authorea preprints, 2025, 10.36227\/techrxiv.174317777.72957387\/v1","DOI":"10.36227\/techrxiv.174317777.72957387\/v1"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100960_bib0015","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1007\/s11633-022-1369-5","article-title":"VLP: a survey on vision-language pre-training","volume":"20","author":"Chen","year":"2023","journal-title":"Mach. Intell. Res."},{"key":"10.1016\/j.cosrev.2026.100960_bib0020","author":"Shi"},{"key":"10.1016\/j.cosrev.2026.100960_bib0025","author":"Liu"},{"key":"10.1016\/j.cosrev.2026.100960_bib0030","author":"Jin"},{"key":"10.1016\/j.cosrev.2026.100960_bib0035","author":"Jiang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0040","author":"Chang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0045","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cosrev.2026.100960_bib0050","article-title":"The transformer blueprint: a holistic guide to the transformer neural network architecture","author":"Nyandwi","year":"2023","journal-title":"Deep Learn. Revis."},{"key":"10.1016\/j.cosrev.2026.100960_bib0055","series-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers)","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"10.1016\/j.cosrev.2026.100960_bib0060","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cosrev.2026.100960_bib0065","author":"Huang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0070","author":"Li"},{"key":"10.1016\/j.cosrev.2026.100960_bib0075","author":"Quinlan"},{"key":"10.1016\/j.cosrev.2026.100960_bib0080","author":"Tao"},{"key":"10.1016\/j.cosrev.2026.100960_bib0085","author":"Wu"},{"key":"10.1016\/j.cosrev.2026.100960_bib0090","author":"Park"},{"key":"10.1016\/j.cosrev.2026.100960_bib0095","author":"Chan"},{"key":"10.1016\/j.cosrev.2026.100960_bib0100","author":"Imran"},{"key":"10.1016\/j.cosrev.2026.100960_bib0105","author":"Kim"},{"key":"10.1016\/j.cosrev.2026.100960_bib0110","author":"Zhang"},{"issue":"3","key":"10.1016\/j.cosrev.2026.100960_bib0115","article-title":"Assessments of lung nodules by an artificial intelligence chatbot using longitudinal CT images","volume":"6","author":"Mao","year":"2025","journal-title":"Cell Rep. Med."},{"key":"10.1016\/j.cosrev.2026.100960_bib0120","author":"Chen"},{"key":"10.1016\/j.cosrev.2026.100960_bib0125","author":"Zhao"},{"key":"10.1016\/j.cosrev.2026.100960_bib0130","author":"Qian"},{"key":"10.1016\/j.cosrev.2026.100960_bib0135","author":"Huang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0140","author":"Villanueva"},{"key":"10.1016\/j.cosrev.2026.100960_bib0145","series-title":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","first-page":"3549","article-title":"EMERGE: enhancing multimodal electronic health records predictive modeling with retrieval-augmented generation","author":"Zhu","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100960_bib0150","author":"Zhang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0155","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.102006","article-title":"Finger pinching and imagination classification: a fusion of CNN architectures for IoMT-enabled BCI applications","volume":"101","author":"Varone","year":"2024","journal-title":"Inf. Fusion."},{"key":"10.1016\/j.cosrev.2026.100960_bib0160","series-title":"Proceedings of the 32nd ACM International Conference on Multimedia","first-page":"2776","article-title":"MM-forecast: a multimodal approach to temporal event forecasting with large language models","author":"Li","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100960_bib0165","author":"Zhang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0170","author":"He"},{"key":"10.1016\/j.cosrev.2026.100960_bib0175","series-title":"Computer Sciences & Mathematics Forum","first-page":"22","article-title":"Exploring multi-modal LLMs for time series anomaly detection","volume":"vol. 11","author":"Niu","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100960_bib0180","author":"Yang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0185","author":"Jiang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0190","author":"Pillai"},{"key":"10.1016\/j.cosrev.2026.100960_bib0195","first-page":"1","article-title":"Smarter together: combining large language models and small models for physiological signals visual inspection","author":"Li","year":"2025","journal-title":"J. Healthc. Inform. Res."},{"issue":"1","key":"10.1016\/j.cosrev.2026.100960_bib0200","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1038\/s41746-025-01940-4","article-title":"CARE-AD: a multi-agent large language model framework for alzheimer\u2019s disease prediction using longitudinal clinical notes","volume":"8","author":"Li","year":"2025","journal-title":"npj Digital Med."},{"key":"10.1016\/j.cosrev.2026.100960_bib0205","doi-asserted-by":"crossref","unstructured":"M. Kruse, S. Hu, N. Derby, Y. Wu, S. Stonbraker, B. Yao, D. Wang, E. Goldberg, Y. Gao, Zero-shot large language models for long clinical text summarization with temporal reasoning, medRxiv (2025) 2025\u201307.","DOI":"10.1101\/2025.07.21.25331947"},{"key":"10.1016\/j.cosrev.2026.100960_bib0210","author":"Li"},{"key":"10.1016\/j.cosrev.2026.100960_bib0215","author":"Jiang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0220","author":"Chiu"},{"key":"10.1016\/j.cosrev.2026.100960_bib0225","author":"Chen"},{"key":"10.1016\/j.cosrev.2026.100960_bib0230","author":"Wang"},{"issue":"22","key":"10.1016\/j.cosrev.2026.100960_bib0235","doi-asserted-by":"crossref","first-page":"9225","DOI":"10.3390\/s23229225","article-title":"LLM multimodal traffic accident forecasting","volume":"23","author":"de Zarz\u00e0","year":"2023","journal-title":"Sensors"},{"key":"10.1016\/j.cosrev.2026.100960_bib0240","author":"Tang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0245","author":"Guan"},{"key":"10.1016\/j.cosrev.2026.100960_bib0250","first-page":"1","article-title":"A multimodal vision foundation model for clinical dermatology","author":"Yan","year":"2025","journal-title":"Nat. Med."},{"key":"10.1016\/j.cosrev.2026.100960_bib0255","author":"Zhang"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100960_bib0260","article-title":"Large language multimodal models for new-onset type 2 diabetes prediction using five-year cohort electronic health records","volume":"14","author":"Ding","year":"2024","journal-title":"Sci. Rep."},{"key":"10.1016\/j.cosrev.2026.100960_bib0265","unstructured":"T.Z. Li, J.M. Still, L. Zuo, Y. Liu, A.R. Krishnan, K.L. Sandler, F. Maldonado, T.A. Lasko, B.A. Landman, Longitudinal masked representation learning for pulmonary nodule diagnosis from language embedded EHRs, medRxiv (2025) 2025\u201305."},{"key":"10.1016\/j.cosrev.2026.100960_bib0270","author":"Newman"},{"key":"10.1016\/j.cosrev.2026.100960_bib0275","author":"Cao"},{"issue":"2","key":"10.1016\/j.cosrev.2026.100960_bib0280","doi-asserted-by":"crossref","first-page":"40","DOI":"10.3390\/safety11020040","article-title":"Leveraging bird eye view video and multimodal large language models for real-time intersection control and reasoning","volume":"11","author":"Masri","year":"2025","journal-title":"Safety"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100960_bib0285","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/sdata.2016.35","article-title":"MIMIC-III, a freely accessible critical care database","volume":"3","author":"Johnson","year":"2016","journal-title":"Sci. Data."},{"issue":"1","key":"10.1016\/j.cosrev.2026.100960_bib0290","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/s41597-022-01899-x","article-title":"MIMIC-IV, a freely accessible electronic health record dataset","volume":"10","author":"Johnson","year":"2023","journal-title":"Sci. Data"},{"issue":"3","key":"10.1016\/j.cosrev.2026.100960_bib0295","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1109\/51.932724","article-title":"The impact of the MIT-BIH arrhythmia database","volume":"20","author":"Moody","year":"2001","journal-title":"IEEE Eng. Med. Biol. Mag."},{"key":"10.1016\/j.cosrev.2026.100960_bib0300","author":"Kalyakulina"},{"issue":"8","key":"10.1016\/j.cosrev.2026.100960_bib0305","doi-asserted-by":"crossref","first-page":"1914","DOI":"10.1109\/TBME.2016.2613124","article-title":"Toward a robust estimation of respiratory rate from pulse oximeters","volume":"64","author":"Pimentel","year":"2016","journal-title":"IEEE Trans. Biomed. Eng."},{"key":"10.1016\/j.cosrev.2026.100960_bib0310","series-title":"Lrec","first-page":"3123","article-title":"The distress analysis interview corpus of human and computer interviews","volume":"vol. 14","author":"Gratch","year":"2014"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100960_bib0315","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","article-title":"MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports","volume":"6","author":"Johnson","year":"2019","journal-title":"Sci. Data."},{"issue":"1","key":"10.1016\/j.cosrev.2026.100960_bib0320","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1016\/j.jalz.2005.06.003","article-title":"Ways toward an early diagnosis in alzheimer\u2019s disease: the alzheimer\u2019s disease neuroimaging initiative (ADNI)","volume":"1","author":"Mueller","year":"2005","journal-title":"Alzheimer\u2019s Dement."},{"key":"10.1016\/j.cosrev.2026.100960_bib0325","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102433","article-title":"Rendezvous: attention mechanisms for the recognition of surgical action triplets in endoscopic videos","volume":"78","author":"Nwoye","year":"2022","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.cosrev.2026.100960_bib0330","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.121378","article-title":"ASB-CS: adaptive sparse basis compressive sensing model and its application to medical image encryption","volume":"236","author":"Jiang","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.cosrev.2026.100960_bib0335","series-title":"Proceedings of the 2014 ACM international joint conference on pervasive and ubiquitous computing","first-page":"3","article-title":"StudentLife: assessing mental health, academic performance and behavioral trends of college students using smartphones","author":"Wang","year":"2014"},{"key":"10.1016\/j.cosrev.2026.100960_bib0340","doi-asserted-by":"crossref","DOI":"10.1016\/j.dib.2019.104141","article-title":"Heterogeneous integrated dataset for maritime intelligence, surveillance, and reconnaissance","volume":"25","author":"Ray","year":"2019","journal-title":"Data Brief."},{"key":"10.1016\/j.cosrev.2026.100960_bib0345","author":"Schuhmann"},{"key":"10.1016\/j.cosrev.2026.100960_bib0350","author":"Liu"},{"key":"10.1016\/j.cosrev.2026.100960_bib0355","author":"Wang"},{"key":"10.1016\/j.cosrev.2026.100960_bib0360","author":"Kong"},{"key":"10.1016\/j.cosrev.2026.100960_bib0365","series-title":"Proceedings of the ACM Web Conference 2024","first-page":"1963","article-title":"Back to the future: towards explainable temporal reasoning with large language models","author":"Yuan","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100960_bib0370","article-title":"Self-supervised learning for graph-structured data in healthcare applications: a comprehensive review","volume":"188","author":"Atitallah","year":"2025","journal-title":"Comput. Biol. Med."},{"key":"10.1016\/j.cosrev.2026.100960_bib0375","doi-asserted-by":"crossref","DOI":"10.1016\/j.cosrev.2025.100881","article-title":"Fusing LLMs and diffusion models: a comprehensive survey of progress, challenges, and future directions in generative AI","volume":"60","author":"Benjdira","year":"2026","journal-title":"Comput. Sci. Rev."}],"container-title":["Computer Science Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1574013726000687?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1574013726000687?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T17:19:39Z","timestamp":1774372779000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1574013726000687"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,8]]},"references-count":75,"alternative-id":["S1574013726000687"],"URL":"https:\/\/doi.org\/10.1016\/j.cosrev.2026.100960","relation":{},"ISSN":["1574-0137"],"issn-type":[{"value":"1574-0137","type":"print"}],"subject":[],"published":{"date-parts":[[2026,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A comprehensive review of temporal reasoning with multimodal LLMs: Foundations, challenges, and healthcare applications","name":"articletitle","label":"Article Title"},{"value":"Computer Science Review","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.cosrev.2026.100960","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"100960"}}