{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T08:01:34Z","timestamp":1782547294653,"version":"3.54.5"},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T00:00:00Z","timestamp":1722988800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T00:00:00Z","timestamp":1722988800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"abstract":"<jats:title>Abstract<\/jats:title><jats:p>This study evaluates multimodal AI models\u2019 accuracy and responsiveness in answering NEJM Image Challenge questions, juxtaposed with human collective intelligence, underscoring AI\u2019s potential and current limitations in clinical diagnostics. Anthropic\u2019s Claude 3 family demonstrated the highest accuracy among the evaluated AI models, surpassing the average human accuracy, while collective human decision-making outperformed all AI models. GPT-4 Vision Preview exhibited selectivity, responding more to easier questions with smaller images and longer questions.<\/jats:p>","DOI":"10.1038\/s41746-024-01208-3","type":"journal-article","created":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T11:04:12Z","timestamp":1723028652000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":67,"title":["Evaluating multimodal AI in medical diagnostics"],"prefix":"10.1038","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8570-1601","authenticated-orcid":false,"given":"Robert","family":"Kaczmarczyk","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7462-1868","authenticated-orcid":false,"given":"Theresa Isabelle","family":"Wilhelm","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0248-537X","authenticated-orcid":false,"given":"Ron","family":"Martin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jonas","family":"Roos","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,8,7]]},"reference":[{"key":"1208_CR1","doi-asserted-by":"publisher","first-page":"AIp2300031","DOI":"10.1056\/AIp2300031","volume":"1","author":"AV Eriksen","year":"2023","unstructured":"Eriksen, A. V., M\u00f6ller, S. & Ryg, J. Use of GPT-4 to Diagnose Complex Clinical Cases. NEJM AI 1, AIp2300031 (2023).","journal-title":"NEJM AI"},{"key":"1208_CR2","unstructured":"Wu, C. et al. Can GPT-4V(ision) Serve Medical Applications? Case Studies on GPT-4V for Multimodal Medical Diagnosis. Preprint at http:\/\/arxiv.org\/abs\/2310.09909 (2023)."},{"key":"1208_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-023-00873-0","volume":"6","author":"B Mesk\u00f3","year":"2023","unstructured":"Mesk\u00f3, B. & Topol, E. J. The imperative for regulatory oversight of large language models (or generative AI) in healthcare. npj Digit. Med 6, 1\u20136 (2023).","journal-title":"npj Digit. Med"},{"key":"1208_CR4","doi-asserted-by":"publisher","first-page":"e2325000","DOI":"10.1001\/jamanetworkopen.2023.25000","volume":"6","author":"Y-F Shea","year":"2023","unstructured":"Shea, Y.-F., Lee, C. M. Y., Ip, W. C. T., Luk, D. W. A. & Wong, S. S. W. Use of GPT-4 to Analyze Medical Records of Patients With Extensive Investigations and Delayed Diagnosis. JAMA Netw. Open 6, e2325000 (2023).","journal-title":"JAMA Netw. Open"},{"key":"1208_CR5","doi-asserted-by":"publisher","unstructured":"Brin, D. et al. Assessing GPT-4 Multimodal Performance in Radiological Image Analysis. 2023.11.15.23298583 Preprint at https:\/\/doi.org\/10.1101\/2023.11.15.23298583 (2023).","DOI":"10.1101\/2023.11.15.23298583"},{"key":"1208_CR6","doi-asserted-by":"publisher","unstructured":"Reese, J. T. et al. On the limitations of large language models in clinical diagnosis. https:\/\/doi.org\/10.1101\/2023.07.13.23292613 (2024).","DOI":"10.1101\/2023.07.13.23292613"},{"key":"1208_CR7","doi-asserted-by":"publisher","first-page":"e51391","DOI":"10.2196\/51391","volume":"10","author":"T Abdullahi","year":"2024","unstructured":"Abdullahi, T., Singh, R. & Eickhoff, C. Learning to Make Rare and Complex Diagnoses With Generative AI Assistance: Qualitative Study of Popular Large Language Models. JMIR Med Educ. 10, e51391 (2024).","journal-title":"JMIR Med Educ."},{"key":"1208_CR8","doi-asserted-by":"publisher","unstructured":"Van, M.-H., Verma, P. & Wu, X. On Large Visual Language Models for Medical Imaging Analysis: An Empirical Study. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2402.14162 (2024).","DOI":"10.48550\/arXiv.2402.14162"},{"key":"1208_CR9","unstructured":"Restrictions in AI Content Generation - Community. OpenAI Developer Forum https:\/\/community.openai.com\/t\/restrictions-in-ai-content-generation\/717246 (2024)."},{"key":"1208_CR10","unstructured":"Safety settings | Google AI for Developers. Google for Developers https:\/\/ai.google.dev\/gemini-api\/docs\/safety-settings."},{"key":"1208_CR11","unstructured":"Content moderation. Anthropic https:\/\/docs.anthropic.com\/en\/docs\/content-moderation."},{"key":"1208_CR12","unstructured":"Surowiecki, J. The Wisdom of Crowds: Why the Many Are Smarter than the Few and How Collective Wisdom Shapes Business, Economies, Societies, and Nations. xxi, 296 (Doubleday & Co, New York, NY, US, 2004)."},{"key":"1208_CR13","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1038\/s41746-024-01103-x","volume":"7","author":"MP Salinas","year":"2024","unstructured":"Salinas, M. P. et al. A systematic review and meta-analysis of artificial intelligence versus clinicians for skin cancer diagnosis. NPJ Digit Med 7, 125 (2024).","journal-title":"NPJ Digit Med"},{"key":"1208_CR14","doi-asserted-by":"publisher","unstructured":"Singh, H., Devi, K. S., Gaur, S. S. & Bhattacharjee, R. Automated Skin Cancer Detection using Deep Learning with Self-Attention Mechanism. in 2023 International Conference on Computational Intelligence and Sustainable Engineering Solutions (CISES) 464\u2013469 (IEEE, Greater Noida, India, 2023). https:\/\/doi.org\/10.1109\/CISES58720.2023.10183586.","DOI":"10.1109\/CISES58720.2023.10183586"},{"key":"1208_CR15","doi-asserted-by":"publisher","unstructured":"Menz, B. D. et al. Current safeguards, risk mitigation, and transparency measures of large language models against the generation of health disinformation: repeated cross sectional analysis. BMJ e078538 (2024) https:\/\/doi.org\/10.1136\/bmj-2023-078538.","DOI":"10.1136\/bmj-2023-078538"},{"key":"1208_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s43856-023-00370-1","volume":"3","author":"J Clusmann","year":"2023","unstructured":"Clusmann, J. et al. The future landscape of large language models in medicine. Commun. Med 3, 1\u20138 (2023).","journal-title":"Commun. Med"},{"key":"1208_CR17","unstructured":"EU Artificial Intelligence Act | Up-to-date developments and analyses of the EU AI Act. https:\/\/artificialintelligenceact.eu\/."},{"key":"1208_CR18","doi-asserted-by":"publisher","DOI":"10.2214\/AJR.23.30573","volume":"222","author":"CH Savage","year":"2024","unstructured":"Savage, C. H. et al. General-Purpose Large Language Models Versus a Domain-Specific Natural Language Processing Tool for Label Extraction From Chest Radiograph Reports. Am. J. Roentgenol. 222, e2330573 (2024).","journal-title":"Am. J. Roentgenol."},{"key":"1208_CR19","unstructured":"March 7, 2024 | NEJM. New England Journal of Medicine https:\/\/www.nejm.org\/image-challenge."},{"key":"1208_CR20","unstructured":"cogvlm-chat-v1.1.zip \u00b7 THUDM\/CogVLM at main. https:\/\/huggingface.co\/THUDM\/CogVLM\/blob\/main\/cogvlm-chat-v1.1.zip (2023)."},{"key":"1208_CR21","unstructured":"liuhaotian\/llava-v1.6-34b \u00b7 Hugging Face. https:\/\/huggingface.co\/liuhaotian\/llava-v1.6-34b (2024)."},{"key":"1208_CR22","unstructured":"OpenGVLab\/InternVL-Chat-V1-5-Int8 \u00b7 Hugging Face. https:\/\/huggingface.co\/OpenGVLab\/InternVL-Chat-V1-5-Int8 (2022)."},{"key":"1208_CR23","unstructured":"openai\/openai-python. OpenAI (2024)."},{"key":"1208_CR24","unstructured":"google\/generative-ai-python. Google (2024)."},{"key":"1208_CR25","unstructured":"Introducing the next generation of Claude. https:\/\/www.anthropic.com\/news\/claude-3-family."},{"key":"1208_CR26","unstructured":"anthropics\/anthropic-sdk-python. Anthropic (2024)."},{"key":"1208_CR27","unstructured":"Hugging Face \u2013 The AI community building the future. https:\/\/huggingface.co\/ (2024)."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01208-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01208-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01208-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T11:09:18Z","timestamp":1723028958000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01208-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,7]]},"references-count":27,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["1208"],"URL":"https:\/\/doi.org\/10.1038\/s41746-024-01208-3","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,7]]},"assertion":[{"value":"15 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"205"}}