{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T03:48:04Z","timestamp":1782272884488,"version":"3.54.5"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T00:00:00Z","timestamp":1760745600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T00:00:00Z","timestamp":1760745600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12505412"],"award-info":[{"award-number":["12505412"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Basic Research Program of Shaanxi","award":["2025JC-YBQN-1093"],"award-info":[{"award-number":["2025JC-YBQN-1093"]}]},{"name":"Institutional Foundation of The First Affiliated Hospital of Xi\u2019an Jiaotong University","award":["2024-QN-26"],"award-info":[{"award-number":["2024-QN-26"]}]},{"name":"Key Research and Development Projects of Shaanxi","award":["2025CY-YBXM-196"],"award-info":[{"award-number":["2025CY-YBXM-196"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Med Syst"],"DOI":"10.1007\/s10916-025-02284-y","type":"journal-article","created":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T07:04:26Z","timestamp":1760771066000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Diagnosis and Triage Performance of Contemporary Large Language Models on Short Clinical Vignettes"],"prefix":"10.1007","volume":"49","author":[{"given":"Lei","family":"Xu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenzhe","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xin","family":"Huang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,10,18]]},"reference":[{"key":"2284_CR1","unstructured":"Achiam, J., Adler, S., Agarwal, S., Ahmad, L., Akkaya, I., Aleman, F. L., et al. (2023). Gpt-4 technical report. arXiv preprint arXiv:2303.08774."},{"key":"2284_CR2","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., Mann, B., Ryder, N., Subbiah, M., Kaplan, J. D., Dhariwal, P., et al. (2020). Language models are few-shot learners. Advances in neural information processing systems, 33, 1877\u20131901.","journal-title":"Advances in neural information processing systems"},{"key":"2284_CR3","unstructured":"Guo, D., Zhu, Q., Yang, D., Xie, Z., Dong, K., Zhang, W., et al. (2024). DeepSeek-Coder: When the Large Language Model Meets Programming\u2013The Rise of Code Intelligence. arXiv preprint arXiv:2401.14196."},{"key":"2284_CR4","unstructured":"Touvron, H., Lavril, T., Izacard, G., Martinet, X., Lachaux, M.-A., Lacroix, T., et al. (2023). Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971."},{"issue":"7956","key":"2284_CR5","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1038\/s41586-023-05881-4","volume":"616","author":"M Moor","year":"2023","unstructured":"Moor, M., Banerjee, O., Abad, Z. S. H., Krumholz, H. M., Leskovec, J., Topol, E. J., et al. (2023). Foundation models for generalist medical artificial intelligence. Nature, 616(7956), 259\u2013265.","journal-title":"Nature"},{"issue":"7972","key":"2284_CR6","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal, K., Azizi, S., Tu, T., Mahdavi, S. S., Wei, J., Chung, H. W., et al. (2023). Large language models encode clinical knowledge. Nature, 620(7972), 172\u2013180.","journal-title":"Nature"},{"issue":"2","key":"2284_CR7","doi-asserted-by":"publisher","first-page":"e0000198","DOI":"10.1371\/journal.pdig.0000198","volume":"2","author":"TH Kung","year":"2023","unstructured":"Kung, T. H., Cheatham, M., Medenilla, A., Sillos, C., De Leon, L., Elepa\u00f1o, C., et al. (2023). Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models. PLoS digital health, 2(2), e0000198.","journal-title":"PLoS digital health"},{"issue":"8","key":"2284_CR8","doi-asserted-by":"publisher","first-page":"e555-e561","DOI":"10.1016\/S2589-7500(24)00097-9","volume":"6","author":"DM Levine","year":"2024","unstructured":"Levine, D. M., Tuwani, R., Kompa, B., Varma, A., Finlayson, S. G., Mehrotra, A., et al. (2024). The diagnostic and triage accuracy of the GPT-3 artificial intelligence model: an observational study. The Lancet Digital Health, 6(8), e555-e561.","journal-title":"The Lancet Digital Health"},{"key":"2284_CR9","unstructured":"Bubeck, S., Chadrasekaran, V., Eldan, R., Gehrke, J., Horvitz, E., Kamar, E., et al. (2023). Sparks of artificial general intelligence: Early experiments with gpt-4. ArXiv."},{"key":"2284_CR10","unstructured":"Nori, H., King, N., McKinney, S. M., Carignan, D., & Horvitz, E. (2023). Capabilities of gpt-4 on medical challenge problems. arXiv preprint arXiv:2303.13375."},{"key":"2284_CR11","unstructured":"Bi, X., Chen, D., Chen, G., Chen, S., Dai, D., Deng, C., et al. (2024). Deepseek llm: Scaling open-source language models with longtermism. arXiv preprint arXiv:2401.02954."},{"key":"2284_CR12","unstructured":"Team, G., Anil, R., Borgeaud, S., Alayrac, J.-B., Yu, J., Soricut, R., et al. (2023). Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805."},{"issue":"1","key":"2284_CR13","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1038\/s41591-021-01614-0","volume":"28","author":"P Rajpurkar","year":"2022","unstructured":"Rajpurkar, P., Chen, E., Banerjee, O., & Topol, E. J. (2022). AI in health and medicine. Nature medicine, 28(1), 31\u201338.","journal-title":"Nature medicine"},{"issue":"1","key":"2284_CR14","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1146\/annurev-biodatasci-092820-114757","volume":"4","author":"IY Chen","year":"2021","unstructured":"Chen, I. Y., Pierson, E., Rose, S., Joshi, S., Ferryman, K., & Ghassemi, M. (2021). Ethical machine learning in healthcare. Annual review of biomedical data science, 4(1), 123\u2013144.","journal-title":"Annual review of biomedical data science"},{"issue":"6464","key":"2284_CR15","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1126\/science.aax2342","volume":"366","author":"Z Obermeyer","year":"2019","unstructured":"Obermeyer, Z., Powers, B., Vogeli, C., & Mullainathan, S. (2019). Dissecting racial bias in an algorithm used to manage the health of populations. Science, 366(6464), 447\u2013453.","journal-title":"Science"},{"key":"2284_CR16","doi-asserted-by":"crossref","unstructured":"Ferrara, E. (2023). Should chatgpt be biased? challenges and risks of bias in large language models. arXiv preprint arXiv:2304.03738.","DOI":"10.2139\/ssrn.4627814"},{"issue":"3","key":"2284_CR17","doi-asserted-by":"publisher","first-page":"e213287-e213287","DOI":"10.1001\/jamanetworkopen.2021.3287","volume":"4","author":"DM Levine","year":"2021","unstructured":"Levine, D. M., & Mehrotra, A. (2021). Assessment of diagnosis and triage in validated case vignettes among nonphysicians before and after internet search. JAMA network open, 4(3), e213287-e213287.","journal-title":"JAMA network open"},{"key":"2284_CR18","unstructured":"He, S. (2024). Prompting ChatGPT for translation: A comparative analysis of translation brief and persona prompts. arXiv preprint arXiv:2403.00127."},{"issue":"1","key":"2284_CR19","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1186\/s44247-024-00096-7","volume":"2","author":"M Kopka","year":"2024","unstructured":"Kopka, M., & Feufel, M. A. (2024). Software symptomcheckR: an R package for analyzing and visualizing symptom checker triage performance. BMC Digital Health, 2(1), 43.","journal-title":"BMC Digital Health"},{"key":"2284_CR20","doi-asserted-by":"publisher","first-page":"205520762311949","DOI":"10.1177\/20552076231194929","volume":"9","author":"M Kopka","year":"2023","unstructured":"Kopka, M., Feufel, M. A., Berner, E. S., & Schmieding, M. L. (2023). How suitable are clinical vignettes for the evaluation of symptom checker apps? A test theoretical perspective. Digital Health, 9, 20552076231194929.","journal-title":"Digital Health"},{"issue":"1","key":"2284_CR21","doi-asserted-by":"publisher","first-page":"e49995","DOI":"10.2196\/49995","volume":"11","author":"H Fraser","year":"2023","unstructured":"Fraser, H., Crossland, D., Bacher, I., Ranney, M., Madsen, T., & Hilliard, R. (2023). Comparison of diagnostic and triage accuracy of Ada health and WebMD symptom checkers, ChatGPT, and physicians for patients in an emergency department: clinical data analysis study. JMIR mHealth and uHealth, 11(1), e49995.","journal-title":"JMIR mHealth and uHealth"},{"key":"2284_CR22","doi-asserted-by":"publisher","first-page":"e47532","DOI":"10.2196\/47532","volume":"9","author":"N Ito","year":"2023","unstructured":"Ito, N., Kadomatsu, S., Fujisawa, M., Fukaguchi, K., Ishizawa, R., Kanda, N., et al. (2023). The accuracy and potential racial and ethnic biases of GPT-4 in the diagnosis and triage of health conditions: evaluation study. JMIR Medical Education, 9, e47532.","journal-title":"JMIR Medical Education"},{"issue":"1","key":"2284_CR23","doi-asserted-by":"publisher","first-page":"30614","DOI":"10.1038\/s41598-024-83844-z","volume":"14","author":"M Kopka","year":"2024","unstructured":"Kopka, M., Napierala, H., Privoznik, M., Sapunova, D., Zhang, S., & Feufel, M. A. (2024). The RepVig framework for designing use-case specific representative vignettes and evaluating triage accuracy of laypeople and symptom assessment applications. Scientific Reports, 14(1), 30614.","journal-title":"Scientific Reports"},{"issue":"8","key":"2284_CR24","doi-asserted-by":"publisher","first-page":"839","DOI":"10.3390\/diagnostics14080839","volume":"14","author":"A Frosolini","year":"2024","unstructured":"Frosolini, A., Catarzi, L., Benedetti, S., Latini, L., Chisci, G., Franz, L., et al. (2024). The role of large language models (LLMs) in providing triage for maxillofacial trauma cases: a preliminary study. Diagnostics, 14(8), 839.","journal-title":"Diagnostics"},{"key":"2284_CR25","doi-asserted-by":"publisher","first-page":"e53297","DOI":"10.2196\/53297","volume":"26","author":"L Masanneck","year":"2024","unstructured":"Masanneck, L., Schmidt, L., Seifert, A., K\u00f6lsche, T., Huntemann, N., Jansen, R., et al. (2024). Triage performance across large language models, ChatGPT, and untrained doctors in emergency medicine: comparative study. Journal of Medical Internet Research, 26, e53297.","journal-title":"Journal of Medical Internet Research"},{"key":"2284_CR26","doi-asserted-by":"publisher","first-page":"e55648","DOI":"10.2196\/55648","volume":"26","author":"JM Franc","year":"2024","unstructured":"Franc, J. M., Hertelendy, A. J., Cheng, L., Hata, R., & Verde, M. (2024). Accuracy of a Commercial Large Language Model (ChatGPT) to Perform Disaster Triage of Simulated Patients Using the Simple Triage and Rapid Treatment (START) Protocol: Gage Repeatability and Reproducibility Study. Journal of Medical Internet Research, 26, e55648.","journal-title":"Journal of Medical Internet Research"},{"key":"2284_CR27","doi-asserted-by":"crossref","unstructured":"Sonoda, Y., Kurokawa, R., Hagiwara, A., Asari, Y., Fukushima, T., Kanzawa, J., et al. (2024). Structured clinical reasoning prompt enhances LLM\u2019s diagnostic capabilities in diagnosis please quiz cases. Japanese Journal of Radiology, 43(4), 586-592.","DOI":"10.1101\/2024.09.01.24312894"},{"key":"2284_CR28","unstructured":"Guo, G., Zhang, K., Hoo, B., Cai, Y., Lu, X., Peng, N., et al. (2025). Structured Outputs Enable General-Purpose LLMs to be Medical Experts. arXiv preprint arXiv:2503.03194."},{"key":"2284_CR29","doi-asserted-by":"crossref","unstructured":"Haim, G. B., Saban, M., Barash, Y., Cirulnik, D., Shaham, A., Eisenman, B. Z., et al. (2024). Evaluating Large Language Model-Assisted Emergency Triage: A Comparison of Acuity Assessments by GPT\u20104 and Medical Experts. Journal of Clinical Nursing, 0, 1\u20137.","DOI":"10.1111\/jocn.17490"}],"container-title":["Journal of Medical Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10916-025-02284-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10916-025-02284-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10916-025-02284-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T19:02:50Z","timestamp":1760814170000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10916-025-02284-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,18]]},"references-count":29,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["2284"],"URL":"https:\/\/doi.org\/10.1007\/s10916-025-02284-y","relation":{},"ISSN":["1573-689X"],"issn-type":[{"value":"1573-689X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,18]]},"assertion":[{"value":"4 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This study was a secondary analysis of a previously published vignette dataset. The vignettes and their validation procedures were developed based on the study of Levine et al., which received ethics exemption from the Institutional Review Board of Harvard Medical School. The authors of the current study had no affiliation with Harvard Medical School, and no new human subjects research was conducted.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics Approval and Consent to Participate"}},{"value":"The authors declare no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"141"}}