{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T14:15:15Z","timestamp":1780409715362,"version":"3.54.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Science and Technology Research Project of Henan Province","award":["No. 252102311241, No. 242102311075"],"award-info":[{"award-number":["No. 252102311241, No. 242102311075"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Med Syst"],"DOI":"10.1007\/s10916-025-02264-2","type":"journal-article","created":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T06:23:49Z","timestamp":1759904629000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Evaluation of DeepSeek-R1 for Ophthalmic Diagnosis and Reasoning: A Comparison with OpenAI o1 and o3"],"prefix":"10.1007","volume":"49","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7685-880X","authenticated-orcid":false,"given":"Shuai","family":"Ming","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xi","family":"Yao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qingge","family":"Guo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dandan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaohong","family":"Guo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kunpeng","family":"Xie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bo","family":"Lei","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,10,8]]},"reference":[{"issue":"7956","key":"2264_CR1","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1038\/s41586-023-05881-4","volume":"616","author":"M Moor","year":"2023","unstructured":"Moor M, Banerjee O, Abad Z, et al. Foundation models for generalist medical artificial intelligence. Nature 2023;616(7956):259-265. https:\/\/doi.org\/10.1038\/s41586-023-05881-4.PubMed: 37045921","journal-title":"Nature"},{"issue":"4","key":"2264_CR2","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1001\/jama.2024.21700","volume":"333","author":"S Bedi","year":"2025","unstructured":"Bedi S, Liu Y, Orr-Ewing L, et al. Testing and Evaluation of Health Care Applications of Large Language Models: A Systematic Review. JAMA 2025;333(4):319-328. https:\/\/doi.org\/10.1001\/jama.2024.21700.PubMed: 39405325","journal-title":"JAMA"},{"issue":"2","key":"2264_CR3","doi-asserted-by":"publisher","first-page":"e2457879","DOI":"10.1001\/jamanetworkopen.2024.57879","volume":"8","author":"B Huo","year":"2025","unstructured":"Huo B, Boyle A, Marfo N, et al. Large Language Models for Chatbot Health Advice Studies: A Systematic Review. JAMA Netw Open 2025;8(2):e2457879. https:\/\/doi.org\/10.1001\/jamanetworkopen.2024.57879.PubMed: 39903463","journal-title":"JAMA Netw Open"},{"issue":"1","key":"2264_CR4","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1097\/ACM.0000-0000000","volume":"99","author":"CK Boscardin","year":"2024","unstructured":"Boscardin CK, Gin B, Golde PB, Hauer KE. ChatGPT and Generative Artificial Intelligence for Medical Education: Potential Impact and Opportunity. Acad Med 2024;99(1):22-27. https:\/\/doi.org\/10.1097\/ACM.0000-0000000 -05439.PubMed: 37651677","journal-title":"Acad Med"},{"issue":"2","key":"2264_CR5","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1097\/CM9.0000000000003456","volume":"138","author":"X Yang","year":"2025","unstructured":"Yang X, Li T, Su Q, et al. Application of large language models in disease diagnosis and treatment. Chin Med J (Engl) 2025;138(2):130-142. https:\/\/doi.org\/10.1097\/CM9.0000000000003456.PubMed: 39722188","journal-title":"Chin Med J (Engl)"},{"issue":"1","key":"2264_CR6","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1038\/s41746-025-01684-1","volume":"8","author":"F Gaber","year":"2025","unstructured":"Gaber F, Shaik M, Allega F, et al. Evaluating large language model workflows in clinical decision support for triage and referral and diagnosis. NPJ Digit Med 2025;8(1):263. https:\/\/doi.org\/10.1038\/s41746-025-01684-1.PubMed: 40346344","journal-title":"NPJ Digit Med"},{"issue":"10","key":"2264_CR7","doi-asserted-by":"publisher","first-page":"e2440969","DOI":"10.1001\/jamanetworkopen.2024.40969","volume":"7","author":"E Goh","year":"2024","unstructured":"Goh E, Gallo R, Hom J, et al. Large Language Model Influence on Diagnostic Reasoning: A Randomized Clinical Trial. JAMA Netw Open 2024;7(10):e2440969. https:\/\/doi.org\/10.1001\/jamanetworkopen.2024.40969.PubMed: 39466245","journal-title":"JAMA Netw Open"},{"issue":"1","key":"2264_CR8","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1038\/s41467-024-55628-6","volume":"16","author":"M Griot","year":"2025","unstructured":"Griot M, Hemptinne C, Vanderdonckt J, Yuksel D. Large Language Models lack essential metacognition for reliable medical reasoning. Nat Commun 2025;16(1):642. https:\/\/doi.org\/10.1038\/s41467-024-55628-6.PubMed: 39809759","journal-title":"Nat Commun"},{"key":"2264_CR9","doi-asserted-by":"publisher","unstructured":"Wei JS, Wang XZ, Schuurmans D, et al. Chain-of-thought prompting elicits reasoning in large language models.\u00a0Adv Neural Inf Process\u00a02022;35:24824\u201337. https:\/\/doi.org\/10.5555\/3600270.3602070","DOI":"10.5555\/3600270.3602070"},{"issue":"8051","key":"2264_CR10","doi-asserted-by":"publisher","first-page":"609","DOI":"10.1038\/d41586-025-00460-1","volume":"638","author":"J Dreyer","year":"2025","unstructured":"Dreyer J. China made waves with Deepseek, but its real ambition is AI-driven industrial innovation. Nature 2025;638(8051):609-611. https:\/\/doi.org\/10.1038\/d41586-025-00460-1.PubMed: 39966638","journal-title":"Nature"},{"issue":"6731","key":"2264_CR11","doi-asserted-by":"publisher","first-page":"238","DOI":"10.1126\/science.adv9836","volume":"387","author":"D Normile","year":"2025","unstructured":"Normile D. Chinese firm's large language model makes a splash. Science 2025;387(6731):238. https:\/\/doi.org\/10.1126\/science.adv9836.PubMed: 39818899","journal-title":"Science"},{"issue":"8049","key":"2264_CR12","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1038\/d41586-025-00229-6","volume":"638","author":"E Gibney","year":"2025","unstructured":"Gibney E. China's cheap, open AI model DeepSeek thrills scientists. Nature 2025;638(8049):13-14. https:\/\/doi.org\/10.1038\/d41586-025-00229-6.PubMed: 39849139","journal-title":"Nature"},{"key":"2264_CR13","doi-asserted-by":"publisher","DOI":"10.1001\/jama.2025.6571","author":"D Zeng","year":"2025","unstructured":"Zeng D, Qin Y, Sheng B, Wong TY. DeepSeek's \"Low-Cost\" Adoption Across China's Hospital Systems: Too Fast, Too Soon? JAMA 2025. https:\/\/doi.org\/10.1001\/jama.2025.6571.PubMed: 40293869","journal-title":"JAMA"},{"issue":"1","key":"2264_CR14","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/s10916-025-02181-4","volume":"49","author":"J Chen","year":"2025","unstructured":"Chen J, Miao C. DeepSeek Deployed in 90 Chinese Tertiary Hospitals: How Artificial Intelligence Is Transforming Clinical Practice. J Med Syst 2025;49(1):53. https:\/\/doi.org\/10.1007\/s10916-025-02181-4.PubMed: 40272650","journal-title":"J Med Syst"},{"key":"2264_CR15","doi-asserted-by":"publisher","unstructured":"Wang YM, Chen TJ. The rise of AI in healthcare education: DeepSeek and GPT-4o take on the 2024 Taiwan Pharmacist Exam. J Chin Med Assoc 2025. https:\/\/doi.org\/10.1097\/JCMA.0000000000001220. PubMed: 39972548","DOI":"10.1097\/JCMA.0000000000001220"},{"key":"2264_CR16","doi-asserted-by":"publisher","unstructured":"Ibrahim AF, Danpanichkul P, Hayek A, et al. Artificial intelligence in gastroenterology education: DeepSeek passes the gastroenterology board examination and outperforms legacy chatGPT models. Am J Gastroenterol 2025. https:\/\/doi.org\/10.14309\/ajg.0000000000003552. PubMed: 40392256","DOI":"10.14309\/ajg.0000000000003552"},{"key":"2264_CR17","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-025-03727-2","author":"S Sandmann","year":"2025","unstructured":"Sandmann S, Hegselmann S, Fujarski M, et al. Benchmark evaluation of DeepSeek large language models in clinical decision-making. Nat Med 2025. https:\/\/doi.org\/10.1038\/s41591-025-03727-2.PubMed: 40267970","journal-title":"Nat Med"},{"key":"2264_CR18","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-025-03726-3","author":"M Tordjman","year":"2025","unstructured":"Tordjman M, Liu Z, Yuce M, et al. Comparative benchmarking of the DeepSeek large language model on medical tasks and clinical reasoning. Nat Med 2025. https:\/\/doi.org\/10.1038\/s41591-025-03726-3.PubMed: 40267969","journal-title":"Nat Med"},{"key":"2264_CR19","doi-asserted-by":"publisher","first-page":"110506","DOI":"10.1016\/j.clinimag.2025.110506","volume":"123","author":"H Uldin","year":"2025","unstructured":"Uldin H, Saran S, Gandikota G, et al. A comparison of performance of DeepSeek-R1 model-generated responses to musculoskeletal radiology queries against ChatGPT-4 and ChatGPT-4o - A feasibility study. Clin Imaging 2025;123:110506. https:\/\/doi.org\/10.1016\/j.clinimag.2025.110506.PubMed: 40381536","journal-title":"Clin Imaging"},{"key":"2264_CR20","doi-asserted-by":"publisher","first-page":"e60226","DOI":"10.2196\/60226","volume":"26","author":"S Ming","year":"2024","unstructured":"Ming S, Yao X, Guo X, et al. Performance of ChatGPT in Ophthalmic Registration and Clinical Diagnosis: Cross-Sectional Study. J Med Internet Res 2024;26:e60226. https:\/\/doi.org\/10.2196\/60226.PubMed: 39541581","journal-title":"J Med Internet Res"},{"issue":"26","key":"2264_CR21","doi-asserted-by":"publisher","first-page":"2493","DOI":"10.1056\/NEJMp1512241","volume":"373","author":"H Singh","year":"2015","unstructured":"Singh H, Graber ML. Improving Diagnosis in Health Care--The Next Imperative for Patient Safety. N Engl J Med 2015;373(26):2493-5. https:\/\/doi.org\/10.1056\/NEJMp1512241.PubMed: 26559457","journal-title":"N Engl J Med"},{"issue":"3","key":"2264_CR22","doi-asserted-by":"publisher","first-page":"100681","DOI":"10.1016\/j.xops.2024.100681","volume":"5","author":"AP Agnihotri","year":"2025","unstructured":"Agnihotri AP, Nagel ID, Artiaga J, Guevarra M, Sosuan G, Kalaw F. Large Language Models in Ophthalmology: A Review of Publications from Top Ophthalmology Journals. Ophthalmol Sci 2025;5(3):100681. https:\/\/doi.org\/10.1016\/j.xops.2024.100681.PubMed: 40114712","journal-title":"Ophthalmol Sci"},{"issue":"1","key":"2264_CR23","doi-asserted-by":"publisher","first-page":"100600","DOI":"10.1016\/j.xops.2024.100600","volume":"5","author":"JS Chen","year":"2025","unstructured":"Chen JS, Reddy AJ, Al-Sharif E, et al. Analysis of ChatGPT Responses to Ophthalmic Cases: Can ChatGPT Think like an Ophthalmologist? Ophthalmol Sci 2025;5(1):100600. https:\/\/doi.org\/10.1016\/j.xops.2024.100600.PubMed: 39346575","journal-title":"Ophthalmol Sci"},{"key":"2264_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.survophthal.2025.02.0","author":"Q Zhang","year":"2025","unstructured":"Zhang Q, Wang S, Wang X, Xu C, Liang J, Liu Z. Advancing ophthalmology with large language models: Applications, challenges, and future directions. Surv Ophthalmol 2025. https:\/\/doi.org\/10.1016\/j.survophthal.2025.02.0 -09.PubMed: 40032069","journal-title":"Surv Ophthalmol"},{"key":"2264_CR25","doi-asserted-by":"publisher","unstructured":"Zandi R, Fahey JD, Drakopoulos M, et al. Exploring diagnostic precision and triage proficiency: A comparative study of GPT-4 and bard in addressing common ophthalmic complaints. Bioengineering (Basel) 2024;11(2). https:\/\/doi.org\/10.3390\/bioengineering11020120. PubMed: 38391606","DOI":"10.3390\/bioengineering11020120"},{"key":"2264_CR26","doi-asserted-by":"publisher","DOI":"10.1007\/s00417-023-06363-z","author":"A Shemer","year":"2024","unstructured":"Shemer A, Cohen M, Altarescu A, et al. Diagnostic capabilities of ChatGPT in ophthalmology. Graefes Arch Clin Exp Ophthalmol 2024. https:\/\/doi.org\/10.1007\/s00417-023-06363-z.PubMed: 38183467","journal-title":"Graefes Arch Clin Exp Ophthalmol"},{"issue":"10","key":"2264_CR27","doi-asserted-by":"publisher","first-page":"1398","DOI":"10.1136\/bjo-2023-325053","volume":"108","author":"D Milad","year":"2024","unstructured":"Milad D, Antaki F, Milad J, et al. Assessing the medical reasoning skills of GPT-4 in complex ophthalmology cases. Br J Ophthalmol 2024;108(10):1398-1405. https:\/\/doi.org\/10.1136\/bjo-2023-325053.PubMed: 38365427","journal-title":"Br J Ophthalmol"},{"key":"2264_CR28","doi-asserted-by":"publisher","unstructured":"Mikhail D, Milad D, Antaki F, et al. Multimodal performance of GPT-4 in complex ophthalmology cases. J Pers Med 2025;15(4). https:\/\/doi.org\/10.3390\/jpm15040160. PubMed: 40278339","DOI":"10.3390\/jpm15040160"},{"key":"2264_CR29","unstructured":"Arora R K, Wei J, Hicks R S, et al. Healthbench: Evaluating large language models towards improved human health. arXiv preprint arXiv:2505.08775, 2025"},{"key":"2264_CR30","doi-asserted-by":"publisher","unstructured":"Jiao C, Rosas E, Asadigandomani H, et al. Diagnostic performance of publicly available large language models in corneal diseases: A comparison with human specialists. Diagnostics (Basel) 2025;15(10). https:\/\/doi.org\/10.3390\/diagnostics15101221. PubMed: 40428214","DOI":"10.3390\/diagnostics15101221"},{"issue":"2","key":"2264_CR31","doi-asserted-by":"publisher","first-page":"100667","DOI":"10.1016\/j.xops.2024.100667","volume":"5","author":"J Jalili","year":"2025","unstructured":"Jalili J, Jiravarnsirikul A, Bowd C, et al. Glaucoma Detection and Feature Identification via GPT-4V Fundus Image Analysis. Ophthalmol Sci 2025;5(2):100667. https:\/\/doi.org\/10.1016\/j.xops.2024.100667.PubMed: 39877464","journal-title":"Ophthalmol Sci"},{"issue":"9","key":"2264_CR32","doi-asserted-by":"publisher","first-page":"1812","DOI":"10.1093\/jamia\/ocad259","volume":"31","author":"Y Hu","year":"2024","unstructured":"Hu Y, Chen Q, Du J, et al. Improving large language models for clinical named entity recognition via prompt engineering. J Am Med Inform Assoc 2024;31(9):1812-1820. https:\/\/doi.org\/10.1093\/jamia\/ocad259.PubMed: 38281112","journal-title":"J Am Med Inform Assoc"},{"key":"2264_CR33","doi-asserted-by":"publisher","unstructured":"Miao J, Thongprayoon C, Suppadungsuk S, Krisanapan P, Radhakrishnan Y, Cheungpasitporn W. Chain of thought utilization in large language models and application in nephrology. Medicina (Kaunas) 2024;60(1). https:\/\/doi.org\/10.3390\/medicina60010148. PubMed: 38256408","DOI":"10.3390\/medicina60010148"},{"key":"2264_CR34","doi-asserted-by":"publisher","DOI":"10.1515\/cclm-2025","author":"HS Yang","year":"2025","unstructured":"Yang HS, Li J, Yi X, Wang F. Performance evaluation of large language models with chain-of-thought reasoning ability in clinical laboratory case interpretation. Clin Chem Lab Med 2025. https:\/\/doi.org\/10.1515\/cclm-2025- 0055.PubMed: 40023838","journal-title":"Clin Chem Lab Med"},{"issue":"1","key":"2264_CR35","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1038\/s41746-025-01653-8","volume":"8","author":"H Kim","year":"2025","unstructured":"Kim H, Hwang H, Lee J, et al. Small language models learn enhanced reasoning skills from medical textbooks. NPJ Digit Med 2025;8(1):240. https:\/\/doi.org\/10.1038\/s41746-025-01653-8.PubMed: 40316765","journal-title":"NPJ Digit Med"},{"issue":"1","key":"2264_CR36","doi-asserted-by":"publisher","first-page":"366","DOI":"10.1186\/s12911-024-02709-7","volume":"24","author":"J Lee","year":"2024","unstructured":"Lee J, Park S, Shin J, Cho B. Analyzing evaluation methods for large language models in the medical field: a scoping review. BMC Med Inform Decis Mak 2024;24(1):366. https:\/\/doi.org\/10.1186\/s12911-024-02709-7.PubMed: 39614219","journal-title":"BMC Med Inform Decis Mak"},{"key":"2264_CR37","doi-asserted-by":"publisher","first-page":"e58329","DOI":"10.2196\/58329","volume":"26","author":"J Seo","year":"2024","unstructured":"Seo J, Choi D, Kim T, et al. Evaluation Framework of Large Language Models in Medical Documentation: Development and Usability Study. J Med Internet Res 2024;26:e58329. https:\/\/doi.org\/10.2196\/58329.PubMed: 39566044","journal-title":"J Med Internet Res"},{"key":"2264_CR38","doi-asserted-by":"publisher","first-page":"e52784","DOI":"10.2196\/52784","volume":"10","author":"S Ming","year":"2024","unstructured":"Ming S, Guo Q, Cheng W, et al. Influence of Model Evolution and System Roles on ChatGPT\u2019s Performance in Chinese Medical Licensing Exams: Comparative Study. JMIR Med Educ 2024;10:e52784. https:\/\/doi.org\/10.2196\/52784.","journal-title":"JMIR Med Educ"}],"container-title":["Journal of Medical Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10916-025-02264-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10916-025-02264-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10916-025-02264-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T06:23:51Z","timestamp":1759904631000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10916-025-02264-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,8]]},"references-count":38,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["2264"],"URL":"https:\/\/doi.org\/10.1007\/s10916-025-02264-2","relation":{},"ISSN":["1573-689X"],"issn-type":[{"value":"1573-689X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,8]]},"assertion":[{"value":"24 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The Institutional Review Board of Henan Provincial People\u2019s Hospital determined that this in silico research did not involve real-world human subjects. All case vignettes obtained from publicly accessible published textbooks. During data collection, both GPT-o1 and o3 were available commercially via ChatGPT Plus subscriptions.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"The authors declare no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Clinical Trial Number"}}],"article-number":"130"}}