{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:11:00Z","timestamp":1778080260661,"version":"3.51.4"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T00:00:00Z","timestamp":1741824000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T00:00:00Z","timestamp":1741824000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-01550-0","type":"journal-article","created":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T11:51:29Z","timestamp":1741866689000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":62,"title":["Enhancing diagnostic capability with multi-agents conversational large language models"],"prefix":"10.1038","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0165-6426","authenticated-orcid":false,"given":"Xi","family":"Chen","sequence":"first","affiliation":[]},{"given":"Huahui","family":"Yi","sequence":"additional","affiliation":[]},{"given":"Mingke","family":"You","sequence":"additional","affiliation":[]},{"given":"WeiZhi","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3260-5998","authenticated-orcid":false,"given":"Li","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Hairui","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xue","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yingman","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Gang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Qicheng","family":"Lao","sequence":"additional","affiliation":[]},{"given":"Weili","family":"Fu","sequence":"additional","affiliation":[]},{"given":"Kang","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2666-6219","authenticated-orcid":false,"given":"Jian","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,13]]},"reference":[{"key":"1550_CR1","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.1038\/s41591-023-02448-8","volume":"29","author":"AJ Thirunavukarasu","year":"2023","unstructured":"Thirunavukarasu, A. J. et al. Large language models in medicine. Nat. Med. 29, 1930\u20131940 (2023).","journal-title":"Nat. Med."},{"key":"1550_CR2","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1001\/jama.2023.8288","volume":"330","author":"Z Kanjee","year":"2023","unstructured":"Kanjee, Z., Crowe, B. & Rodman, A. Accuracy of a Generative Artificial Intelligence Model in a Complex Diagnostic Challenge. JAMA 330, 78\u201380 (2023).","journal-title":"JAMA"},{"key":"1550_CR3","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1001\/jama.2023.15481","volume":"330","author":"R Khera","year":"2023","unstructured":"Khera, R. et al. AI in Medicine-JAMA\u2019s Focus on Clinical Outcomes, Patient-Centered Care, Quality, and Equity. JAMA 330, 818\u2013820 (2023).","journal-title":"JAMA"},{"key":"1550_CR4","doi-asserted-by":"publisher","first-page":"641","DOI":"10.1016\/S0140-6736(23)00216-7","volume":"401","author":"A Arora","year":"2023","unstructured":"Arora, A. & Arora, A. The promise of large language models in health care. Lancet 401, 641 (2023).","journal-title":"Lancet"},{"key":"1550_CR5","doi-asserted-by":"publisher","first-page":"906","DOI":"10.1016\/j.jval.2015.05.008","volume":"18","author":"T Richter","year":"2015","unstructured":"Richter, T. et al. Rare Disease Terminology and Definitions-A Systematic Global Review: Report of the ISPOR Rare Disease Special Interest Group. Value Health 18, 906\u2013914 (2015).","journal-title":"Value Health"},{"key":"1550_CR6","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1038\/nrg.2017.116","volume":"19","author":"CF Wright","year":"2018","unstructured":"Wright, C. F., FitzPatrick, D. R. & Firth, H. V. Paediatric genomics: diagnosing rare disease in children. Nat. Rev. Genet. 19, 253\u2013268 (2018).","journal-title":"Nat. Rev. Genet."},{"key":"1550_CR7","doi-asserted-by":"publisher","first-page":"4732","DOI":"10.3390\/ijerph20064732","volume":"20","author":"T Adachi","year":"2023","unstructured":"Adachi, T. et al. Enhancing Equitable Access to Rare Disease Diagnosis and Treatment around the World: A Review of Evidence, Policies, and Challenges. Int. J. Environ. Res. Public Health 20, 4732 (2023).","journal-title":"Int. J. Environ. Res. Public Health"},{"key":"1550_CR8","doi-asserted-by":"publisher","first-page":"745","DOI":"10.1016\/j.clinthera.2023.06.010","volume":"45","author":"JG Serrano","year":"2023","unstructured":"Serrano, J. G. et al. Advancing Understanding of Inequities in Rare Disease Genomics. Clin. Therapeutics 45, 745\u2013753 (2023).","journal-title":"Clin. Therapeutics"},{"key":"1550_CR9","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01029-4","volume":"7","author":"L Wang","year":"2024","unstructured":"Wang, L. et al. Prompt engineering in consistency and reliability with the evidence-based guideline for LLMs. npj Digital Med. 7, 41 (2024).","journal-title":"npj Digital Med."},{"key":"1550_CR10","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1038\/s41746-024-01091-y","volume":"7","author":"S Kresevic","year":"2024","unstructured":"Kresevic, S. et al. Optimization of hepatological clinical guidelines interpretation by large language models: a retrieval augmented generation-based framework. NPJ Digit Med 7, 102 (2024).","journal-title":"NPJ Digit Med"},{"key":"1550_CR11","unstructured":"Wu, Q. et al. Autogen: Enabling next-gen llm applications via multi-agent conversation. Preprint arXiv:2308.08155 (2023)."},{"key":"1550_CR12","unstructured":"Smit, A., Duckworth, P., Grinsztajn, N., Barrett, T. & Pretorius, A. Are we going MAD? Benchmarking Multi-Agent Debate between Language Models for Medical Q&A. In: Deep Generative Models for Health Workshop NeurIPS 2023) (NeurIPS, 2023)."},{"key":"1550_CR13","unstructured":"Chan, C.-M. et al. Chateval: Towards better llm-based evaluators through multi-agent debate. Preprint arXiv:2308.07201 (2023)."},{"key":"1550_CR14","unstructured":"Hong, S. et al. Metagpt: Meta programming for a multi-agent collaborative framework. URL https:\/\/arxiv.org\/abs\/2308.00352 (2023)."},{"key":"1550_CR15","unstructured":"Berglund, L. et al. The Reversal Curse: LLMs trained on \u201cA is B\u201d fail to learn \u201cB is A\u201d. Preprint arXiv:2309.12288 (2023)."},{"key":"1550_CR16","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wu, J., He, Z., Liu, X. & Su, Y. Medical exam question answering with large-scale reading comprehension. In: Proceedings of the AAAI conference on artificial intelligence) (AAAI, 2018).","DOI":"10.1609\/aaai.v32i1.11970"},{"key":"1550_CR17","unstructured":"Pal, A., Umapathi, L. K. & Sankarasubbu, M. Medmcqa: A large-scale multi-subject multi-choice dataset for medical domain question answering. In: Conference on Health, Inference, and Learning) (PMLR, 2022)."},{"key":"1550_CR18","doi-asserted-by":"crossref","unstructured":"Jin, Q., Dhingra, B., Liu, Z., Cohen, W. W. & Lu, X. Pubmedqa: A dataset for biomedical research question answering. Preprint arXiv:1909.06146 (2019).","DOI":"10.18653\/v1\/D19-1259"},{"key":"1550_CR19","doi-asserted-by":"crossref","unstructured":"Bellgard, M. I. et al. Rare disease research roadmap: navigating the bioinformatics and translational challenges for improved patient health outcomes. Health Policy Technol. 3, 325\u2013335 (2014).","DOI":"10.1016\/j.hlpt.2014.08.007"},{"key":"1550_CR20","doi-asserted-by":"crossref","unstructured":"Hageman, I. C., van Rooij, I. A., de Blaauw, I., Trajanovska, M. & King, S.K. A systematic overview of rare disease patient registries: challenges in design, quality management, and maintenance. Orphanet. J. Rare Dis. 18, 106 (2023).","DOI":"10.1186\/s13023-023-02719-0"},{"key":"1550_CR21","doi-asserted-by":"crossref","unstructured":"Mehnen, L., Gruarin, S., Vasileva, M. & Knapp, B. ChatGPT as a medical doctor? A diagnostic accuracy study on common and rare diseases. 2023.2004. 2020.23288859 (2023).","DOI":"10.1101\/2023.04.20.23288859"},{"key":"1550_CR22","first-page":"46534","volume":"36","author":"A Madaan","year":"2023","unstructured":"Madaan, A. et al. Self-refine: Iterative refinement with self-feedback. Adv. Neural Inf. Process. Syst. 36, 46534\u201346594 (2023).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1550_CR23","unstructured":"Wang, X. et al. Self-consistency improves chain of thought reasoning in language models. Preprint arXiv:2203.11171 (2022)."},{"key":"1550_CR24","first-page":"45767","volume":"37","author":"L Chen","year":"2025","unstructured":"Chen, L. et al. Are more llm calls all you need? towards the scaling properties of compound ai systems. Adv. Neural Inf. Process. Syst. 37, 45767\u201345790 (2025).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1550_CR25","doi-asserted-by":"crossref","unstructured":"Tang, X. et al. Medagents: Large language models as collaborators for zero-shot medical reasoning. Preprint arXiv:2311.10537 (2023).","DOI":"10.18653\/v1\/2024.findings-acl.33"},{"key":"1550_CR26","unstructured":"Liu, L. et al. A survey on medical large language models: Technology, application, trustworthiness, and future directions. Preprint arXiv:2406.03712 (2024)."},{"key":"1550_CR27","doi-asserted-by":"publisher","first-page":"e58158","DOI":"10.2196\/58158","volume":"26","author":"X Chen","year":"2024","unstructured":"Chen, X. et al. Evaluating and Enhancing Large Language Models\u2019 Performance in Domain-Specific Medicine: Development and Usability Study With DocOA. J. Med. Internet Res. 26, e58158 (2024).","journal-title":"J. Med. Internet Res."},{"key":"1550_CR28","unstructured":"Wang, H. et al. Beyond Direct Diagnosis: LLM-based Multi-Specialist Agent Consultation for Automatic Diagnosis. (2024)."},{"key":"1550_CR29","unstructured":"Ke, Y. H. et al. Enhancing diagnostic accuracy through multi-agent conversations: Using large language models to mitigate cognitive bias. (2024)."},{"key":"1550_CR30","doi-asserted-by":"publisher","first-page":"2613","DOI":"10.1038\/s41591-024-03097-1","volume":"30","author":"P Hager","year":"2024","unstructured":"Hager, P. et al. Evaluation and mitigation of the limitations of large language models in clinical decision-making. Nat. Med. 30, 2613\u20132622 (2024).","journal-title":"Nat. Med."},{"key":"1550_CR31","doi-asserted-by":"publisher","DOI":"10.3389\/fmed.2024.1380148","volume":"11","author":"A R\u00edos-Hoyo","year":"2024","unstructured":"R\u00edos-Hoyo, A. et al. Evaluation of large language models as a diagnostic aid for complex medical cases. Front. Med. 11, 1380148 (2024).","journal-title":"Front. Med."},{"key":"1550_CR32","unstructured":"Zhou, S. et al. Large language models for disease diagnosis: A scoping review. Preprint arXiv:2409.00097 (2024)."},{"key":"1550_CR33","doi-asserted-by":"crossref","unstructured":"Wada, A. et al. Optimizing GPT-4 Turbo Diagnostic Accuracy in Neuroradiology through Prompt Engineering and Confidence Thresholds. Diagnostics 14, 1541 (2024).","DOI":"10.3390\/diagnostics14141541"},{"key":"1550_CR34","doi-asserted-by":"crossref","unstructured":"Song, M. et al. PneumoLLM: Harnessing the power of large language model for pneumoconiosis diagnosis. Med. Image Anal. 97, 103248 (2024).","DOI":"10.1016\/j.media.2024.103248"},{"key":"1550_CR35","doi-asserted-by":"crossref","unstructured":"Zheng, S., Pan, K., Liu, J. & Chen, Y. Empirical study on fine-tuning pre-trained large language models for fault diagnosis of complex systems. Reliability Eng. Syst. Saf. 252, 110382 (2024).","DOI":"10.1016\/j.ress.2024.110382"},{"key":"1550_CR36","doi-asserted-by":"crossref","unstructured":"Dou, C. et al. Integrating physician diagnostic logic into large language models: Preference learning from process feedback. Preprint arXiv:2401.05695 (2024).","DOI":"10.18653\/v1\/2024.findings-acl.144"},{"key":"1550_CR37","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal, K. et al. Large language models encode clinical knowledge. Nature 620, 172\u2013180 (2023).","journal-title":"Nature"},{"key":"1550_CR38","unstructured":"Agarwal, C., Tanneru, S. H. & Lakkaraju, H. Faithfulness vs. plausibility: On the (un) reliability of explanations from large language models. Preprint arXiv:2402.04614 (2024)."},{"key":"1550_CR39","unstructured":"Wei, J. et al. Chain-of-thought prompting elicits reasoning in large language models. 35, 24824\u201324837 (2022)."},{"key":"1550_CR40","doi-asserted-by":"crossref","unstructured":"Xu, J. et al. Faithful logical reasoning via symbolic chain-of-thought. Preprint arXiv:2405.18357 (2024).","DOI":"10.18653\/v1\/2024.acl-long.720"},{"key":"1550_CR41","unstructured":"Luo, H. & Specia, L. From understanding to utilization: A survey on explainability for large language models. Preprint arXiv:2401.12874 (2024)."},{"key":"1550_CR42","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1038\/s41431-019-0508-0","volume":"28","author":"S Nguengang Wakap","year":"2020","unstructured":"Nguengang Wakap, S. et al. Estimating cumulative point prevalence of rare diseases: analysis of the Orphanet database. Eur. J. Hum. Genet. EJHG 28, 165\u2013173 (2020).","journal-title":"Eur. J. Hum. Genet. EJHG"},{"key":"1550_CR43","doi-asserted-by":"crossref","unstructured":"Efraimidis, P. & Spirakis, P. Weighted Random Sampling. In: Encyclopedia of Algorithms (ed Kao, M.-Y.) (Springer US, 2008).","DOI":"10.1007\/978-0-387-30162-4_478"},{"key":"1550_CR44","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1177\/00045632211050531","volume":"59","author":"RM West","year":"2022","unstructured":"West, R. M. Best practice in statistics: The use of log transformation. Ann. Clin. Biochem. 59, 162\u2013165 (2022).","journal-title":"Ann. Clin. Biochem."},{"key":"1550_CR45","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1302\/0301-620X.73B2.2005151","volume":"73","author":"AA Amis","year":"1991","unstructured":"Amis, A. A. & Dawkins, G. P. Functional anatomy of the anterior cruciate ligament. Fibre bundle actions related to ligament replacements and injuries. J. Bone Jt. Surg. Br. Vol. 73, 260\u2013267 (1991).","journal-title":"J. Bone Jt. Surg. Br. Vol."},{"key":"1550_CR46","doi-asserted-by":"crossref","unstructured":"Landis, J. R. & Koch, G.G. The measurement of observer agreement for categorical data. Biometrics, 33, 159\u2013174 (1977).","DOI":"10.2307\/2529310"}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01550-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01550-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01550-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T11:51:52Z","timestamp":1741866712000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01550-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,13]]},"references-count":46,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1550"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-01550-0","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,13]]},"assertion":[{"value":"12 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"159"}}