{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T15:06:34Z","timestamp":1780585594959,"version":"3.54.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T00:00:00Z","timestamp":1771632000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T00:00:00Z","timestamp":1774828800000},"content-version":"vor","delay-in-days":37,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100015622","name":"Ministerul Cercet\u0103rii \u015fi Inov\u0103rii","doi-asserted-by":"crossref","award":["MySMIS no. 351416"],"award-info":[{"award-number":["MySMIS no. 351416"]}],"id":[{"id":"10.13039\/501100015622","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100006595","name":"Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii","doi-asserted-by":"publisher","award":["PN-IV-P6-6.3-SOL-2024-0090"],"award-info":[{"award-number":["PN-IV-P6-6.3-SOL-2024-0090"]}],"id":[{"id":"10.13039\/501100006595","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-026-02465-0","type":"journal-article","created":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T11:35:08Z","timestamp":1771673708000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A large-scale benchmark for evaluating large language models on medical question answering in Romanian"],"prefix":"10.1038","volume":"9","author":[{"given":"Ana-Cristina","family":"Rogoz","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Radu Tudor","family":"Ionescu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alexandra-Valentina","family":"Anghel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ionu\u0163-Lucian","family":"Antone-Iordache","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Simona","family":"Coniac","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andreea Iuliana","family":"Ionescu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,2,21]]},"reference":[{"key":"2465_CR1","unstructured":"Pal, A. & Sankarasubbu, M. OpenBioLLMs: Advancing Open-Source Large Language Models for Healthcare and Life Sciences. Hugging Face Repository. https:\/\/huggingface.co\/aaditya\/OpenBioLLM-Llama3-70B (2024)."},{"key":"2465_CR2","unstructured":"Touvron, H. et al. LLaMA: Open and Efficient Foundation Language Models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"2465_CR3","unstructured":"Jiang, A. Q. et al. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"2465_CR4","unstructured":"Taori, R. et al. Stanford Alpaca: An Instruction-following LLaMA model. GitHub repository. https:\/\/github.com\/tatsu-lab\/stanford_alpaca (2023)."},{"key":"2465_CR5","unstructured":"K\u00f6pf, A. et al. OpenAssistant conversations \u2013 Democratizing large language model alignment. In Proceedings of the 37th International Conference on Neural Information Processing Systems, 47669\u201347681. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/hash\/949f0f8f32267d297c2d4e3ee10a2e7e-Abstract-Datasets_and_Benchmarks.html (Curran Associates Inc., 2023)."},{"key":"2465_CR6","unstructured":"Brown, T. et al. Language models are few-shot learners. In Proceedings of the 34th International Conference on Neural Information Processing Systems, vol. 33, 1877\u20131901. https:\/\/papers.nips.cc\/paper\/2020\/hash\/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html (Curran Associates Inc., 2020)."},{"key":"2465_CR7","first-page":"1","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery, A. et al. PaLM: Scaling language modeling with pathways. J. Mach. Learn Res. 24, 1\u2013113 (2023).","journal-title":"J. Mach. Learn Res"},{"key":"2465_CR8","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K. & Liang, P. SQuAD: 100,000+ questions for machine comprehension of text. In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, 2383\u20132392 (Association for Computational Linguistics, Austin, Texas, 2016).","DOI":"10.18653\/v1\/D16-1264"},{"key":"2465_CR9","unstructured":"Hendrycks, D. et al. Measuring massive multitask language understanding. In Proceedings of the International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=d7KBjmI3GmQ (2021)."},{"key":"2465_CR10","unstructured":"Achiam, J. et al. GPT-4 Technical Report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"2465_CR11","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal, K. et al. Large language models encode clinical knowledge. Nature 620, 172\u2013180 (2023).","journal-title":"Nature"},{"key":"2465_CR12","doi-asserted-by":"publisher","first-page":"943","DOI":"10.1038\/s41591-024-03423-7","volume":"31","author":"K Singhal","year":"2025","unstructured":"Singhal, K. et al. Towards expert-level medical question answering with large language models. Nat. Med. 31, 943\u2013950 (2025).","journal-title":"Nat. Med."},{"key":"2465_CR13","first-page":"452","volume":"7","author":"T Kwiatkowski","year":"2019","unstructured":"Kwiatkowski, T. et al. Natural questions: A benchmark for question answering research. Trans. Assoc. Comput Linguist 7, 452\u2013466 (2019).","journal-title":"Trans. Assoc. Comput Linguist"},{"key":"2465_CR14","doi-asserted-by":"crossref","unstructured":"Artetxe, M., Ruder, S. & Yogatama, D. On the cross-lingual transferability of monolingual representations. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 4623\u20134637 (Association for Computational Linguistics, Online, 2020).","DOI":"10.18653\/v1\/2020.acl-main.421"},{"key":"2465_CR15","doi-asserted-by":"crossref","unstructured":"Lewis, P., Oguz, B., Rinott, R., Riedel, S. & Schwenk, H. MLQA: Evaluating cross-lingual extractive question answering. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 7315\u20137330 (Association for Computational Linguistics, Online, 2020).","DOI":"10.18653\/v1\/2020.acl-main.653"},{"key":"2465_CR16","doi-asserted-by":"publisher","first-page":"454","DOI":"10.1162\/tacl_a_00317","volume":"8","author":"JH Clark","year":"2020","unstructured":"Clark, J. H. et al. TyDi QA: A benchmark for information-seeking question answering in typologically diverse languages. Trans. Assoc. Comput Linguist 8, 454\u2013470 (2020).","journal-title":"Trans. Assoc. Comput Linguist"},{"key":"2465_CR17","doi-asserted-by":"publisher","first-page":"6421","DOI":"10.3390\/app11146421","volume":"11","author":"D Jin","year":"2021","unstructured":"Jin, D. et al. What disease does this patient have? a large-scale open domain question answering dataset from medical exams. Appl. Sci. 11, 6421 (2021).","journal-title":"Appl. Sci."},{"key":"2465_CR18","doi-asserted-by":"crossref","unstructured":"Pampari, A., Raghavan, P., Liang, J. & Peng, J. emrQA: A large corpus for question answering on electronic medical records. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, 2357\u20132368 (Association for Computational Linguistics, Brussels, Belgium, 2018).","DOI":"10.18653\/v1\/D18-1258"},{"key":"2465_CR19","doi-asserted-by":"crossref","unstructured":"Jin, Q., Dhingra, B., Liu, Z., Cohen, W. & Lu, X. PubMedQA: A dataset for biomedical research question answering. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing, 2567\u20132577 (Association for Computational Linguistics, Hong Kong, China, 2019).","DOI":"10.18653\/v1\/D19-1259"},{"key":"2465_CR20","unstructured":"Pal, A., Umapathi, L. K. & Sankarasubbu, M. MedMCQA: A Large-scale Multi-Subject Multi-Choice Dataset for Medical domain Question Answering. In Proceedings of the Conference on Health, Inference, and Learning, vol. 174, 248\u2013260 (PMLR, 2022). https:\/\/proceedings.mlr.press\/v174\/pal22a.html."},{"key":"2465_CR21","doi-asserted-by":"crossref","unstructured":"Cr\u0103ciun, C.-G., Sm\u0103du, R.-A., Cercel, D.-C. & Cercel, M.-C. GRAF: Graph retrieval augmented by facts for Romanian legal multi-choice question answering. In Findings of the Association for Computational Linguistics: ACL 2025, 12708\u201312742 (Association for Computational Linguistics, Vienna, Austria, 2025).","DOI":"10.18653\/v1\/2025.findings-acl.659"},{"key":"2465_CR22","unstructured":"Nicolae, D. C. & Tufi\u015f, D. RoITD: Romanian IT Question Answering Dataset. In Proceedings of the 16th International Conference on Lingustic Resources and Tools for Natural Language Processsing, 105\u2013117. https:\/\/conferences.info.uaic.ro\/consilr\/prevEditions\/Consilr_2021.pdf (2021)."},{"key":"2465_CR23","unstructured":"Dumitrescu, S. D. et al. LiRo: Benchmark and leaderboard for Romanian language tasks. In Proceedings of the 35th Conference on Neural Information Processing Systems. https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper\/2021\/hash\/5f93f983524def3dca464469d2cf9f3e-Abstract-round1.html (Curran Associates, Inc., 2021)."},{"key":"2465_CR24","doi-asserted-by":"crossref","unstructured":"Dima, G.-A., Avram, A.-M., Craciun, C.-G. & Cercel, D.-C. RoQLlama: A lightweight Romanian adapted language model. In Findings of the Association for Computational Linguistics: EMNLP 2024, 4531\u20134541 (Association for Computational Linguistics, Miami, Florida, USA, 2024).","DOI":"10.18653\/v1\/2024.findings-emnlp.261"},{"key":"2465_CR25","doi-asserted-by":"crossref","unstructured":"Masala, M. et al. \u201cVorbe\u015fti Rom\u00e2ne\u015fte?\u201d A Recipe to Train Powerful Romanian LLMs with English Instructions. In Findings of the Association for Computational Linguistics: EMNLP 2024, 11632\u201311647 (Association for Computational Linguistics, Miami, Florida, USA, 2024).","DOI":"10.18653\/v1\/2024.findings-emnlp.681"},{"key":"2465_CR26","unstructured":"Abouelenin, A. et al. Phi-4-Mini Technical Report: Compact yet Powerful Multimodal Language Models via Mixture-of-LoRAs. arXiv preprint arXiv:2503.01743 (2025)."},{"key":"2465_CR27","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T. & Zhu, W.-J. BLEU: A method for automatic evaluation of machine translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, 311\u2013318 (Association for Computational Linguistics, 2002).","DOI":"10.3115\/1073083.1073135"},{"key":"2465_CR28","unstructured":"Banerjee, S. & Lavie, A. METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for MT and\/or Summarization, 65\u201372 (Association for Computational Linguistics, Ann Arbor, Michigan, 2005). https:\/\/aclanthology.org\/W05-0909\/."},{"key":"2465_CR29","unstructured":"Loshchilov, I. & Hutter, F. Decoupled Weight Decay Regularization. In Proceedings of the International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7 (2019)."},{"key":"2465_CR30","unstructured":"Barbero, F. et al. Why do LLMs attend to the first token? In Proceedings of Conference on Language Modeling. https:\/\/openreview.net\/forum?id=tu4dFUsW5z (2025)."},{"key":"2465_CR31","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1016\/j.jmhg.2007.01.011","volume":"4","author":"JJ Bonhomme","year":"2007","unstructured":"Bonhomme, J. J. Men\u2019s health: impact on women, children and society. J. Mens. Health Gend. 4, 124\u2013130 (2007).","journal-title":"J. Mens. Health Gend."},{"key":"2465_CR32","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1159\/000334256","volume":"58","author":"X Dong","year":"2012","unstructured":"Dong, X., Simon, M. A. & Evans, D. A. Prevalence of self-neglect across gender, race, and socioeconomic status: Findings from the Chicago Health and Aging Project. Gerontology 58, 258\u2013268 (2012).","journal-title":"Gerontology"},{"key":"2465_CR33","unstructured":"OpenAI. Update to GPT-5 System Card: GPT-5.2. Technical Report. https:\/\/cdn.openai.com\/pdf\/3a4153c8-c748-4b71-8e31-aecbde944f8d\/oai_5_2_system-card.pdf (2025)."},{"key":"2465_CR34","unstructured":"Google. Gemini 3 Flash - Model Card. Technical Report. https:\/\/storage.googleapis.com\/deepmind-media\/Model-Cards\/Gemini-3-Flash-Model-Card.pdf (2025)."},{"key":"2465_CR35","unstructured":"Hu, E. J. et al. LoRA: Low-Rank Adaptation of Large Language Models. In Proceedings of the International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9 (2022)."},{"key":"2465_CR36","unstructured":"Mukherjee, S. et al. Orca: Progressive learning from complex explanation traces of GPT-4. arXiv preprint arXiv:2306.02707 (2023)."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02465-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02465-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02465-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T13:06:08Z","timestamp":1774875968000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02465-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,21]]},"references-count":36,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["2465"],"URL":"https:\/\/doi.org\/10.1038\/s41746-026-02465-0","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,21]]},"assertion":[{"value":"30 October 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"268"}}