{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T01:26:00Z","timestamp":1768613160548,"version":"3.49.0"},"reference-count":25,"publisher":"Oxford University Press (OUP)","license":[{"start":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T00:00:00Z","timestamp":1726012800000},"content-version":"vor","delay-in-days":254,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,10]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>This paper presents a transformer-based approach for symptom Named Entity Recognition (NER) in Spanish clinical texts and multilingual entity linking on the SympTEMIST dataset. For Spanish NER, we fine tune a RoBERTa-based token-level classifier with Bidirectional Long Short-Term Memory and conditional random field layers on an augmented train set, achieving an F1 score of 0.73. Entity linking is performed via a hybrid approach with dictionaries, generating candidates from a knowledge base containing Unified Medical Language System aliases using the cross-lingual SapBERT and reranking the top candidates using GPT-3.5. The entity linking approach shows consistent results for multiple languages of 0.73 accuracy on the SympTEMIST multilingual dataset and also achieves an accuracy of 0.6123 on the Spanish entity linking task surpassing the current top score for this subtask.<\/jats:p>\n               <jats:p>Database URL: https:\/\/github.com\/svassileva\/symptemist-multilingual-linking<\/jats:p>","DOI":"10.1093\/database\/baae090","type":"journal-article","created":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T22:31:03Z","timestamp":1726093863000},"source":"Crossref","is-referenced-by-count":2,"title":["Transformer-based approach for symptom recognition and multilingual linking"],"prefix":"10.1093","volume":"2024","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2257-0659","authenticated-orcid":false,"given":"Sylvia","family":"Vassileva","sequence":"first","affiliation":[{"name":"Faculty of Mathematics and Informatics, Sofia University St. Kliment Ohridski , Blvd \u201cJames Bourchier\u201d 5, Sofia 1164, Bulgaria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7084-6788","authenticated-orcid":false,"given":"Georgi","family":"Grazhdanski","sequence":"additional","affiliation":[{"name":"Faculty of Mathematics and Informatics, Sofia University St. Kliment Ohridski , Blvd \u201cJames Bourchier\u201d 5, Sofia 1164, Bulgaria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3919-030X","authenticated-orcid":false,"given":"Ivan","family":"Koychev","sequence":"additional","affiliation":[{"name":"Faculty of Mathematics and Informatics, Sofia University St. Kliment Ohridski , Blvd \u201cJames Bourchier\u201d 5, Sofia 1164, Bulgaria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5542-9168","authenticated-orcid":false,"given":"Svetla","family":"Boytcheva","sequence":"additional","affiliation":[{"name":"Faculty of Mathematics and Informatics, Sofia University St. Kliment Ohridski , Blvd \u201cJames Bourchier\u201d 5, Sofia 1164, Bulgaria"},{"name":"Ontotext , Ontotext, ul. \u201cNikola Gabrovski\u201d 79, Sofia 1700, Bulgaria"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"2024091218361303700_R1","article-title":"Proceedings of the BioCreative VIII Challenge and Workshop: Curation and Evaluation in the era of Generative Models","author":"Islamaj","year":"2023"},{"key":"2024091218361303700_R2","article-title":"Overview of the SympTEMIST shared task at BioCreative VIII: detection and normalization of symptoms, signs and findings","author":"Lima-L\u00f3pez","year":"2023"},{"key":"2024091218361303700_R3","first-page":"D267","volume-title":"The Unified Medical Language System (UMLS): integrating biomedical terminology","author":"Bodenreider","year":"2004"},{"key":"2024091218361303700_R4","article-title":"SympTEMIST corpus: gold standard annotations for clinical symptoms, signs and findings information extraction","author":"L\u00f3pez","year":"2023"},{"key":"2024091218361303700_R5","article-title":"ICB-UMA at BioCreative VIII @ AMIA 2023 Task 2 SYMPTEMIST (Symptom TExt Mining Shared Task)","author":"Gallego","year":"2023"},{"key":"2024091218361303700_R6","article-title":"Biomedical and clinical language models for Spanish: on the benefits of domain-specific pretraining in a mid-resource scenario","author":"Carrino","year":"2021"},{"key":"2024091218361303700_R7","first-page":"60","article-title":"Discovering medical procedures in Spanish using transformer models with MCRF and augmentation","author":"Almeida","year":"2023"},{"key":"2024091218361303700_R8","first-page":"91","article-title":"Coming a long way with pre-trained transformers and string matching techniques: clinical procedure mention recognition and normalization","author":"Chizhikova","year":"2023"},{"key":"2024091218361303700_R9","doi-asserted-by":"publisher","first-page":"565","DOI":"10.18653\/v1\/2021.acl-short.72","article-title":"Learning domain-specialised representations for cross-lingual biomedical entity linking","author":"Liu","year":"2021"},{"key":"2024091218361303700_R10","first-page":"206","article-title":"VICOMTECH at MedProcNER 2023: transformers-based sequence-labelling and cross-encoding for entity detection and normalisation in Spanish clinical texts","author":"Zotova","year":"2023"},{"key":"2024091218361303700_R11","first-page":"190","article-title":"Fusion @ BioASQ MedProcNER: transformer-based approach for procedure recognition and linking in Spanish clinical text","author":"Vassileva","year":"2023"},{"key":"2024091218361303700_R12","doi-asserted-by":"publisher","first-page":"15080","DOI":"10.18653\/v1\/2023.findings-emnlp.1007","article-title":"mReFinED: an efficient end-to-end multilingual entity linking system","author":"Limkonchotiwat","year":"2023"},{"key":"2024091218361303700_R13","doi-asserted-by":"crossref","first-page":"274","DOI":"10.1162\/tacl_a_00460","article-title":"Multilingual autoregressive entity linking","volume":"10","author":"De Cao","year":"2022","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2024091218361303700_R14","doi-asserted-by":"publisher","first-page":"7833","DOI":"10.18653\/v1\/2020.emnlp-main.630","article-title":"Entity linking in 100 languages","author":"Botha","year":"2020"},{"key":"2024091218361303700_R15","doi-asserted-by":"publisher","first-page":"5742","DOI":"10.18653\/v1\/2023.emnlp-main.350","article-title":"Controllable contrastive generation for multilingual biomedical entity linking","author":"Zhu","year":"2023"},{"key":"2024091218361303700_R16","article-title":"Team Fusion@SU @ BC8 SympTEMIST track: transformer-based approach for symptom recognition and linking","author":"Grazhdanski","year":"2023"},{"key":"2024091218361303700_R17","article-title":"Named Entity Recognition","author":"Krishnan","year":"2005"},{"key":"2024091218361303700_R18","doi-asserted-by":"publisher","first-page":"4171","DOI":"10.18653\/v1\/N19-1423","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"2024091218361303700_R19","article-title":"RoBERTa: a robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"2024091218361303700_R20","doi-asserted-by":"publisher","first-page":"3267","DOI":"10.1093\/bioinformatics\/btac297","article-title":"CLIN-X\/i: pre-trained language models and a study on cross-task transfer for concept extraction in the clinical domain","volume":"38","author":"Lange","year":"2022","journal-title":"Bioinform"},{"key":"2024091218361303700_R21","first-page":"1","article-title":"Overview of MedProcNER task on medical procedure detection and entity linking at BioASQ 2023","author":"Lima-L\u00f3pez","year":"2023"},{"key":"2024091218361303700_R22","first-page":"179","article-title":"Overview of DisTEMIST at BioASQ: automatic detection and normalization of diseases from clinical texts: results, methods, evaluation and multilingual resources","author":"Miranda-Escalada","year":"2022"},{"key":"2024091218361303700_R23","first-page":"32","article-title":"The MeSpEN resource for English-Spanish Medical Machine Translation and Terminologies: census of parallel corpora, glossaries and term translations","author":"Villegas","year":"2018"},{"key":"2024091218361303700_R24","article-title":"HPI-DHC @ BC8 SympTEMIST track: detection and normalization of symptom mentions with SpanMarker and xMEN","author":"Borchert","year":"2023"},{"key":"2024091218361303700_R25","article-title":"Llama 3 Model Card","author":"AI@Meta.","year":"2024"}],"container-title":["Database"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/database\/article-pdf\/doi\/10.1093\/database\/baae090\/59091441\/baae090.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/database\/article-pdf\/doi\/10.1093\/database\/baae090\/59091441\/baae090.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T03:49:51Z","timestamp":1726199391000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/database\/article\/doi\/10.1093\/database\/baae090\/7755058"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1093\/database\/baae090","relation":{},"ISSN":["1758-0463"],"issn-type":[{"value":"1758-0463","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024]]},"published":{"date-parts":[[2024]]},"article-number":"baae090"}}