{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T05:08:59Z","timestamp":1770181739304,"version":"3.49.0"},"reference-count":34,"publisher":"Oxford University Press (OUP)","issue":"2","license":[{"start":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T00:00:00Z","timestamp":1768435200000},"content-version":"vor","delay-in-days":12,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Chan Zuckerberg Institute","award":["DAF2024-350950"],"award-info":[{"award-number":["DAF2024-350950"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,1,3]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Biomedical Entity Linking (BEL) maps mentions in biomedical text to standardized identifiers, enabling structured data integration and downstream knowledge discovery. However, current BEL systems remain fundamentally constrained by the recall of the initial candidate pool, where suboptimal retrieval limits the overall effectiveness of the normalization pipeline.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We present the first systematic evaluation of Generative Relevance Feedback (GRF) for enhancing candidate retrieval in state-of-the-art BEL systems. GRF leverages large language models (LLMs) to enrich the expressiveness of the mention in a zero-shot fashion. We assess GRF\u2019s impact under two scenarios\u2014direct linking prediction and candidate generation in cascading normalization pipelines\u2014and analyze its sensitivity to different LLMs, feedback types, and integration strategies. Experiments across eight corpora and four biomedical knowledge bases demonstrate that integrating GRF significantly improves both accuracy and recall, thereby increasing the upper bound on normalization performance. Our findings highlight GRF as an efficient, model-agnostic solution and underscore its potential as a key component for advancing BEL.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>The code to reproduce our experiments can be found at: https:\/\/doi.org\/10.5281\/zenodo.17853541.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btag011","type":"journal-article","created":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T12:56:05Z","timestamp":1767963365000},"source":"Crossref","is-referenced-by-count":0,"title":["Improving biomedical entity linking with generative relevance feedback"],"prefix":"10.1093","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-4051-6871","authenticated-orcid":false,"given":"Darya","family":"Shlyk","sequence":"first","affiliation":[{"name":"Department of Computer Science, Universit\u00e0 degli Studi di Milano , Milan 20133,","place":["Italy"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1455-3370","authenticated-orcid":false,"given":"Lawrence","family":"Hunter","sequence":"additional","affiliation":[{"name":"Department of Pediatrics, The University of Chicago , Chicago IL 60637,","place":["United States"]}]}],"member":"286","published-online":{"date-parts":[[2026,1,14]]},"reference":[{"key":"2026020310415051200_btag011-B1","doi-asserted-by":"publisher","first-page":"147981","DOI":"10.1109\/ACCESS.2024.3472500","article-title":"Large language models for clinical text cleansing enhance medical concept normalization","volume":"12","author":"Abdulnazar","year":"2024","journal-title":"IEEE Access"},{"key":"2026020310415051200_btag011-B2","doi-asserted-by":"publisher","first-page":"4644","DOI":"10.18653\/v1\/2022.naacl-main.343","volume-title":"Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies.","author":"Agarwal","year":"2022"},{"key":"2026020310415051200_btag011-B3","doi-asserted-by":"publisher","author":"Bai","year":"2023","DOI":"10.48550\/arXiv.2505.09388"},{"key":"2026020310415051200_btag011-B4","doi-asserted-by":"publisher","first-page":"104850","DOI":"10.1016\/j.jbi.2025.104850","article-title":"Biomedical text normalization through generative modeling","volume":"167","author":"Berkowitz","year":"2025","journal-title":"J Biomed Inform"},{"key":"2026020310415051200_btag011-B5","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baae067","article-title":"Improving biomedical entity linking for complex entity mentions with LLM-based text simplification","volume":"2024","author":"Borchert","year":"2024","journal-title":"Database (Oxford)"},{"key":"2026020310415051200_btag011-B6","doi-asserted-by":"publisher","first-page":"D36","DOI":"10.1093\/nar\/gku1055","article-title":"Gene: a gene-centered information resource at NCBI","volume":"43","author":"Brown","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2026020310415051200_btag011-B7","doi-asserted-by":"publisher","first-page":"D1257","DOI":"10.1093\/nar\/gkac833","article-title":"Comparative toxicogenomics database (CTD): update 2023","volume":"51","author":"Davis","year":"2023","journal-title":"Nucleic Acids Research"},{"key":"2026020310415051200_btag011-B8","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1017\/rsm.2025.9","article-title":"Generalizable and scalable multistage biomedical concept normalization leveraging large language models","volume":"16","author":"Dobbins","year":"2025","journal-title":"Res Synth Methods"},{"key":"2026020310415051200_btag011-B9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.jbi.2013.12.006","article-title":"NCBI disease corpus: a resource for disease name recognition and concept normalization","volume":"47","author":"Do\u011fan","year":"2014","journal-title":"J Biomed Inform"},{"key":"2026020310415051200_btag011-B10","doi-asserted-by":"publisher","first-page":"104252","DOI":"10.1016\/j.jbi.2022.104252","article-title":"An overview of biomedical entity linking throughout the years","volume":"137","author":"French","year":"2023","journal-title":"J Biomed Inform"},{"key":"2026020310415051200_btag011-B11","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btae474","article-title":"BELHD: improving biomedical entity linking with homonym disambiguation","volume":"40","author":"Garda","year":"2024","journal-title":"Bioinformatics"},{"key":"2026020310415051200_btag011-B12","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2308.11537","article-title":"BELB: a biomedical entity linking benchmark","volume":"39","author":"Garda","year":"2023","journal-title":"Bioinformatics"},{"key":"2026020310415051200_btag011-B13","doi-asserted-by":"publisher","author":"Gemma Team Aishwarya Kamath","year":"2025","DOI":"10.48550\/arXiv.2503.19786"},{"key":"2026020310415051200_btag011-B14","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1186\/1471-2105-11-85","article-title":"LINNAEUS: a species name identification system for biomedical literature","volume":"11","author":"Gerner","year":"2010","journal-title":"BMC Bioinformatics"},{"key":"2026020310415051200_btag011-B15","doi-asserted-by":"publisher","author":"Gillick","DOI":"10.18653\/V1\/K19-1049"},{"key":"2026020310415051200_btag011-B16","doi-asserted-by":"publisher","first-page":"103779","DOI":"10.1016\/j.jbi.2021.103779","article-title":"NLM-Gene, a richly annotated gold standard dataset for gene entities that addresses ambiguity and multi-species gene recognition","volume":"118","author":"Islamaj","year":"2021","journal-title":"J Biomed Inform"},{"key":"2026020310415051200_btag011-B17","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baac102","article-title":"NLM-Chem-BC7: manually annotated full-text resources for chemical entity annotation and indexing in biomedical articles","volume":"2022","author":"Islamaj","year":"2022","journal-title":"Database (Oxford)"},{"key":"2026020310415051200_btag011-B18","first-page":"9459","author":"Lewis"},{"key":"2026020310415051200_btag011-B19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.48550\/arXiv.2108.11044","article-title":"Pseudo relevance feedback with deep language models and dense retrievers: successes and pitfalls","volume":"41","author":"Li","year":"2021","journal-title":"ACM Trans Inf Syst"},{"key":"2026020310415051200_btag011-B20","doi-asserted-by":"publisher","author":"Li","year":"2021","DOI":"10.48550\/arXiv.2112.06400"},{"key":"2026020310415051200_btag011-B21","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baw068","article-title":"Biocreative V CDR task corpus: a resource for chemical disease relation extraction","volume":"2016","author":"Li","year":"2016","journal-title":"Database (Oxford)"},{"key":"2026020310415051200_btag011-B22","doi-asserted-by":"publisher","article-title":"Self-","author":"Liu","DOI":"10.18653\/v1\/2021.naacl-main.334"},{"key":"2026020310415051200_btag011-B23","doi-asserted-by":"publisher","first-page":"1491","DOI":"10.1145\/3511808.3557231","author":"MacAvaney","year":"2022"},{"key":"2026020310415051200_btag011-B24","doi-asserted-by":"publisher","first-page":"2026","DOI":"10.1145\/3539618.3591992","author":"Mackie","year":"2023"},{"key":"2026020310415051200_btag011-B25","doi-asserted-by":"publisher","author":"Mackie","year":"2023","DOI":"10.48550\/arXiv.2305.07477"},{"key":"2026020310415051200_btag011-B26","doi-asserted-by":"publisher","first-page":"e65390","DOI":"10.1371\/journal.pone.0065390","article-title":"The SPECIES and ORGANISMS resources for fast and accurate identification of taxonomic names in text","volume":"8","author":"Pafilis","year":"2013","journal-title":"PLoS One"},{"key":"2026020310415051200_btag011-B27","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1186\/s13326-025-00328-3","article-title":"Enriched knowledge representation in biological fields: a case study of literature-based discovery in Alzheimer\u2019s disease","volume":"16","author":"Pu","year":"2025","journal-title":"J Biomed Semantics"},{"key":"2026020310415051200_btag011-B28","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1016\/j.csbj.2024.10.017","article-title":"Biokgrapher: initial evaluation of automated knowledge graph construction from biomedical literature","volume":"24","author":"Sch\u00e4fer","year":"2024","journal-title":"Comput Struct Biotechnol J"},{"key":"2026020310415051200_btag011-B29","doi-asserted-by":"publisher","first-page":"D136","DOI":"10.1093\/nar\/gkr1178","article-title":"The NCBI taxonomy database","volume":"40","author":"Scott","year":"2012","journal-title":"Nucleic Acid Res"},{"key":"2026020310415051200_btag011-B30","doi-asserted-by":"publisher","first-page":"380","DOI":"10.18653\/v1\/2024.bionlp-1.29","author":"Shlyk","year":"2024"},{"key":"2026020310415051200_btag011-B31","doi-asserted-by":"publisher","author":"Sung","year":"2020","DOI":"10.18653\/v1\/2020.acl-main.335"},{"key":"2026020310415051200_btag011-B32","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1038\/s41746-020-0221-y","article-title":"An overview of clinical decision support systems: benefits, risks, and strategies for success","volume":"3","author":"Sutton","year":"2020","journal-title":"NPJ Digit Med"},{"key":"2026020310415051200_btag011-B33","doi-asserted-by":"publisher","author":"Tutubalina","year":"2020","DOI":"10.18653\/v1\/2020.coling-main.588"},{"key":"2026020310415051200_btag011-B34","doi-asserted-by":"publisher","first-page":"e918710","DOI":"10.1155\/2015\/918710","article-title":"GNormPlus: an integrative approach for tagging genes, gene families, and protein domains","volume":"2015","author":"Wei","year":"2015","journal-title":"Biomed Res Int"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btag011\/66419810\/btag011.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/42\/2\/btag011\/66419810\/btag011.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/42\/2\/btag011\/66419810\/btag011.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T15:42:11Z","timestamp":1770133331000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btag011\/8426181"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2026,1,3]]},"references-count":34,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,1,3]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btag011","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2026,2]]},"published":{"date-parts":[[2026,1,3]]},"article-number":"btag011"}}