{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T09:56:49Z","timestamp":1771235809810,"version":"3.50.1"},"reference-count":53,"publisher":"Oxford University Press (OUP)","license":[{"start":{"date-parts":[[2024,8,26]],"date-time":"2024-08-26T00:00:00Z","timestamp":1724630400000},"content-version":"vor","delay-in-days":238,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100002347","name":"Bundesministerium f\u00fcr Bildung und Forschung","doi-asserted-by":"publisher","award":["031L0029 [A-C]"],"award-info":[{"award-number":["031L0029 [A-C]"]}],"id":[{"id":"10.13039\/501100002347","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002347","name":"Bundesministerium f\u00fcr Bildung und Forschung","doi-asserted-by":"publisher","award":["031L0029 [A-C]"],"award-info":[{"award-number":["031L0029 [A-C]"]}],"id":[{"id":"10.13039\/501100002347","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,8,5]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>MicroRNAs (miRNAs) play important roles in post-transcriptional processes and regulate major cellular functions. The abnormal regulation of expression of miRNAs has been linked to numerous human diseases such as respiratory diseases, cancer, and neurodegenerative diseases. Latest miRNA\u2013disease associations are predominantly found in unstructured biomedical literature. Retrieving these associations manually can be cumbersome and time-consuming due to the continuously expanding number of publications. We propose a deep learning-based text mining approach that extracts normalized miRNA\u2013disease associations from biomedical literature. 
To train the deep learning models, we build a new training corpus that is extended by distant supervision utilizing multiple external databases. A quantitative evaluation shows that the workflow achieves an area under receiver operator characteristic curve of 98% on a holdout test set for the detection of miRNA\u2013disease associations. We demonstrate the applicability of the approach by extracting new miRNA\u2013disease associations from biomedical literature (PubMed and PubMed Central). We have shown through quantitative analysis and evaluation on three different neurodegenerative diseases that our approach can effectively extract miRNA\u2013disease associations not yet available in public databases.<\/jats:p>\n               <jats:p>Database URL: https:\/\/zenodo.org\/records\/10523046<\/jats:p>","DOI":"10.1093\/database\/baae066","type":"journal-article","created":{"date-parts":[[2024,8,6]],"date-time":"2024-08-06T06:46:05Z","timestamp":1722926765000},"source":"Crossref","is-referenced-by-count":6,"title":["Dataset of miRNA\u2013disease relations extracted from textual data using transformer-based neural networks"],"prefix":"10.1093","volume":"2024","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9970-4144","authenticated-orcid":false,"given":"Sumit","family":"Madan","sequence":"first","affiliation":[{"name":"Department of Bioinformatics, Fraunhofer Institute for Algorithms and Scientific Computing (SCAI) , Schloss Birlinghoven, 53757 Sankt Augustin, Germany"}]},{"given":"Lisa","family":"K\u00fchnel","sequence":"additional","affiliation":[{"name":"Knowledge Management, German National Library of Medicine (ZB MED)\u2014Information Centre for Life Sciences , Friedrich-Hirzebruch-Allee 4, Bonn 53115, Germany"},{"name":"Graduate School DILS, Bielefeld Institute for Bioinformatics Infrastructure (BIBI), Faculty of Technology, Bielefeld University , Postfach 10 01 31, Bielefeld, Nordrhein-Westfalen 33501, 
Germany"}]},{"given":"Holger","family":"Fr\u00f6hlich","sequence":"additional","affiliation":[{"name":"Department of Bioinformatics, Fraunhofer Institute for Algorithms and Scientific Computing (SCAI) , Schloss Birlinghoven, 53757 Sankt Augustin, Germany"},{"name":"Bonn-Aachen International Center for Information Technology (B-IT), University of Bonn , Friedrich-Hirzebruch-Allee 6, Bonn 53113, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9012-6720","authenticated-orcid":false,"given":"Martin","family":"Hofmann-Apitius","sequence":"additional","affiliation":[{"name":"Department of Bioinformatics, Fraunhofer Institute for Algorithms and Scientific Computing (SCAI) , Schloss Birlinghoven, 53757 Sankt Augustin, Germany"},{"name":"Bonn-Aachen International Center for Information Technology (B-IT), University of Bonn , Friedrich-Hirzebruch-Allee 6, Bonn 53113, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1379-7023","authenticated-orcid":false,"given":"Juliane","family":"Fluck","sequence":"additional","affiliation":[{"name":"Knowledge Management, German National Library of Medicine (ZB MED)\u2014Information Centre for Life Sciences , Friedrich-Hirzebruch-Allee 4, Bonn 53115, Germany"},{"name":"Graduate School DILS, Bielefeld Institute for Bioinformatics Infrastructure (BIBI), Faculty of Technology, Bielefeld University , Postfach 10 01 31, Bielefeld, Nordrhein-Westfalen 33501, Germany"},{"name":"Information management, Institute of Geodesy and Geoinformation, University of Bonn , Katzenburgweg 1a, Bonn 53115, Germany"}]}],"member":"286","published-online":{"date-parts":[[2024,8,5]]},"reference":[{"key":"2024082605272734700_R1","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1038\/nrd.2016.246","article-title":"MicroRNA therapeutics: towards a new era for the management of cancer and other diseases","volume":"16","author":"Rupaimoole","year":"2017","journal-title":"Nat Rev Drug 
Discov"},{"key":"2024082605272734700_R2","doi-asserted-by":"crossref","first-page":"3753","DOI":"10.1158\/0008-5472.CAN-04-0637","article-title":"Reduced expression of the let-7 MicroRNAs in human lung cancers in association with shortened postoperative survival","volume":"64","author":"Takamizawa","year":"2004","journal-title":"Cancer Res"},{"key":"2024082605272734700_R3","doi-asserted-by":"crossref","DOI":"10.1038\/ncomms2876","article-title":"MicroRNA-135b promotes lung cancer metastasis by regulating multiple targets in the Hippo pathway and LZTS1","volume":"4","author":"Lin","year":"2013","journal-title":"Nat Commun"},{"key":"2024082605272734700_R4","doi-asserted-by":"crossref","first-page":"695","DOI":"10.1183\/09031936.00212011","article-title":"MicroRNAs and respiratory diseases","volume":"41","author":"Rupani","year":"2013","journal-title":"Eur Respir J"},{"key":"2024082605272734700_R5","doi-asserted-by":"crossref","DOI":"10.1038\/s41525-022-00319-8","article-title":"Synaptosome microRNAs regulate synapse functions in Alzheimer\u2019s disease","volume":"7","author":"Kumar","year":"2022","journal-title":"NPJ Genom Med"},{"key":"2024082605272734700_R6","doi-asserted-by":"crossref","first-page":"1468","DOI":"10.1016\/j.jalz.2019.06.4952","article-title":"Differential expression of microRNAs in Alzheimer\u2019s disease brain, blood, and cerebrospinal fluid","volume":"15","author":"Takousis","year":"2019","journal-title":"Alzheimers Dement"},{"key":"2024082605272734700_R7","article-title":"MicroRNAs and Alzheimer\u2019s disease mouse models: current insights and future research avenues","volume":"2011","author":"H\u00e9bert","year":"2011","journal-title":"Int J Alzheimer\u2019s Dis"},{"key":"2024082605272734700_R8","doi-asserted-by":"crossref","DOI":"10.12688\/f1000research.4591.3","article-title":"Detecting miRNA mentions and relations in biomedical 
literature","volume":"3","author":"Bagewadi","year":"2015","journal-title":"F1000Res"},{"key":"2024082605272734700_R9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pcbi.1004391","article-title":"miRTex: a text mining system for miRNA-gene relation extraction","volume":"11","author":"Li","year":"2015","journal-title":"PLoS Comput Biol"},{"key":"2024082605272734700_R10","doi-asserted-by":"crossref","DOI":"10.1186\/s13326-015-0044-y","article-title":"miRiaD: a text mining tool for detecting associations of microRNAs with diseases","volume":"7","author":"Gupta","year":"2016","journal-title":"J Biomed Semant"},{"key":"2024082605272734700_R11","doi-asserted-by":"crossref","DOI":"10.1186\/s12859-015-0472-9","article-title":"Extraction of relations between genes and diseases from text and large-scale data analysis: implications for translational research","volume":"16","author":"Bravo","year":"2015","journal-title":"BMC Bioinf"},{"key":"2024082605272734700_R12","first-page":"D845","article-title":"The DisGeNET knowledge platform for disease genomics: 2019 update","volume":"48","author":"Pi\u00f1ero","year":"2020","journal-title":"Nucleic Acids Res"},{"key":"2024082605272734700_R13","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"2024082605272734700_R14","article-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"2024082605272734700_R15","first-page":"6000","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"2024082605272734700_R16","doi-asserted-by":"crossref","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","article-title":"BioBERT: a pre-trained biomedical language representation model for biomedical text 
mining","volume":"36","author":"Lee","year":"2020","journal-title":"Bioinformatics"},{"key":"2024082605272734700_R17","first-page":"4700","article-title":"BioMegatron: larger biomedical domain language model","author":"Shin","year":"2020"},{"key":"2024082605272734700_R18","article-title":"ClinicalBERT: modeling clinical notes and predicting hospital readmission","author":"Huang","year":"2019"},{"key":"2024082605272734700_R19","doi-asserted-by":"crossref","first-page":"276","DOI":"10.1504\/IJDMB.2016.074878","article-title":"A general protein-protein interaction extraction architecture based on word representation and feature selection","volume":"14","author":"Jiang","year":"2016","journal-title":"Int J Data Min Bioinform"},{"key":"2024082605272734700_R20","doi-asserted-by":"crossref","DOI":"10.1016\/j.jbi.2020.103451","article-title":"Extracting drug-drug interactions from texts with BioBERT and multiple entity-aware attentions","volume":"106","author":"Zhu","year":"2020","journal-title":"J Biomed Informat"},{"key":"2024082605272734700_R21","first-page":"15","article-title":"An empirical evaluation of resources for the identification of disease and adverse effects in biomedical literature","author":"Gurulingappa","year":"2010"},{"key":"2024082605272734700_R22","doi-asserted-by":"crossref","first-page":"2193","DOI":"10.1093\/jamia\/ocab112","article-title":"Are synthetic clinical notes useful for real natural language processing tasks: a case study on clinical entity recognition","volume":"28","author":"Li","year":"2021","journal-title":"J Am Med Inf Assoc"},{"key":"2024082605272734700_R23","doi-asserted-by":"crossref","DOI":"10.1093\/jamiaopen\/ooac087","article-title":"Critical assessment of transformer-based AI models for German clinical notes","volume":"5","author":"Lentzen","year":"2022","journal-title":"JAMIA Open"},{"key":"2024082605272734700_R24","first-page":"1","article-title":"Review on event extraction for BioNLP with a 
survey","author":"Pattankar","year":"2023"},{"key":"2024082605272734700_R25","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0023862","article-title":"Enhancing biomedical text summarization using semantic relation extraction","volume":"6","author":"Shang","year":"2011","journal-title":"PLoS One"},{"key":"2024082605272734700_R26","doi-asserted-by":"crossref","first-page":"5255","DOI":"10.1093\/bioinformatics\/btaa668","article-title":"Highly accurate classification of chest radiographic reports using a deep learning natural language model pre-trained on 3.8 million text reports","volume":"36","author":"Bressem","year":"2020","journal-title":"Bioinformatics"},{"key":"2024082605272734700_R27","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.jbi.2013.12.006","article-title":"NCBI disease corpus: a resource for disease name recognition and concept normalization","volume":"47","author":"Do\u01e7an","year":"2014","journal-title":"J Biomed Informat"},{"key":"2024082605272734700_R28","article-title":"BioCreative V CDR task corpus: a resource for chemical disease relation extraction","volume":"2016","author":"Li","year":"2016","journal-title":"Database (Oxford)"},{"key":"2024082605272734700_R29","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1007\/978-94-017-2390-9_10","volume-title":"Natural Language Processing Using Very Large Corpora","author":"Ramshaw","year":"1999","edition":"1"},{"key":"2024082605272734700_R30","article-title":"Relation extraction using distant supervision: a survey","volume":"51","author":"Smirnova","year":"2018","journal-title":"ACM Comput Surv"},{"key":"2024082605272734700_R31","first-page":"1","article-title":"HMDD v2.0: a database for experimentally supported human microRNA and disease associations","volume":"42","author":"Li","year":"2014","journal-title":"Nucleic Acids Res"},{"key":"2024082605272734700_R32","doi-asserted-by":"crossref","first-page":"D1013","DOI":"10.1093\/nar\/gky1010","article-title":"HMDD 
v3.0: a database for experimentally supported human microRNA-disease associations","volume":"47","author":"Huang","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2024082605272734700_R33","doi-asserted-by":"crossref","first-page":"D98","DOI":"10.1093\/nar\/gkn714","article-title":"miR2Disease: a manually curated database for microRNA deregulation in human disease","volume":"37","author":"Jiang","year":"2009","journal-title":"Nucleic Acids Res"},{"key":"2024082605272734700_R34","doi-asserted-by":"crossref","first-page":"S14","DOI":"10.1186\/1471-2105-6-S1-S14","article-title":"ProMiner: rule-based protein and gene entity recognition","volume":"6","author":"Hanisch","year":"2005","journal-title":"BMC Bioinf"},{"key":"2024082605272734700_R35","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1007\/978-1-4615-5529-2_5","volume-title":"Learning to Learn","author":"Caruana","year":"1998"},{"key":"2024082605272734700_R36","doi-asserted-by":"crossref","first-page":"D155","DOI":"10.1093\/nar\/gky1141","article-title":"miRBase: from microRNA sequences to function","volume":"47","author":"Kozomara","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2024082605272734700_R37","article-title":"NormCo: Deep Disease Normalization for Biomedical Knowledge Base Construction","author":"Wright"},{"key":"2024082605272734700_R38","first-page":"115","article-title":"Making a science of model search: hyperparameter optimization in hundreds of dimensions for vision architectures","author":"Bergstra","year":"2013"},{"key":"2024082605272734700_R39","first-page":"2623","article-title":"Optuna: a next-generation hyperparameter optimization framework","author":"Akiba","year":"2019"},{"key":"2024082605272734700_R40","article-title":"Mondo: unifying diseases for the world, by the world","author":"Vasilevsky","year":"2022","journal-title":"medRxiv"},{"key":"2024082605272734700_R41","doi-asserted-by":"crossref","DOI":"10.1186\/s12859-017-1776-8","article-title":"A neural 
network multi-task learning approach to biomedical named entity recognition","volume":"18","author":"Crichton","year":"2017","journal-title":"BMC Bioinf"},{"key":"2024082605272734700_R42","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bat080","article-title":"A CTD-Pfizer collaboration: manual curation of 88 000 scientific articles text mined for drug-disease and drug-phenotype interactions","volume":"2013","author":"Davis","year":"2013","journal-title":"Database"},{"key":"2024082605272734700_R43","doi-asserted-by":"crossref","DOI":"10.1186\/s13326-022-00280-6","article-title":"We are not ready yet: limitations of state-of-the-art disease named entity recognizers","volume":"13","author":"K\u00fchnel","year":"2022","journal-title":"J Biomed Semant"},{"key":"2024082605272734700_R44","article-title":"Elevated plasma miR-133b and miR-221-3p as biomarkers for early Parkinson\u2019s disease","volume":"11","author":"Chen","year":"2021","journal-title":"Sci Rep"},{"key":"2024082605272734700_R45","doi-asserted-by":"crossref","DOI":"10.1111\/cpr.12856","article-title":"The lncRNA H19 binding to let-7b promotes hippocampal glial cell activation and epileptic seizures by targeting Stat3 in a rat model of temporal lobe epilepsy","volume":"53","author":"Han","year":"2020","journal-title":"Cell Prolif"},{"key":"2024082605272734700_R46","doi-asserted-by":"crossref","DOI":"10.3390\/jcm10143091","article-title":"Circulating microRNAs and novel proteins as potential biomarkers of neurological complications after heart bypass surgery","volume":"10","author":"Szwed","year":"2021","journal-title":"J Clin Med"},{"key":"2024082605272734700_R47","doi-asserted-by":"crossref","DOI":"10.1016\/j.ejphar.2022.175302","article-title":"MiR-29a inhibits MPP + - Induced cell death and inflammation in Parkinson\u2019s disease model in vitro by potential targeting of MAVS","volume":"934","author":"Yang","year":"2022","journal-title":"Eur J 
Pharmacol"},{"key":"2024082605272734700_R48","doi-asserted-by":"crossref","DOI":"10.1038\/s41598-023-43096-9","article-title":"MicroRNAs in Parkinson\u2019s disease: a systematic review and diagnostic accuracy meta-analysis","volume":"13","author":"Gu\u00e9vremont","year":"2023","journal-title":"Sci Rep"},{"key":"2024082605272734700_R49","first-page":"1857","article-title":"Serum miR-9a and miR-133b, diagnostic markers for Parkinson\u2019s disease, are up-regulated after Levodopa treatment","volume":"36","author":"Wu","journal-title":"Acta Med Mediterr"},{"key":"2024082605272734700_R50","doi-asserted-by":"crossref","first-page":"457","DOI":"10.1002\/mds.27928","article-title":"Circulating Brain-enriched MicroRNAs for detection and discrimination of idiopathic and genetic Parkinson\u2019s disease","volume":"35","author":"Ravanidis","year":"2020","journal-title":"Mov Disord"},{"key":"2024082605272734700_R51","doi-asserted-by":"crossref","first-page":"468","DOI":"10.1002\/mds.27935","article-title":"Salivary microR-153 and microR-223 Levels as Potential Diagnostic Biomarkers of Idiopathic Parkinson\u2019s Disease","volume":"35","author":"Cressatti","year":"2020","journal-title":"Mov Disord"},{"key":"2024082605272734700_R52","doi-asserted-by":"crossref","first-page":"3363","DOI":"10.1109\/TCBB.2022.3187739","article-title":"Predicting miRNA-disease associations from miRNA-gene-disease heterogeneous network with multi-relational graph convolutional network model","volume":"20","author":"Peng","year":"2023","journal-title":"IEEE\/ACM Trans Comput Biol Bioinform"},{"key":"2024082605272734700_R53","doi-asserted-by":"crossref","first-page":"879","DOI":"10.1016\/j.jbi.2012.04.004","article-title":"The EU-ADR corpus: annotated drugs, diseases, targets, and their relationships","volume":"45","author":"van Mulligen","year":"2012","journal-title":"J Biomed 
Informat"}],"container-title":["Database"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/database\/article-pdf\/doi\/10.1093\/database\/baae066\/58917096\/baae066.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/database\/article-pdf\/doi\/10.1093\/database\/baae066\/58917096\/baae066.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,26]],"date-time":"2024-08-26T05:28:32Z","timestamp":1724650112000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/database\/article\/doi\/10.1093\/database\/baae066\/7727794"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1093\/database\/baae066","relation":{},"ISSN":["1758-0463"],"issn-type":[{"value":"1758-0463","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024]]},"published":{"date-parts":[[2024]]},"article-number":"baae066"}}