{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T14:28:23Z","timestamp":1760711303462,"version":"3.41.2"},"reference-count":37,"publisher":"Oxford University Press (OUP)","issue":"1","license":[{"start":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T00:00:00Z","timestamp":1705881600000},"content-version":"vor","delay-in-days":21,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100014013","name":"UK Research and Innovation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100014013","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Amazon Machine Learning Research Award"},{"DOI":"10.13039\/501100022011","name":"Cancer Research UK Cambridge Institute","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100022011","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000268","name":"Biotechnology and Biological Sciences Research Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000268","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000308","name":"British Council","doi-asserted-by":"publisher","award":["65BX18MNIB"],"award-info":[{"award-number":["65BX18MNIB"]}],"id":[{"id":"10.13039\/501100000308","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,1,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Scientific advances build on the findings of existing research. The 2001 publication of the human genome has led to the production of huge volumes of literature exploring the context-specific functions and interactions of genes. Technology is needed to perform large-scale text mining of research papers to extract the reported actions of genes in specific experimental contexts and cell states, such as cancer, thereby facilitating the design of new therapeutic strategies.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We present a new corpus and Text Mining methodology that can accurately identify and extract the most important details of cancer genomics experiments from biomedical texts. We build a Named Entity Recognition model that accurately extracts relevant experiment details from PubMed abstract text, and a second model that identifies the relationships between them. This system outperforms earlier models and enables the analysis of gene function in diverse and dynamically evolving experimental contexts.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>Code and data are available here: https:\/\/github.com\/cambridgeltl\/functional-genomics-ie.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae021","type":"journal-article","created":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T08:13:00Z","timestamp":1705997580000},"source":"Crossref","is-referenced-by-count":4,"title":["Text mining for contexts and relationships in cancer genomics literature"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8715-9035","authenticated-orcid":false,"given":"Charlotte","family":"Collins","sequence":"first","affiliation":[{"name":"Language Technology Laboratory, Theoretical and Applied Linguistics, Faculty of Modern and Medieval Languages and Linguistics, University of Cambridge , Cambridge CB3 9DA, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0998-438X","authenticated-orcid":false,"given":"Simon","family":"Baker","sequence":"additional","affiliation":[{"name":"Language Technology Laboratory, Theoretical and Applied Linguistics, Faculty of Modern and Medieval Languages and Linguistics, University of Cambridge , Cambridge CB3 9DA, United Kingdom"}]},{"given":"Jason","family":"Brown","sequence":"additional","affiliation":[{"name":"Language Technology Laboratory, Theoretical and Applied Linguistics, Faculty of Modern and Medieval Languages and Linguistics, University of Cambridge , Cambridge CB3 9DA, United Kingdom"}]},{"given":"Huiyuan","family":"Zheng","sequence":"additional","affiliation":[{"name":"Institute of Environmental Medicine, Karolinska Institutet , 171 77 Stockholm, Sweden"}]},{"given":"Adelyne","family":"Chan","sequence":"additional","affiliation":[{"name":"Cancer Research UK Cambridge Institute, Li Ka Shing Centre, University of Cambridge , Cambridge CB2 0RE, United Kingdom"}]},{"given":"Ulla","family":"Stenius","sequence":"additional","affiliation":[{"name":"Institute of Environmental Medicine, Karolinska Institutet , 171 77 Stockholm, Sweden"}]},{"given":"Masashi","family":"Narita","sequence":"additional","affiliation":[{"name":"Cancer Research UK Cambridge Institute, Li Ka Shing Centre, University of Cambridge , Cambridge CB2 0RE, United Kingdom"}]},{"given":"Anna","family":"Korhonen","sequence":"additional","affiliation":[{"name":"Language Technology Laboratory, Theoretical and Applied Linguistics, Faculty of Modern and Medieval Languages and Linguistics, University of Cambridge , Cambridge CB3 9DA, United Kingdom"}]}],"member":"286","published-online":{"date-parts":[[2024,1,22]]},"reference":[{"key":"2024012817140891400_btae021-B1","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1093\/bfgp\/elu015","article-title":"Event-based text mining for biology and functional genomics","volume":"14","author":"Ananiadou","year":"2015","journal-title":"Brief Funct Genomics"},{"key":"2024012817140891400_btae021-B2","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1038\/75556","article-title":"Gene ontology: tool for the unification of biology","volume":"25","author":"Ashburner","year":"2000","journal-title":"Nat Genet"},{"key":"2024012817140891400_btae021-B3","doi-asserted-by":"crossref","first-page":"3973","DOI":"10.1093\/bioinformatics\/btx454","article-title":"Cancer Hallmarks Analytics Tool (CHAT): a text mining approach to organize and evaluate scientific literature on cancer","volume":"33","author":"Baker","year":"2017","journal-title":"Bioinformatics"},{"key":"2024012817140891400_btae021-B4","doi-asserted-by":"crossref","first-page":"432","DOI":"10.1093\/bioinformatics\/btv585","article-title":"Automatic semantic classification of scientific literature according to the hallmarks of cancer","volume":"32","author":"Baker","year":"2016","journal-title":"Bioinformatics"},{"key":"2024012817140891400_btae021-B5","doi-asserted-by":"crossref","first-page":"353","DOI":"10.1038\/s41571-018-0002-6","article-title":"The emerging clinical relevance of genomics in cancer medicine","volume":"15","author":"Berger","year":"2018","journal-title":"Nat Rev Clin Oncol"},{"key":"2024012817140891400_btae021-B6","first-page":"149","volume-title":"Apoptosis-induced Compensatory Proliferation in Cancer","author":"Celis","year":"2022"},{"key":"2024012817140891400_btae021-B7","first-page":"4","article-title":"Extraction of gene-disease relations from Medline using domain dictionaries and machine learning","author":"Chun","year":"2006","journal-title":"In:"},{"key":"2024012817140891400_btae021-B8","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1186\/1742-5581-3-11","article-title":"LitMiner: integration of library services within a bio-informatics application","volume":"3","author":"Demaine","year":"2006","journal-title":"Biomed Digit Libr"},{"key":"2024012817140891400_btae021-B9","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1158\/2159-8290.CD-16-0241","article-title":"Cellular senescence promotes adverse effects of chemotherapy and cancer relapsecellular senescence and chemotherapy","volume":"7","author":"Demaria","year":"2017","journal-title":"Cancer Discov"},{"year":"2019","author":"Devlin","key":"2024012817140891400_btae021-B10"},{"key":"2024012817140891400_btae021-B11","doi-asserted-by":"crossref","first-page":"486","DOI":"10.1038\/s41418-017-0012-4","article-title":"Molecular mechanisms of cell death: recommendations of the nomenclature committee on cell death 2018","volume":"25","author":"Galluzzi","year":"2018","journal-title":"Cell Death Differ"},{"key":"2024012817140891400_btae021-B12","doi-asserted-by":"crossref","first-page":"283","DOI":"10.3390\/info13060283","article-title":"A novel multi-view ensemble learning architecture to improve the structured text classification","volume":"13","author":"Gon\u00e7alves","year":"2022","journal-title":"Information"},{"key":"2024012817140891400_btae021-B13","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/j.ajhg.2015.05.020","article-title":"The human phenotype ontology: semantic unification of common and rare disease","volume":"97","author":"Groza","year":"2015","journal-title":"Am J Hum Genet"},{"key":"2024012817140891400_btae021-B14","doi-asserted-by":"crossref","first-page":"239","DOI":"10.1038\/bjc.1972.33","article-title":"Apoptosis: a basic biological phenomenon with wideranging implications in tissue kinetics","volume":"26","author":"Kerr","year":"1972","journal-title":"Br J Cancer"},{"key":"2024012817140891400_btae021-B15","doi-asserted-by":"crossref","first-page":"W90","DOI":"10.1093\/nar\/gkw377","article-title":"Enrichr: a comprehensive gene set enrichment analysis web server 2016 update","volume":"44","author":"Kuleshov","year":"2016","journal-title":"Nucleic Acids Res"},{"key":"2024012817140891400_btae021-B16","doi-asserted-by":"crossref","first-page":"e1675","DOI":"10.1038\/cddis.2015.20","article-title":"How cell death shapes cancer","volume":"6","author":"Labi","year":"2015","journal-title":"Cell Death Dis"},{"key":"2024012817140891400_btae021-B17","doi-asserted-by":"crossref","first-page":"860","DOI":"10.1038\/35057062","article-title":"Initial sequencing and analysis of the human genome","volume":"409","author":"Lander","year":"2001","journal-title":"Nature"},{"key":"2024012817140891400_btae021-B18","doi-asserted-by":"crossref","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","article-title":"BioBERT: a pre-trained biomedical language representation model for biomedical text mining","volume":"36","author":"Lee","year":"2020","journal-title":"Bioinformatics"},{"key":"2024012817140891400_btae021-B19","doi-asserted-by":"crossref","first-page":"D677","DOI":"10.1093\/nar\/gkaa917","article-title":"DEG 15, an update of the Database of Essential Genes that includes built-in analysis tools","volume":"49","author":"Luo","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2024012817140891400_btae021-B20","doi-asserted-by":"crossref","first-page":"ooab082","DOI":"10.1093\/jamiaopen\/ooab082","article-title":"GENETEX\u2014a genomics report text mining r package and shiny application designed to capture real-world clinico-genomic data","volume":"4","author":"Miller","year":"2021","journal-title":"JAMIA Open"},{"key":"2024012817140891400_btae021-B21","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1038\/cr.2013.169","article-title":"Historical landmarks of autophagy research","volume":"24","author":"Ohsumi","year":"2014","journal-title":"Cell Res"},{"key":"2024012817140891400_btae021-B22","doi-asserted-by":"crossref","first-page":"2674","DOI":"10.3390\/app11062674","article-title":"Classification of full text biomedical documents: sections importance assessment","volume":"11","author":"Oliveira Gon\u00e7alves","year":"2021","journal-title":"Appl Sci"},{"author":"Pafilis","key":"2024012817140891400_btae021-B23"},{"key":"2024012817140891400_btae021-B24","doi-asserted-by":"crossref","first-page":"D548","DOI":"10.1093\/nar\/gkv1048","article-title":"SIGNOR: a database of causal relationships between biological entities","volume":"44","author":"Perfetto","year":"2016","journal-title":"Nucleic Acids Res"},{"key":"2024012817140891400_btae021-B25","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1038\/s41576-021-00409-w","article-title":"A new era in functional genomics screens","volume":"23","author":"Przybyla","year":"2022","journal-title":"Nat Rev Genet"},{"key":"2024012817140891400_btae021-B26","doi-asserted-by":"crossref","first-page":"1553","DOI":"10.1093\/bioinformatics\/bty845","article-title":"LION LBD: a literature-based discovery system for cancer biology","volume":"35","author":"Pyysalo","year":"2019","journal-title":"Bioinformatics"},{"key":"2024012817140891400_btae021-B27","doi-asserted-by":"crossref","first-page":"141","DOI":"10.4258\/hir.2017.23.3.141","article-title":"Text mining in biomedical domain with emphasis on document clustering","volume":"23","author":"Renganathan","year":"2017","journal-title":"Healthc Inform Res"},{"key":"2024012817140891400_btae021-B28","first-page":"75","volume-title":"28 May 2016,","author":"Rim","year":"2016"},{"key":"2024012817140891400_btae021-B29","doi-asserted-by":"crossref","first-page":"e57116","DOI":"10.7554\/eLife.57116","article-title":"shinyDepMap, a tool to identify targetable cancer genes and their functional connections from Cancer Dependency Map data","volume":"10","author":"Shimada","year":"2021","journal-title":"Elife"},{"first-page":"1487","year":"2019","author":"Sousa","key":"2024012817140891400_btae021-B30"},{"key":"2024012817140891400_btae021-B31","doi-asserted-by":"crossref","first-page":"1045","DOI":"10.1016\/j.molcel.2020.05.014","article-title":"Cell death in the origin and treatment of cancer","volume":"78","author":"Strasser","year":"2020","journal-title":"Mol Cell"},{"year":"2011","author":"Stubbs","key":"2024012817140891400_btae021-B32"},{"key":"2024012817140891400_btae021-B33","doi-asserted-by":"crossref","first-page":"209","DOI":"10.3322\/caac.21660","article-title":"Global Cancer Statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries","volume":"71","author":"Sung","year":"2021","journal-title":"CA Cancer J Clin"},{"key":"2024012817140891400_btae021-B34","doi-asserted-by":"crossref","first-page":"4837","DOI":"10.1093\/bioinformatics\/btac598","article-title":"BERN2: an advanced neural biomedical named entity recognition and normalization tool","volume":"38","author":"Sung","year":"2022","journal-title":"Bioinformatics"},{"key":"2024012817140891400_btae021-B35","doi-asserted-by":"crossref","first-page":"1304","DOI":"10.1126\/science.1058040","article-title":"The sequence of the human genome","volume":"291","author":"Venter","year":"2001","journal-title":"Science"},{"key":"2024012817140891400_btae021-B36","doi-asserted-by":"crossref","first-page":"W214","DOI":"10.1093\/nar\/gkq537","article-title":"The GeneMANIA prediction server: biological network integration for gene prioritization and predicting gene function","volume":"38","author":"Warde-Farley","year":"2010","journal-title":"Nucleic Acids Res"},{"key":"2024012817140891400_btae021-B37","doi-asserted-by":"crossref","first-page":"i386","DOI":"10.1093\/bioinformatics\/bty263","article-title":"A gene\u2013phenotype relationship extraction pipeline from the biomedical literature using a representation learning approach","volume":"34","author":"Xing","year":"2018","journal-title":"Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae021\/56334549\/btae021.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/1\/btae021\/56440230\/btae021.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/1\/btae021\/56440230\/btae021.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,28]],"date-time":"2024-01-28T17:14:38Z","timestamp":1706462078000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae021\/7585443"}},"subtitle":[],"editor":[{"given":"Zhiyong","family":"Lu","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,1,1]]},"references-count":37,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,1,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae021","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"type":"print","value":"1367-4803"},{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2024,1,1]]},"published":{"date-parts":[[2024,1,1]]},"article-number":"btae021"}}