{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T23:22:44Z","timestamp":1777418564081,"version":"3.51.4"},"update-to":[{"DOI":"10.1371\/journal.pcbi.1005962","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2018,2,28]],"date-time":"2018-02-28T00:00:00Z","timestamp":1519776000000}}],"reference-count":52,"publisher":"Public Library of Science (PLoS)","issue":"2","license":[{"start":{"date-parts":[[2018,2,15]],"date-time":"2018-02-15T00:00:00Z","timestamp":1518652800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100009708","name":"Novo Nordisk Foundation","doi-asserted-by":"crossref","award":["NNF14CC0001"],"award-info":[{"award-number":["NNF14CC0001"]}],"id":[{"id":"10.13039\/501100009708","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Danish e-Infrastructure Cooperation (DK)","award":["ActionableBiomarkerDK"],"award-info":[{"award-number":["ActionableBiomarkerDK"]}]}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1005962","type":"journal-article","created":{"date-parts":[[2018,2,15]],"date-time":"2018-02-15T13:28:04Z","timestamp":1518701284000},"page":"e1005962","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":137,"title":["A comprehensive and quantitative comparison of text-mining in 15 million full-text articles versus their corresponding abstracts"],"prefix":"10.1371","volume":"14","author":[{"given":"David","family":"Westergaard","sequence":"first","affiliation":[]},{"given":"Hans-Henrik","family":"St\u00e6rfeldt","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9504-2163","authenticated-orcid":true,"given":"Christian","family":"T\u00f8nsberg","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7885-715X","authenticated-orcid":true,"given":"Lars Juhl","family":"Jensen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0316-5866","authenticated-orcid":true,"given":"S\u00f8ren","family":"Brunak","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2018,2,15]]},"reference":[{"key":"ref1","article-title":"Integration of Data Mining in Business Intelligence Systems","author":"A Azevedo","year":"2014"},{"issue":"7","key":"ref2","doi-asserted-by":"crossref","first-page":"224","DOI":"10.1186\/gb-2005-6-7-224","article-title":"Text-mining and information-retrieval services for molecular biology","volume":"6","author":"M Krallinger","year":"2005","journal-title":"Genome biology"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1016\/j.ymeth.2015.01.015","article-title":"Application of text mining in the biomedical domain","volume":"74","author":"WWM Fleuren","year":"2015","journal-title":"Methods"},{"issue":"Suppl 1","key":"ref4","first-page":"69","article-title":"Text Mining in Cancer Gene and Pathway Prioritization","volume":"13","author":"Y Luo","year":"2014","journal-title":"Cancer Informatics"},{"issue":"3","key":"ref5","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1093\/bfgp\/elu015","article-title":"Event-based text mining for biology and functional genomics","volume":"14","author":"S Ananiadou","year":"2015","journal-title":"Briefings in functional genomics"},{"key":"ref6","first-page":"e21","article-title":"Text mining for metabolic pathways, signaling cascades, and protein networks","volume":"283\/pe21","author":"R Hoffmann","year":"2005","journal-title":"Sci. STKE"},{"key":"ref7","unstructured":"Liu F, Chen J, Jagannatha A, Yu H. Learning for Biomedical Information Extraction: Methodological Review of Recent Advances. arXiv:1606.07993 [cs]. 2016."},{"issue":"Suppl 2","key":"ref8","doi-asserted-by":"crossref","first-page":"S8","DOI":"10.1186\/gb-2008-9-s2-s8","article-title":"Linking genes to literature: text mining, information extraction, and retrieval applications for biology","volume":"9","author":"M Krallinger","year":"2008","journal-title":"Genome biology"},{"issue":"1","key":"ref9","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1093\/bib\/bbv087","article-title":"Recent advances and emerging applications in text and data mining for biomedical discovery","volume":"17","author":"GH Gonzalez","year":"2016","journal-title":"Briefings in Bioinformatics"},{"issue":"12","key":"ref10","doi-asserted-by":"crossref","first-page":"829","DOI":"10.1038\/nrg3337","article-title":"Text-mining solutions for biomedical research: enabling integrative biology","volume":"13","author":"D Rebholz-Schuhmann","year":"2012","journal-title":"Nature Reviews Genetics"},{"issue":"6","key":"ref11","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1038\/nrg3208","article-title":"Mining electronic health records: towards better research applications and clinical care","volume":"13","author":"PB Jensen","year":"2012","journal-title":"Nature Reviews Genetics"},{"issue":"6","key":"ref12","doi-asserted-by":"crossref","first-page":"997","DOI":"10.1016\/j.drudis.2016.05.002","article-title":"Text mining patents for biomedical knowledge","volume":"21","author":"R Rodriguez-Esteban","year":"2016","journal-title":"Drug Discovery Today"},{"key":"ref13","first-page":"139","article-title":"Advances in Experimental Medicine and Biology","volume":"939","author":"M Simmons","year":"2016"},{"issue":"2","key":"ref14","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1038\/nrg1768","article-title":"Literature mining for the biologist: from information retrieval to biological discovery","volume":"7","author":"LJ Jensen","year":"2006","journal-title":"Nature reviews. Genetics"},{"issue":"6","key":"ref15","doi-asserted-by":"crossref","first-page":"466","DOI":"10.1093\/bib\/bbn043","article-title":"Facts from text: Can text mining help to scale-up high-quality manual curation of gene products with ontologies?","volume":"9","author":"R Winnenburg","year":"2008","journal-title":"Briefings in Bioinformatics"},{"key":"ref16","doi-asserted-by":"crossref","unstructured":"Wei C-H, Kao H-Y, Lu Z. Text mining tools for assisting literature curation. In: Proceedings of the 5th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics\u2014BCB \u201814 [Internet]. New York, New York, USA: ACM Press; 2014. p. 590\u20131.","DOI":"10.1145\/2649387.2660786"},{"issue":"1","key":"ref17","doi-asserted-by":"crossref","first-page":"492","DOI":"10.1186\/1471-2105-11-492","article-title":"The structural and content aspects of abstracts versus bodies of full text journal articles are different","volume":"11","author":"KB Cohen","year":"2010","journal-title":"BMC Bioinformatics"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"47","DOI":"10.1016\/j.ymeth.2014.10.026","article-title":"Protein-protein interaction predictions using text mining methods","volume":"74","author":"N Papanikolaou","year":"2015","journal-title":"Methods"},{"key":"ref19","doi-asserted-by":"crossref","unstructured":"Samuel J, Yuan X, Yuan X, Walton B. Mining online full-text literature for novel protein interaction discovery. In: 2010 IEEE International Conference on Bioinformatics and Biomedicine Workshops, BIBMW 2010 [Internet]. IEEE; 2010. p. 277\u201382.","DOI":"10.1109\/BIBMW.2010.5703812"},{"issue":"Suppl 2","key":"ref20","doi-asserted-by":"crossref","first-page":"S6","DOI":"10.1186\/1471-2105-10-S2-S6","article-title":"Pharmspresso: a text mining tool for extraction of pharmacogenomic concepts and relationships from full text","volume":"10","author":"Y Garten","year":"2009","journal-title":"BMC bioinformatics"},{"issue":"11","key":"ref21","doi-asserted-by":"crossref","first-page":"e309","DOI":"10.1371\/journal.pbio.0020309","article-title":"Textpresso: An ontology-based information retrieval and extraction system for biological literature","volume":"2","author":"HM M\u00fcller","year":"2004","journal-title":"PLoS Biology"},{"key":"ref22","first-page":"96","article-title":"Knowledge Exploration in Life Science Informatics","volume":"3303","author":"EPG Martin","year":"2004"},{"issue":"17","key":"ref23","doi-asserted-by":"crossref","first-page":"3206","DOI":"10.1093\/bioinformatics\/bth386","article-title":"BioRAT: Extracting biological information from full-length papers","volume":"20","author":"DPA Corney","year":"2004","journal-title":"Bioinformatics"},{"issue":"2","key":"ref24","doi-asserted-by":"crossref","first-page":"173","DOI":"10.1016\/j.jbi.2009.11.001","article-title":"Beyond genes, proteins, and abstracts: Identifying scientific claims from full-text biomedical articles","volume":"43","author":"C Blake","year":"2010","journal-title":"Journal of Biomedical Informatics"},{"key":"ref25","doi-asserted-by":"crossref","unstructured":"Constantin A, Pettifer S, Voronkov A. Pdfx. Proceedings of the 2013 ACM symposium on Document engineering\u2014DocEng \u201813. 2013.:177.","DOI":"10.1145\/2494266.2494271"},{"issue":"4","key":"ref26","doi-asserted-by":"crossref","first-page":"1","DOI":"10.4018\/jdls.2010100101","article-title":"Logical Structure Recovery in Scholarly Articles with Rich Document Features","volume":"1","author":"M-T Luong","year":"2012","journal-title":"International Journal of Digital Library Systems"},{"issue":"1","key":"ref27","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1186\/1751-0473-7-7","article-title":"Layout-aware text extraction from full-text PDF of scientific articles","volume":"7","author":"C Ramakrishnan","year":"2012","journal-title":"Source Code for Biology and Medicine"},{"issue":"1","key":"ref28","doi-asserted-by":"crossref","first-page":"e0144717","DOI":"10.1371\/journal.pone.0144717","article-title":"Text mining the history of medicine","volume":"11","author":"P Thompson","year":"2016","journal-title":"PLoS ONE"},{"issue":"3","key":"ref29","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/s10032-009-0094-8","article-title":"Optical character recognition errors and their effects on natural language processing","volume":"12","author":"D Lopresti","year":"2009","journal-title":"International Journal on Document Analysis and Recognition"},{"issue":"D1","key":"ref30","doi-asserted-by":"crossref","first-page":"D447","DOI":"10.1093\/nar\/gku1003","article-title":"STRING v10: Protein-protein interaction networks, integrated over the tree of life","volume":"43","author":"D Szklarczyk","year":"2015","journal-title":"Nucleic Acids Research"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1016\/j.ymeth.2014.11.020","article-title":"DISEASES: Text mining and data integration of disease-gene associations","volume":"74","author":"S Pletscher-Frankild","year":"2015","journal-title":"Methods"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"e1054","DOI":"10.7717\/peerj.1054","article-title":"Comprehensive comparison of large-scale tissue expression datasets","volume":"3","author":"A Santos","year":"2015","journal-title":"PeerJ"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"bau012","DOI":"10.1093\/database\/bau012","article-title":"COMPARTMENTS: Unification and visualization of protein subcellular localization evidence","volume":"2014","author":"JX Binder","year":"2014","journal-title":"Database"},{"issue":"D1","key":"ref34","doi-asserted-by":"crossref","first-page":"D940","DOI":"10.1093\/nar\/gkr972","article-title":"Disease ontology: A backbone for disease semantic integration","volume":"40","author":"LM Schriml","year":"2012","journal-title":"Nucleic Acids Research"},{"issue":"1","key":"ref35","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1038\/75556","article-title":"Gene Ontology: Tool for The Unification of Biology","volume":"25","author":"M Ashburner","year":"2000","journal-title":"Nature Genetics"},{"issue":"D1","key":"ref36","doi-asserted-by":"crossref","first-page":"D380","DOI":"10.1093\/nar\/gkv1277","article-title":"STITCH 5: Augmenting protein-chemical interaction networks with tissue and affinity data","volume":"44","author":"D Szklarczyk","year":"2016","journal-title":"Nucleic Acids Research"},{"issue":"SUPPL. 1","key":"ref37","doi-asserted-by":"crossref","first-page":"D507","DOI":"10.1093\/nar\/gkq968","article-title":"The BRENDA Tissue Ontology (BTO): The first all-integrating ontology of all organisms for enzyme sources","volume":"39","author":"M Gremse","year":"2011","journal-title":"Nucleic Acids Research"},{"issue":"3","key":"ref38","first-page":"390","article-title":"The mammalian phenotype ontology: Enabling robust annotation and comparative analysis","volume":"1","author":"CL Smith","year":"2009","journal-title":"Wiley Interdisciplinary Reviews: Systems Biology and Medicine"},{"issue":"D1","key":"ref39","doi-asserted-by":"crossref","first-page":"D808","DOI":"10.1093\/nar\/gks1094","article-title":"STRING v9.1: Protein-protein interaction networks, with increased coverage and integration","volume":"41","author":"A Franceschini","year":"2013","journal-title":"Nucleic Acids Research"},{"issue":"3","key":"ref40","doi-asserted-by":"crossref","first-page":"392","DOI":"10.1093\/bioinformatics\/btt677","article-title":"Protein-driven inference of miRNA-disease associations","volume":"30","author":"S M\u00f8rk","year":"2014","journal-title":"Bioinformatics"},{"issue":"1","key":"ref41","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1093\/nar\/28.1.27","article-title":"Kyoto Encyclopedia of Genes and Genomes","volume":"28","author":"M Kanehisa","year":"2000","journal-title":"Nucleic Acids Research"},{"issue":"D1","key":"ref42","doi-asserted-by":"crossref","first-page":"D353","DOI":"10.1093\/nar\/gkw1092","article-title":"KEGG: New perspectives on genomes, pathways, diseases and drugs","volume":"45","author":"M Kanehisa","year":"2017","journal-title":"Nucleic Acids Research"},{"issue":"D1","key":"ref43","doi-asserted-by":"crossref","first-page":"D457","DOI":"10.1093\/nar\/gkv1070","article-title":"KEGG as a reference resource for gene and protein annotation","volume":"44","author":"M Kanehisa","year":"2016","journal-title":"Nucleic Acids Research"},{"issue":"D1","key":"ref44","doi-asserted-by":"crossref","first-page":"D204","DOI":"10.1093\/nar\/gku989","article-title":"UniProt: A hub for protein information","volume":"43","author":"A Bateman","year":"2015","journal-title":"Nucleic Acids Research"},{"issue":"7420","key":"ref45","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1038\/490335a","article-title":"Collaborations: The rise of research networks","volume":"490","author":"J Adams","year":"2012","journal-title":"Nature"},{"issue":"1","key":"ref46","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1016\/j.molonc.2008.03.007","article-title":"Trends in the global funding and activity of cancer research","volume":"2","author":"S Eckhouse","year":"2008","journal-title":"Molecular Oncology"},{"key":"ref47","doi-asserted-by":"crossref","unstructured":"Plaven-Sigray P, Matheson GJ, Schiffler BC, Thompson WH. The Readability Of Scientific Texts Is Decreasing Over Time. bioRxiv. 2017.:119370.","DOI":"10.7554\/eLife.27725"},{"issue":"6822","key":"ref48","doi-asserted-by":"crossref","first-page":"860","DOI":"10.1038\/35057062","article-title":"Initial sequencing and analysis of the human genome","volume":"409","author":"ES Lander","year":"2001","journal-title":"Nature (London)"},{"issue":"12","key":"ref49","doi-asserted-by":"crossref","first-page":"e1002822","DOI":"10.1371\/journal.pcbi.1002822","article-title":"Chapter 11: Genome-Wide Association Studies","volume":"8","author":"WS Bush","year":"2012","journal-title":"PLoS Computational Biology"},{"issue":"1","key":"ref50","doi-asserted-by":"crossref","first-page":"106","DOI":"10.1093\/bioinformatics\/btv476","article-title":"Large-scale extraction of gene interactions from full-text literature using DeepDive","volume":"32","author":"EK Mallory","year":"2015","journal-title":"Bioinformatics"},{"issue":"2","key":"ref51","doi-asserted-by":"crossref","first-page":"140","DOI":"10.1016\/j.drudis.2013.09.012","article-title":"Text mining for systems biology","volume":"19","author":"J Fluck","year":"2014","journal-title":"Drug Discovery Today"},{"issue":"1","key":"ref52","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1186\/s12911-017-0498-1","article-title":"Semantic relatedness and similarity of biomedical terms: examining the effects of recency, size, and section of biomedical publications on the performance of word2vec","volume":"17","author":"Y Zhu","year":"2017","journal-title":"BMC Medical Informatics and Decision Making"}],"updated-by":[{"DOI":"10.1371\/journal.pcbi.1005962","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2018,2,28]],"date-time":"2018-02-28T00:00:00Z","timestamp":1519776000000}}],"container-title":["PLOS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/dx.plos.org\/10.1371\/journal.pcbi.1005962","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,31]],"date-time":"2023-08-31T20:56:25Z","timestamp":1693515385000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1005962"}},"subtitle":[],"editor":[{"given":"Andrey","family":"Rzhetsky","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2018,2,15]]},"references-count":52,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2018,2,15]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1005962","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/162099","asserted-by":"object"}]},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,2,15]]}}}