{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T17:17:35Z","timestamp":1780334255346,"version":"3.54.1"},"reference-count":17,"publisher":"Oxford University Press (OUP)","issue":"18","license":[{"start":{"date-parts":[[2019,1,31]],"date-time":"2019-01-31T00:00:00Z","timestamp":1548892800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"name":"Intramural Research Program"},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000092","name":"National Library of Medicine","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000092","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,9,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Interest in text mining full-text biomedical research articles is growing. To facilitate automated processing of nearly 3 million full-text articles (in PubMed Central\u00ae Open Access and Author Manuscript subsets) and to improve interoperability, we convert these articles to BioC, a community-driven simple data structure in either XML or JavaScript Object Notation format for conveniently sharing text and annotations.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>The resultant articles can be downloaded via both File Transfer Protocol for bulk access and a Web API for updates or a more focused collection. Since the availability of the Web API in 2017, our BioC collection has been widely used by the research community.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>https:\/\/www.ncbi.nlm.nih.gov\/research\/bionlp\/APIs\/BioC-PMC\/.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btz070","type":"journal-article","created":{"date-parts":[[2019,1,29]],"date-time":"2019-01-29T04:33:29Z","timestamp":1548736409000},"page":"3533-3535","source":"Crossref","is-referenced-by-count":71,"title":["PMC text mining subset in BioC: about three million full-text articles and growing"],"prefix":"10.1093","volume":"35","author":[{"given":"Donald C","family":"Comeau","sequence":"first","affiliation":[{"name":"National Center for Biotechnology Information (NCBI), U.S. Library of Medicine (NLM), National Institutes of Health (NIH) , Bethesda, MD, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chih-Hsuan","family":"Wei","sequence":"additional","affiliation":[{"name":"National Center for Biotechnology Information (NCBI), U.S. Library of Medicine (NLM), National Institutes of Health (NIH) , Bethesda, MD, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rezarta","family":"Islamaj Do\u011fan","sequence":"additional","affiliation":[{"name":"National Center for Biotechnology Information (NCBI), U.S. Library of Medicine (NLM), National Institutes of Health (NIH) , Bethesda, MD, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhiyong","family":"Lu","sequence":"additional","affiliation":[{"name":"National Center for Biotechnology Information (NCBI), U.S. Library of Medicine (NLM), National Institutes of Health (NIH) , Bethesda, MD, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"286","published-online":{"date-parts":[[2019,1,31]]},"reference":[{"key":"2023013108055057100_btz070-B1","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1186\/1471-2105-13-161","article-title":"Concept annotation in the CRAFT corpus","volume":"13","author":"Bada","year":"2012","journal-title":"BMC Bioinformatics"},{"key":"2023013108055057100_btz070-B2","doi-asserted-by":"crossref","first-page":"bau033","DOI":"10.1093\/database\/bau033","article-title":"tagtog: interactive and text-mining-assisted annotation of gene mentions in PLOS full-text articles","volume":"2014","author":"Cejuela","year":"2014","journal-title":"Database (Oxford)"},{"key":"2023013108055057100_btz070-B3","doi-asserted-by":"crossref","first-page":"bat064","DOI":"10.1093\/database\/bat064","article-title":"BioC: a minimalist approach to interoperability for biomedical text processing","volume":"2013","author":"Comeau","year":"2013","journal-title":"Database (Oxford)"},{"key":"2023013108055057100_btz070-B4","doi-asserted-by":"crossref","first-page":"bau056","DOI":"10.1093\/database\/bau056","article-title":"Natural language processing pipelines to annotate BioC collections with an application to the NCBI disease corpus","volume":"2014","author":"Comeau","year":"2014","journal-title":"Database (Oxford)"},{"key":"2023013108055057100_btz070-B5","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1007\/978-1-4939-0709-0_8","article-title":"Mining biological networks from full-text articles","volume":"1159","author":"Czarnecki","year":"2014","journal-title":"Methods Mol. Biol."},{"key":"2023013108055057100_btz070-B6","doi-asserted-by":"crossref","first-page":"954","DOI":"10.15252\/msb.20177651","article-title":"From word models to executable models of signaling networks using automated assembly","volume":"13","author":"Gyori","year":"2017","journal-title":"Mol. Syst. Biol."},{"key":"2023013108055057100_btz070-B7","article-title":"BioC and Simplified Use of the PMC Open Access Dataset for Biomedical Text Mining","volume-title":"Proceedings of the 4th Workshop on Building and Evaluating Resources for Health and Biomedical Text Processing","author":"Islamaj Dogan","year":"2014"},{"key":"2023013108055057100_btz070-B8","doi-asserted-by":"crossref","first-page":"baw147","DOI":"10.1093\/database\/baw147","article-title":"The BioC-BioGRID corpus: full text articles annotated for curation of protein-protein and genetic interactions","volume":"2017","author":"Islamaj Dogan","year":"2017","journal-title":"Database (Oxford)"},{"key":"2023013108055057100_btz070-B9","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1186\/s13326-015-0003-7","article-title":"Section level search functionality in Europe PMC","volume":"6","author":"Kafkas","year":"2015","journal-title":"J. Biomed. Semantics"},{"key":"2023013108055057100_btz070-B10","doi-asserted-by":"crossref","first-page":"S3","DOI":"10.1186\/1471-2105-16-S10-S3","article-title":"Extending the evaluation of Genia Event task toward knowledge base construction and comparison to Gene Regulation Ontology task","volume":"16","author":"Kim","year":"2015","journal-title":"BMC Bioinformatics"},{"key":"2023013108055057100_btz070-B11","first-page":"202","article-title":"PubAnnotation: a persistent and sharable corpus and annotation repository","volume-title":"Proceedings of the 2012 Workshop on Biomedical Natural Language Processing","author":"Kim","year":"2012"},{"key":"2023013108055057100_btz070-B12","doi-asserted-by":"crossref","first-page":"1021","DOI":"10.1038\/nmeth.4471","article-title":"SourceData: a semantic platform for curating and searching figures","volume":"14","author":"Liechti","year":"2017","journal-title":"Nat. Methods"},{"key":"2023013108055057100_btz070-B13","doi-asserted-by":"crossref","first-page":"bau059","DOI":"10.1093\/database\/bau059","article-title":"BioC implementations in Go, Perl, Python and Ruby","volume":"2014","author":"Liu","year":"2014","journal-title":"Database (Oxford)"},{"key":"2023013108055057100_btz070-B14","doi-asserted-by":"crossref","first-page":"bav020","DOI":"10.1093\/database\/bav020","article-title":"Construction of phosphorylation interaction networks by text mining of full-length articles using the eFIP system","volume":"2015","author":"Tudor","year":"2015","journal-title":"Database (Oxford)"},{"key":"2023013108055057100_btz070-B15","doi-asserted-by":"crossref","first-page":"bau074","DOI":"10.1093\/database\/bau074","article-title":"BC4GO: a full-text corpus for the BioCreative IV GO task","volume":"2014","author":"Van Auken","year":"2014","journal-title":"Database (Oxford)"},{"key":"2023013108055057100_btz070-B16","doi-asserted-by":"crossref","first-page":"e55814","DOI":"10.1371\/journal.pone.0055814","article-title":"Large-scale event extraction from literature with multi-level gene normalization","volume":"8","author":"Van Landeghem","year":"2013","journal-title":"PLoS One"},{"key":"2023013108055057100_btz070-B17","doi-asserted-by":"crossref","first-page":"e1005962","DOI":"10.1371\/journal.pcbi.1005962","article-title":"A comprehensive and quantitative comparison of text-mining in 15 million full-text articles versus their corresponding abstracts","volume":"14","author":"Westergaard","year":"2018","journal-title":"PLoS Comput. Biol."}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/18\/3533\/48975610\/bioinformatics_35_18_3533.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/18\/3533\/48975610\/bioinformatics_35_18_3533.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T13:45:33Z","timestamp":1675172733000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/35\/18\/3533\/5305021"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"editor"}]}],"short-title":[],"issued":{"date-parts":[[2019,1,31]]},"references-count":17,"journal-issue":{"issue":"18","published-print":{"date-parts":[[2019,9,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btz070","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2019,9,15]]},"published":{"date-parts":[[2019,1,31]]}}}