{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,26]],"date-time":"2025-07-26T09:10:34Z","timestamp":1753521034602,"version":"3.37.3"},"reference-count":21,"publisher":"Oxford University Press (OUP)","issue":"20","license":[{"start":{"date-parts":[[2020,7,19]],"date-time":"2020-07-19T00:00:00Z","timestamp":1595116800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000060","name":"NIAID","doi-asserted-by":"publisher","award":["R01AI117011"],"award-info":[{"award-number":["R01AI117011"]}],"id":[{"id":"10.13039\/100000060","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000092","name":"NLM","doi-asserted-by":"publisher","award":["R01LM012080"],"award-info":[{"award-number":["R01LM012080"]}],"id":[{"id":"10.13039\/100000092","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,12,22]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>We present GeoBoost2, a natural language-processing pipeline for extracting the location of infected hosts for enriching metadata in nucleotide sequences repositories like National Center of Biotechnology Information\u2019s GenBank for downstream analysis including phylogeography and genomic epidemiology. The increasing number of pathogen sequences requires complementary information extraction methods for focused research, including surveillance within countries and between borders. In this article, we describe the enhancements from our earlier release including improvement in end-to-end extraction performance and speed, availability of a fully functional web-interface and state-of-the-art methods for location extraction using deep learning.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>Application is freely available on the web at https:\/\/zodo.asu.edu\/geoboost2. Source code, usage examples and annotated data for GeoBoost2 is freely available at https:\/\/github.com\/ZooPhy\/geoboost2.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa647","type":"journal-article","created":{"date-parts":[[2020,7,14]],"date-time":"2020-07-14T19:09:47Z","timestamp":1594753787000},"page":"5120-5121","source":"Crossref","is-referenced-by-count":8,"title":["GeoBoost2: a natural languageprocessing pipeline for GenBank metadata enrichment for virus phylogeography"],"prefix":"10.1093","volume":"36","author":[{"given":"Arjun","family":"Magge","sequence":"first","affiliation":[{"name":"College of Health Solutions, Arizona State University , Phoenix, AZ 85004, USA"},{"name":"Biodesign Center for Environmental Health Engineering, Biodesign Institute, Arizona State University , Tempe, AZ 85287, USA"},{"name":"University of Pennsylvania Department of Biostatistics, Epidemiology and Informatics, Perelman School of Medicine, , Philadelphia, PA 19104, USA"}]},{"given":"Davy","family":"Weissenbacher","sequence":"additional","affiliation":[{"name":"University of Pennsylvania Department of Biostatistics, Epidemiology and Informatics, Perelman School of Medicine, , Philadelphia, PA 19104, USA"}]},{"given":"Karen","family":"O\u2019Connor","sequence":"additional","affiliation":[{"name":"University of Pennsylvania Department of Biostatistics, Epidemiology and Informatics, Perelman School of Medicine, , Philadelphia, PA 19104, USA"}]},{"given":"Tasnia","family":"Tahsin","sequence":"additional","affiliation":[{"name":"College of Health Solutions, Arizona State University , Phoenix, AZ 85004, USA"}]},{"given":"Graciela","family":"Gonzalez-Hernandez","sequence":"additional","affiliation":[{"name":"University of Pennsylvania Department of Biostatistics, Epidemiology and Informatics, Perelman School of Medicine, , Philadelphia, PA 19104, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5100-9724","authenticated-orcid":false,"given":"Matthew","family":"Scotch","sequence":"additional","affiliation":[{"name":"College of Health Solutions, Arizona State University , Phoenix, AZ 85004, USA"},{"name":"Biodesign Center for Environmental Health Engineering, Biodesign Institute, Arizona State University , Tempe, AZ 85287, USA"}]}],"member":"286","published-online":{"date-parts":[[2020,7,19]]},"reference":[{"key":"2023062408151588400_btaa647-B1","doi-asserted-by":"crossref","first-page":"D41","DOI":"10.1093\/nar\/gkx1094","article-title":"GenBank","volume":"46","author":"Benson","year":"2018","journal-title":"Nucleic Acids Res"},{"year":"2016","author":"Calvet","key":"2023062408151588400_btaa647-B2"},{"key":"2023062408151588400_btaa647-B3","doi-asserted-by":"crossref","first-page":"309","DOI":"10.1038\/nature22040","article-title":"Virus genomes reveal factors that spread and sustained the Ebola epidemic","volume":"544","author":"Dudas","year":"2017","journal-title":"Nature"},{"year":"2020","key":"2023062408151588400_btaa647-B4"},{"year":"2020","key":"2023062408151588400_btaa647-B5"},{"year":"2019","key":"2023062408151588400_btaa647-B6"},{"key":"2023062408151588400_btaa647-B7","doi-asserted-by":"crossref","first-page":"105924","DOI":"10.1016\/j.ijantimicag.2020.105924","article-title":"Severe acute respiratory syndrome coronavirus 2 (sars-cov-2) and coronavirus disease-2019 (covid-19): the epidemic and the challenges","volume":"55","author":"Lai","year":"2020","journal-title":"Int. J. Antimicrob. Agents"},{"key":"2023062408151588400_btaa647-B8","doi-asserted-by":"crossref","first-page":"i565","DOI":"10.1093\/bioinformatics\/bty273","article-title":"Deep neural networks and distant supervision for geographic location mention extraction","volume":"34","author":"Magge","year":"2018","journal-title":"Bioinformatics"},{"key":"2023062408151588400_btaa647-B9","first-page":"100","article-title":"Bi-directional recurrent neural network models for geographic location extraction in biomedical literature","volume":"24","author":"Magge","year":"2019","journal-title":"Pac. Symp. Biocomput"},{"year":"2020","key":"2023062408151588400_btaa647-B10"},{"year":"2020","key":"2023062408151588400_btaa647-B11"},{"key":"2023062408151588400_btaa647-B12","doi-asserted-by":"crossref","first-page":"15066","DOI":"10.1073\/pnas.1206598109","article-title":"Unifying the spatial epidemiology and molecular evolution of emerging epidemics","volume":"109","author":"Pybus","year":"2012","journal-title":"Proc. Natl. Acad. Sci. USA"},{"key":"2023062408151588400_btaa647-B13","doi-asserted-by":"crossref","first-page":"764","DOI":"10.1097\/EDE.0b013e3181f534dd","article-title":"At the intersection of public-health informatics and bioinformatics: using advanced web technologies for phylogeography","volume":"21","author":"Scotch","year":"2010","journal-title":"Epidemiology (Cambridge, Mass.)"},{"key":"2023062408151588400_btaa647-B14","doi-asserted-by":"crossref","first-page":"S44","DOI":"10.1016\/j.jbi.2011.06.005","article-title":"Enhancing phylogeography by improving geographical information from genbank","volume":"44","author":"Scotch","year":"2011","journal-title":"J. Biomed. Inform"},{"key":"2023062408151588400_btaa647-B15","doi-asserted-by":"crossref","first-page":"vey043","DOI":"10.1093\/ve\/vey043","article-title":"Incorporating sampling uncertainty in the geospatial assignment of taxa for virus phylogeography","volume":"5","author":"Scotch","year":"2019","journal-title":"Virus Evol"},{"key":"2023062408151588400_btaa647-B16","first-page":"e301","article-title":"Zoophy: a bioinformatics pipeline for virus phylogeography and surveillance","volume":"11","author":"Scotch","year":"2019","journal-title":"Online J. Public Health Inf"},{"key":"2023062408151588400_btaa647-B17","doi-asserted-by":"crossref","first-page":"vey016","DOI":"10.1093\/ve\/vey016","article-title":"Bayesian phylogenetic and phylodynamic data integration using beast 1.10","volume":"4","author":"Suchard","year":"2018","journal-title":"Virus Evol"},{"first-page":"102","year":"2014","author":"Tahsin","key":"2023062408151588400_btaa647-B18"},{"key":"2023062408151588400_btaa647-B19","doi-asserted-by":"crossref","first-page":"934","DOI":"10.1093\/jamia\/ocv172","article-title":"A high-precision rule-based extraction system for expanding geospatial metadata in genbank records","volume":"23","author":"Tahsin","year":"2016","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"2023062408151588400_btaa647-B20","doi-asserted-by":"crossref","first-page":"1606","DOI":"10.1093\/bioinformatics\/btx799","article-title":"Geoboost: accelerating research involving the geospatial metadata of virus genbank records","volume":"34","author":"Tahsin","year":"2018","journal-title":"Bioinformatics"},{"year":"2020","key":"2023062408151588400_btaa647-B21"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa647\/33991515\/btaa647.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/20\/5120\/50692947\/btaa647.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/20\/5120\/50692947\/btaa647.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,24]],"date-time":"2023-06-24T23:47:53Z","timestamp":1687650473000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/36\/20\/5120\/5873583"}},"subtitle":[],"editor":[{"given":"Arne","family":"Elofsson","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,7,19]]},"references-count":21,"journal-issue":{"issue":"20","published-print":{"date-parts":[[2020,12,22]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa647","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"type":"print","value":"1367-4803"},{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2020,10,15]]},"published":{"date-parts":[[2020,7,19]]}}}