{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T20:45:44Z","timestamp":1776199544238,"version":"3.50.1"},"reference-count":19,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2020,8,20]],"date-time":"2020-08-20T00:00:00Z","timestamp":1597881600000},"content-version":"vor","delay-in-days":1,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["NSF-2028040"],"award-info":[{"award-number":["NSF-2028040"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Google Cloud Platform Research Credits Program"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,5]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>In molecular epidemiology, the identification of clusters of transmissions typically requires the alignment of viral genomic sequence data. However, existing methods of multiple sequence alignment (MSA) scale poorly with respect to the number of sequences.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>ViralMSA is a user-friendly reference-guided MSA tool that leverages the algorithmic techniques of read mappers to enable the MSA of ultra-large viral genome datasets. It scales linearly with the number of sequences, and it is able to align tens of thousands of full viral genomes in seconds. However, alignments produced by ViralMSA omit insertions with respect to the reference genome.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>ViralMSA is freely available at https:\/\/github.com\/niemasd\/ViralMSA as an open-source software project.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa743","type":"journal-article","created":{"date-parts":[[2020,8,13]],"date-time":"2020-08-13T15:27:40Z","timestamp":1597332460000},"page":"714-716","source":"Crossref","is-referenced-by-count":73,"title":["ViralMSA: massively scalable reference-guided multiple sequence alignment of viral genomes"],"prefix":"10.1093","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2209-8128","authenticated-orcid":false,"given":"Niema","family":"Moshiri","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, UC San Diego , La Jolla, CA 92093, USA"}]}],"member":"286","published-online":{"date-parts":[[2020,8,19]]},"reference":[{"key":"2023051800331983600_btaa743-B1","doi-asserted-by":"crossref","first-page":"e0221068","DOI":"10.1371\/journal.pone.0221068","article-title":"TreeCluster: clustering biological sequences using phylogenetic trees","volume":"14","author":"Balaban","year":"2019","journal-title":"PLoS One"},{"key":"2023051800331983600_btaa743-B2","doi-asserted-by":"crossref","first-page":"1422","DOI":"10.1093\/bioinformatics\/btp163","article-title":"Biopython: freely available Python tools for computational molecular biology and bioinformatics","volume":"25","author":"Cock","year":"2009","journal-title":"Bioinformatics"},{"key":"2023051800331983600_btaa743-B3","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1093\/bioinformatics\/bts635","article-title":"STAR: ultrafast universal RNA-seq aligner","volume":"29","author":"Dobin","year":"2013","journal-title":"Bioinformatics"},{"key":"2023051800331983600_btaa743-B4","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1186\/1471-2105-5-113","article-title":"MUSCLE: a multiple sequence alignment method with reduced time and space complexity","volume":"5","author":"Edgar","year":"2004","journal-title":"BMC Bioinform"},{"key":"2023051800331983600_btaa743-B5","doi-asserted-by":"crossref","first-page":"772","DOI":"10.1093\/molbev\/mst010","article-title":"MAFFT multiple sequence alignment software version 7: improvements in performance and usability","volume":"30","author":"Katoh","year":"2013","journal-title":"Mol. Biol. Evol"},{"key":"2023051800331983600_btaa743-B6","doi-asserted-by":"crossref","first-page":"907","DOI":"10.1038\/s41587-019-0201-4","article-title":"Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype","volume":"37","author":"Kim","year":"2019","journal-title":"Nat. Biotechnol"},{"key":"2023051800331983600_btaa743-B7","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1038\/nmeth.1923","article-title":"Fast gapped-read alignment with Bowtie 2","volume":"9","author":"Langmead","year":"2012","journal-title":"Nat. Methods"},{"key":"2023051800331983600_btaa743-B8","doi-asserted-by":"crossref","first-page":"3094","DOI":"10.1093\/bioinformatics\/bty191","article-title":"Minimap2: pairwise alignment for nucleotide sequences","volume":"34","author":"Li","year":"2018","journal-title":"Bioinformatics"},{"key":"2023051800331983600_btaa743-B9","doi-asserted-by":"crossref","first-page":"1763","DOI":"10.1093\/bioinformatics\/bty851","article-title":"VIRULIGN: fast codon-correct alignment and annotation of viral genomes","volume":"35","author":"Libin","year":"2019","journal-title":"Bioinformatics"},{"key":"2023051800331983600_btaa743-B10","first-page":"51","article-title":"Phylogenetic inference of HIV transmission clusters","volume":"3","author":"Novitsky","year":"2017","journal-title":"Infect. Dis. Transl. Med"},{"key":"2023051800331983600_btaa743-B11","article-title":"VeryFastTree: speeding up the estimation of phylogenies for large alignments through parallelization and vectorization strategies","author":"Pi\u00f1eiro","year":"2020"},{"key":"2023051800331983600_btaa743-B12","doi-asserted-by":"crossref","first-page":"1812","DOI":"10.1093\/molbev\/msy016","article-title":"HIV-TRACE (TRAnsmission Cluster Engine): a tool for large scale molecular epidemiology of HIV-1 and other rapidly evolving pathogens","volume":"35","author":"Pond","year":"2018","journal-title":"Mol. Biol. Evol"},{"key":"2023051800331983600_btaa743-B13","doi-asserted-by":"crossref","first-page":"e231","DOI":"10.1016\/S2352-3018(16)00046-1","article-title":"Near real-time monitoring of HIV transmission hotspots from routine HIV genotyping: an implementation case study","volume":"3","author":"Poon","year":"2016","journal-title":"Lancet HIV"},{"key":"2023051800331983600_btaa743-B14","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1038\/ncomms1325","article-title":"A novel methodology for large-scale phylogeny partition","volume":"2","author":"Prosperi","year":"2011","journal-title":"Nat. Commun"},{"key":"2023051800331983600_btaa743-B15","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1186\/1471-2105-14-317","article-title":"Automated analysis of phylogenetic clusters","volume":"14","author":"Ragonnet-Cronin","year":"2013","journal-title":"BMC Bioinform"},{"key":"2023051800331983600_btaa743-B16","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1016\/0025-5564(81)90043-2","article-title":"Comparison of phylogenetic trees","volume":"53","author":"Robinson","year":"1981","journal-title":"Math. Biosci"},{"key":"2023051800331983600_btaa743-B17","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1007\/978-1-62703-646-7_6","article-title":"Clustal Omega, accurate alignment of very large numbers of sequences","volume":"1079","author":"Sievers","year":"2014","journal-title":"Methods Mol. Biol"},{"key":"2023051800331983600_btaa743-B18","first-page":"512","article-title":"Estimation of the number of nucleotide substitutions in the control region of mitochondrial DNA in humans and chimpanzees","volume":"10","author":"Tamura","year":"1993","journal-title":"Mol. Biol. Evol"},{"key":"2023051800331983600_btaa743-B19","first-page":"57","article-title":"Some probabilistic and statistical problems in the analysis of DNA sequences","volume":"17","author":"Tavar\u00e9","year":"1986","journal-title":"Lectures Math. Life Sci"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa743\/34127041\/btaa743.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/5\/714\/50357152\/btaa743.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/5\/714\/50357152\/btaa743.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,17]],"date-time":"2023-05-17T20:33:46Z","timestamp":1684355626000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/37\/5\/714\/5894544"}},"subtitle":[],"editor":[{"given":"Peter","family":"Robinson","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,8,19]]},"references-count":19,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,5,5]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa743","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2020.04.20.052068","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2021,3,1]]},"published":{"date-parts":[[2020,8,19]]}}}