{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T06:54:32Z","timestamp":1762325672566,"version":"3.41.2"},"reference-count":66,"publisher":"Oxford University Press (OUP)","issue":"8","license":[{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000054","name":"National Cancer Institute","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000054","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,8,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Read alignment is an essential first step in the characterization of DNA sequence variation. The accuracy of variant-calling results depends not only on the quality of read alignment and variant-calling software but also on the interaction between these complex software tools.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>In this review, we evaluate short-read aligner performance with the goal of optimizing germline variant-calling accuracy. We examine the performance of three general-purpose short-read aligners\u2014BWA-MEM, Bowtie 2, and Arioc\u2014in conjunction with three germline variant callers: DeepVariant, FreeBayes, and GATK HaplotypeCaller. We discuss the behavior of the read aligners with regard to the data elements on which the variant callers rely, and illustrate how the runtime configurations of these software tools combine to affect variant-calling performance.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad480","type":"journal-article","created":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T16:23:09Z","timestamp":1690906989000},"source":"Crossref","is-referenced-by-count":5,"title":["Short-read aligner performance in germline variant identification"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1263-5532","authenticated-orcid":false,"given":"Richard","family":"Wilton","sequence":"first","affiliation":[{"name":"Department of Physics and Astronomy, Johns Hopkins University , Baltimore, MD 21218, United States"}]},{"given":"Alexander S","family":"Szalay","sequence":"additional","affiliation":[{"name":"Department of Physics and Astronomy, Johns Hopkins University , Baltimore, MD 21218, United States"},{"name":"Department of Computer Science, Johns Hopkins University , Baltimore, MD 21218, United States"}]}],"member":"286","published-online":{"date-parts":[[2023,8,1]]},"reference":[{"key":"2023101110495332100_btad480-B1","doi-asserted-by":"publisher","first-page":"eabl3533","DOI":"10.1126\/science.abl3533","article-title":"A complete reference genome improves analysis of human genetic variation","volume":"376","author":"Aganezov","year":"2022","journal-title":"Science"},{"key":"2023101110495332100_btad480-B28400180","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","article-title":"Basic local alignment search tool","volume":"215","author":"Altschul","year":"1990","journal-title":"J Mol Biol"},{"key":"2023101110495332100_btad480-B2","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1146\/annurev-animal-020518-115005","article-title":"Whole-genome alignment and comparative annotation","volume":"7","author":"Armstrong","year":"2019","journal-title":"Annu Rev Anim Biosci"},{"key":"2023101110495332100_btad480-B3","doi-asserted-by":"publisher","first-page":"1003","DOI":"10.1126\/science.1072047","article-title":"Recent segmental duplications in the human genome","volume":"297","author":"Bailey","year":"2002","journal-title":"Science"},{"key":"2023101110495332100_btad480-B4","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1186\/s12864-022-08365-3","article-title":"Systematic benchmark of state-of-the-art variant calling pipelines identifies major factors affecting accuracy of coding sequence variant discovery","volume":"23","author":"Barbitoff","year":"2022","journal-title":"BMC Genomics"},{"key":"2023101110495332100_btad480-B5","doi-asserted-by":"publisher","first-page":"21502","DOI":"10.1038\/s41598-022-26181-3","article-title":"Comparison of calling pipelines for whole genome sequencing: an empirical study demonstrating the importance of mapping and alignment","volume":"12","author":"Betschart","year":"2022","journal-title":"Sci Rep"},{"year":"2022","author":"Broad Institute","key":"2023101110495332100_btad480-B6"},{"key":"2023101110495332100_btad480-B7","doi-asserted-by":"publisher","first-page":"9345","DOI":"10.1038\/s41598-019-45835-32","article-title":"Systematic comparison of germline variant calling pipelines cross multiple next-generation sequencers","volume":"9","author":"Chen","year":"2019","journal-title":"Sci Rep"},{"key":"2023101110495332100_btad480-B8","doi-asserted-by":"publisher","DOI":"10.1101\/023754","article-title":"Comparing variant call files for performance benchmarking of next-generation sequencing variant calling pipelines","author":"Cleary","year":"2015","journal-title":"bioRxiv"},{"key":"2023101110495332100_btad480-B9","doi-asserted-by":"publisher","first-page":"e10501","DOI":"10.7717\/peerj.10501","article-title":"Recalibration of mapping quality scores in Illumina short-read alignments improves SNP detection results in low-coverage sequencing data","volume":"8","author":"Cline","year":"2020","journal-title":"PeerJ"},{"key":"2023101110495332100_btad480-B10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1093\/gigascience\/giab008","article-title":"Twelve years of SAMtools and BCFtools","volume":"10","author":"Danecek","year":"2021","journal-title":"GigaScience"},{"key":"2023101110495332100_btad480-B11","doi-asserted-by":"publisher","first-page":"e1002384","DOI":"10.1371\/journal.pgen.1002384","article-title":"Repetitive elements may comprise over two-thirds of the human genome","volume":"7","author":"de Koning","year":"2011","journal-title":"PLoS Genet"},{"key":"2023101110495332100_btad480-B5943810","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1038\/nrg1767","article-title":"Structural variation in the human genome","volume":"7","author":"Feuk","year":"2006","journal-title":"Nat Rev Genet"},{"key":"2023101110495332100_btad480-B12","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1093\/bioinformatics\/btz576","article-title":"How sequence alignment scores correspond to probability models","volume":"36","author":"Frith","year":"2020","journal-title":"Bioinformatics"},{"year":"2012","author":"Garrison","key":"2023101110495332100_btad480-B13","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1207.3907"},{"year":"2022","author":"Global Alliance for Genomics and Health","key":"2023101110495332100_btad480-B14"},{"key":"2023101110495332100_btad480-B15","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1038\/10297","article-title":"Patterns of single-nucleotide polymorphisms in candidate genes for blood-pressure homeostasis","volume":"22","author":"Halushka","year":"1999","journal-title":"Nat Genet"},{"key":"2023101110495332100_btad480-B16","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1186\/1471-2105-7-541","article-title":"How repetitive are genomes?","volume":"7","author":"Haubold","year":"2006","journal-title":"BMC Bioinformatics"},{"key":"2023101110495332100_btad480-B17","doi-asserted-by":"publisher","first-page":"3219","DOI":"10.1038\/s41598-019-39108-2","article-title":"Comparative analysis of whole-genome sequencing pipelines to minimize false negative findings","volume":"9","author":"Hwang","year":"2019","journal-title":"Sci Rep"},{"year":"2022","author":"Illumina Corporation","key":"2023101110495332100_btad480-B18"},{"year":"2022","author":"Illumina Corporation","key":"2023101110495332100_btad480-B19"},{"key":"2023101110495332100_btad480-B20","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1186\/s13073-016-0383-z","article-title":"Alternate-locus aware variant calling in whole genome sequencing","volume":"8","author":"J\u00e4ger","year":"2016","journal-title":"Genome Med"},{"key":"2023101110495332100_btad480-B21","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1111\/ahg.12383","article-title":"Thousands of missing variants in the UK biobank are recoverable by genome realignment","volume":"84","author":"Jia","year":"2020","journal-title":"Ann Hum Genet"},{"key":"2023101110495332100_btad480-B22","doi-asserted-by":"publisher","first-page":"1784","DOI":"10.1038\/s41598-018-38346-0","article-title":"Empirical evaluation of variant calling accuracy using ultra-deep whole-genome sequencing data","volume":"9","author":"Kishikawa","year":"2019","journal-title":"Sci Rep"},{"year":"2021","author":"Krusche","key":"2023101110495332100_btad480-B23"},{"key":"2023101110495332100_btad480-B24","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1186\/s13059-017-1290-3","article-title":"A tandem simulation framework for predicting mapping quality","volume":"18","author":"Langmead","year":"2017","journal-title":"Genome Biol"},{"key":"2023101110495332100_btad480-B25","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1038\/nmeth.1923","article-title":"Fast gapped-read alignment with bowtie 2","volume":"9","author":"Langmead","year":"2012","journal-title":"Nat Methods"},{"year":"2013","author":"Li","key":"2023101110495332100_btad480-B26"},{"key":"2023101110495332100_btad480-B27","doi-asserted-by":"publisher","first-page":"2843","DOI":"10.1093\/bioinformatics\/btu356","article-title":"Toward better understanding of artifacts in variant calling from high-coverage samples","volume":"30","author":"Li","year":"2014","journal-title":"Bioinformatics"},{"year":"2014","author":"Li","key":"2023101110495332100_btad480-B28"},{"key":"2023101110495332100_btad480-B30","doi-asserted-by":"publisher","first-page":"1851","DOI":"10.1101\/gr.078212.108","article-title":"Mapping short DNA sequencing reads and calling variants using mapping quality scores","volume":"18","author":"Li","year":"2008","journal-title":"Genome Res"},{"key":"2023101110495332100_btad480-B31","doi-asserted-by":"publisher","first-page":"1239","DOI":"10.1016\/j.ajhg.2021.05.011","article-title":"Exome variant discrepancies due to reference-genome differences","volume":"108","author":"Li","year":"2021","journal-title":"Am J Hum Genet"},{"key":"2023101110495332100_btad480-B1207564","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1038\/s41586-023-05896-x","article-title":"A draft human pangenome reference","volume":"617","author":"Liao","year":"2023","journal-title":"Nature"},{"key":"2023101110495332100_btad480-B32","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1038\/nature18964","article-title":"The Simons genome diversity project: 300 genomes from 142 diverse populations","volume":"538","author":"Mallick","year":"2016","journal-title":"Nature"},{"key":"2023101110495332100_btad480-B33","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1101\/gr.091868.109","article-title":"Sequence and structural variation in a human genome uncovered by short-read, massively parallel ligation sequencing using two-base encoding","volume":"19","author":"McKernan","year":"2009","journal-title":"Genome Res"},{"key":"2023101110495332100_btad480-B34","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1109\/IPDPS.2019.00041","author":"","year":"2019"},{"year":"2023","author":"NCBI","key":"2023101110495332100_btad480-B35"},{"year":"2023","author":"NCBI","key":"2023101110495332100_btad480-B36"},{"year":"2023","author":"NCBI","key":"2023101110495332100_btad480-B37"},{"year":"2020","author":"NIST","key":"2023101110495332100_btad480-B38"},{"year":"2022","author":"NIST","key":"2023101110495332100_btad480-B39"},{"key":"2023101110495332100_btad480-B40","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1126\/science.abj6987","article-title":"The complete sequence of a human genome","volume":"376","author":"Nurk","year":"2022","journal-title":"Science"},{"key":"2023101110495332100_btad480-B6504318","doi-asserted-by":"publisher","first-page":"464","DOI":"10.1038\/s41576-023-00590-0","article-title":"Variant calling and benchmarking in an era of complete human genome sequences","volume":"24","author":"Olson","year":"2023","journal-title":"Nat Rev Genet"},{"key":"2023101110495332100_btad480-B41","doi-asserted-by":"publisher","first-page":"100129","DOI":"10.1016\/j.xgen.2022.100129","article-title":"PrecisionFDA truth challenge V2: calling variants from short and long reads in difficult-to-map regions","volume":"2","author":"Olson","year":"2022","journal-title":"Cell Genomics"},{"key":"2023101110495332100_btad480-B42","doi-asserted-by":"publisher","first-page":"e6323","DOI":"10.1371\/journal.pone.0006323","article-title":"Mapping accuracy of short reads from massively parallel sequencing and the implications for quantitative expression profiling","volume":"4","author":"Palmieri","year":"2009","journal-title":"PLoS One"},{"key":"2023101110495332100_btad480-B43","doi-asserted-by":"publisher","first-page":"983","DOI":"10.1038\/nbt.4235","article-title":"A universal SNP and small-indel variant caller using deep neural networks","volume":"36","author":"Poplin","year":"2018","journal-title":"Nat Biotechnol"},{"year":"2018","author":"Poplin","key":"2023101110495332100_btad480-B44","doi-asserted-by":"publisher","DOI":"10.1101\/201178"},{"key":"2023101110495332100_btad480-B45","doi-asserted-by":"publisher","first-page":"3221","DOI":"10.1038\/s41467-022-30930-3","article-title":"Robust and accurate estimation of paralog-specific copy number for duplicated genes using whole-genome sequencing","volume":"13","author":"Prodanov","year":"2022","journal-title":"Nat Commun"},{"key":"2023101110495332100_btad480-B46","doi-asserted-by":"publisher","first-page":"R51","DOI":"10.1186\/gb-2013-14-5-r51","article-title":"Characterizing and measuring bias in sequence data","volume":"14","author":"Ross","year":"2013","journal-title":"Genome Biol"},{"key":"2023101110495332100_btad480-B47","doi-asserted-by":"publisher","first-page":"i349","DOI":"10.1093\/bioinformatics\/bts408","article-title":"Accurate estimation of short read mapping quality for next-generation genome sequencing","volume":"28","author":"Ruffalo","year":"2012","journal-title":"Bioinformatics"},{"key":"2023101110495332100_btad480-B48","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1186\/s13059-019-1715-2","article-title":"Next-generation genome annotation: we still struggle to get it right","volume":"20","author":"Salzberg","year":"2019","journal-title":"Genome Biol"},{"year":"2022","author":"SAM\/BAM","key":"2023101110495332100_btad480-B49"},{"key":"2023101110495332100_btad480-B50","doi-asserted-by":"publisher","first-page":"439","DOI":"10.3390\/plants9040439","article-title":"Comparison of read mapping and variant calling tools for the analysis of plant NGS data","volume":"9","author":"Schilbert","year":"2020","journal-title":"Plants"},{"key":"2023101110495332100_btad480-B51","doi-asserted-by":"publisher","first-page":"849","DOI":"10.1101\/gr.213611.116","article-title":"Evaluation of GRCh38 and de novo haploid genome assemblies demonstrates the enduring quality of the reference assembly","volume":"27","author":"Schneider","year":"2017","journal-title":"Genome Res"},{"key":"2023101110495332100_btad480-B4672869","doi-asserted-by":"publisher","first-page":"abg8871","DOI":"10.1126\/science.abg8871","article-title":"Pangenomics enables genotyping of known structural variants in 5202 diverse genomes","volume":"374","author":"Sir\u00e9n","year":"2021","journal-title":"Science"},{"key":"2023101110495332100_btad480-B000087199","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1016\/0022-2836(81)90087-5","article-title":"Identification of common molecular subsequences","volume":"147","author":"Smith","year":"1981","journal-title":"J Mol Biol"},{"key":"2023101110495332100_btad480-B52","doi-asserted-by":"publisher","first-page":"2856","DOI":"10.1038\/s41598-019-39076-7","article-title":"Long fragments achieve lower base quality in Illumina paired-end sequencing","volume":"9","author":"Tan","year":"2019","journal-title":"Sci Rep"},{"key":"2023101110495332100_btad480-B53","doi-asserted-by":"publisher","first-page":"1304","DOI":"10.1126\/science.1058040","article-title":"The sequence of the human genome","volume":"291","author":"Venter","year":"2001","journal-title":"Science"},{"key":"2023101110495332100_btad480-B54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/s0022-2836(05)80006-3","article-title":"Sequence alignment and penalty choice: review of concepts, case studies, and implications","volume":"235","author":"Vingron","year":"1994","journal-title":"J Mol Biol"},{"key":"2023101110495332100_btad480-B55","doi-asserted-by":"publisher","first-page":"100128","DOI":"10.1016\/j.xgen.2022.100128","article-title":"Benchmarking challenging small variants with linked and long reads","volume":"2","author":"Wagner","year":"2022","journal-title":"Cell Genomics"},{"key":"2023101110495332100_btad480-B56","doi-asserted-by":"publisher","first-page":"2081","DOI":"10.1093\/bioinformatics\/btac066","article-title":"Performance optimization in DNA short-read alignment","volume":"38","author":"Wilton","year":"2022","journal-title":"Bioinformatics"},{"key":"2023101110495332100_btad480-B57","doi-asserted-by":"publisher","first-page":"e808","DOI":"10.7717\/peerj.808","article-title":"Arioc: high-throughput read alignment with GPU-accelerated exploration of the seed-and-extend search space","volume":"3","author":"Wilton","year":"2015","journal-title":"PeerJ"},{"key":"2023101110495332100_btad480-B58","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1093\/bioinformatics\/bty657","article-title":"The terabase search engine: a large-scale relational database of short-read sequences","volume":"35","author":"Wilton","year":"2019","journal-title":"Bioinformatics"},{"key":"2023101110495332100_btad480-B59","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1186\/s12859-021-04144-1","article-title":"Performance evaluation of pipelines for mapping, variant calling and interval padding, for the analysis of NGS germline panels","volume":"22","author":"Zanti","year":"2021","journal-title":"BMC Bioinformatics"},{"key":"2023101110495332100_btad480-B60","doi-asserted-by":"publisher","first-page":"20222","DOI":"10.1038\/s41598-020-77218-4","article-title":"Accuracy and efficiency of germline variant calling pipelines for human genome data","volume":"10","author":"Zhao","year":"2020","journal-title":"Sci Rep"},{"key":"2023101110495332100_btad480-B61","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1038\/s41587-019-0074-6","article-title":"An open resource for accurately benchmarking small variant and reference calls","volume":"37","author":"Zook","year":"2019","journal-title":"Nat Biotechnol"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btad480\/51016588\/btad480.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/8\/btad480\/52005868\/btad480.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/8\/btad480\/52005868\/btad480.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,11]],"date-time":"2023-10-11T10:56:14Z","timestamp":1697021774000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btad480\/7234613"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2023,8,1]]},"references-count":66,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,8,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad480","relation":{},"ISSN":["1367-4811"],"issn-type":[{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2023,8,1]]},"published":{"date-parts":[[2023,8,1]]},"article-number":"btad480"}}