{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T02:28:49Z","timestamp":1775096929316,"version":"3.50.1"},"reference-count":31,"publisher":"Oxford University Press (OUP)","issue":"2","license":[{"start":{"date-parts":[[2019,7,26]],"date-time":"2019-07-26T00:00:00Z","timestamp":1564099200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["5U41HG007234"],"award-info":[{"award-number":["5U41HG007234"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["1U01HL137183-01"],"award-info":[{"award-number":["1U01HL137183-01"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010269","name":"Wellcome Trust","doi-asserted-by":"publisher","award":["WT206194"],"award-info":[{"award-number":["WT206194"]}],"id":[{"id":"10.13039\/100010269","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010269","name":"Wellcome Trust","doi-asserted-by":"publisher","award":["WT207492"],"award-info":[{"award-number":["WT207492"]}],"id":[{"id":"10.13039\/100010269","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000888","name":"W. M. Keck Foundation","doi-asserted-by":"publisher","award":["DT06172015"],"award-info":[{"award-number":["DT06172015"]}],"id":[{"id":"10.13039\/100000888","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,1,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>The variation graph toolkit (VG) represents genetic variation as a graph. Although each path in the graph is a potential haplotype, most paths are non-biological, unlikely recombinations of true haplotypes.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We augment the VG model with haplotype information to identify which paths are more likely to exist in nature. For this purpose, we develop a scalable implementation of the graph extension of the positional Burrows\u2013Wheeler transform. We demonstrate the scalability of the new implementation by building a whole-genome index of the 5008 haplotypes of the 1000 Genomes Project, and an index of all 108\u00a0070 Trans-Omics for Precision Medicine Freeze 5 chromosome 17 haplotypes. We also develop an algorithm for simplifying variation graphs for k-mer indexing without losing any k-mers in the haplotypes.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>Our software is available at https:\/\/github.com\/vgteam\/vg, https:\/\/github.com\/jltsiren\/gbwt and https:\/\/github.com\/jltsiren\/gcsa2.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btz575","type":"journal-article","created":{"date-parts":[[2019,7,24]],"date-time":"2019-07-24T07:27:00Z","timestamp":1563953220000},"page":"400-407","source":"Crossref","is-referenced-by-count":79,"title":["Haplotype-aware graph indexes"],"prefix":"10.1093","volume":"36","author":[{"given":"Jouni","family":"Sir\u00e9n","sequence":"first","affiliation":[{"name":"UC Santa Cruz Genomics Institute, University of California , Santa Cruz, CA 95064, USA"},{"name":"Wellcome Sanger Institute, Wellcome Genome Campus , Hinxton CB10 1SA, UK"}]},{"given":"Erik","family":"Garrison","sequence":"additional","affiliation":[{"name":"Wellcome Sanger Institute, Wellcome Genome Campus , Hinxton CB10 1SA, UK"}]},{"given":"Adam M","family":"Novak","sequence":"additional","affiliation":[{"name":"UC Santa Cruz Genomics Institute, University of California , Santa Cruz, CA 95064, USA"}]},{"given":"Benedict","family":"Paten","sequence":"additional","affiliation":[{"name":"UC Santa Cruz Genomics Institute, University of California , Santa Cruz, CA 95064, USA"}]},{"given":"Richard","family":"Durbin","sequence":"additional","affiliation":[{"name":"Wellcome Sanger Institute, Wellcome Genome Campus , Hinxton CB10 1SA, UK"},{"name":"Department of Genetics, University of Cambridge , Cambridge CB2 3EH, UK"}]}],"member":"286","published-online":{"date-parts":[[2019,7,26]]},"reference":[{"key":"2023013112062788800_btz575-B1","doi-asserted-by":"crossref","first-page":"134","DOI":"10.1016\/j.tcs.2012.02.002","article-title":"Lightweight algorithms for constructing and inverting the BWT of string collections","volume":"483","author":"Bauer","year":"2013","journal-title":"Theor. Comput. Sci"},{"key":"2023013112062788800_btz575-B2","author":"Burrows","year":"1994"},{"key":"2023013112062788800_btz575-B3","doi-asserted-by":"crossref","first-page":"21.","DOI":"10.1145\/1240233.1240244","article-title":"Compressed indexes for dynamic text collections","volume":"3","author":"Chan","year":"2007","journal-title":"ACM Trans. Algorithms"},{"key":"2023013112062788800_btz575-B4","doi-asserted-by":"crossref","first-page":"2156","DOI":"10.1093\/bioinformatics\/btr330","article-title":"The variant call format and VCFtools","volume":"27","author":"Danecek","year":"2011","journal-title":"Bioinformatics"},{"key":"2023013112062788800_btz575-B5","doi-asserted-by":"crossref","first-page":"1266","DOI":"10.1093\/bioinformatics\/btu014","article-title":"Efficient haplotype matching and storage using the Positional Burrows\u2013Wheeler transform (PBWT)","volume":"30","author":"Durbin","year":"2014","journal-title":"Bioinformatics"},{"key":"2023013112062788800_btz575-B6","doi-asserted-by":"crossref","first-page":"1654","DOI":"10.1038\/ng.3964","article-title":"Graphtyper enables population-scale genotyping using pangenome graphs","volume":"49","author":"Eggertsson","year":"2017","journal-title":"Nat. Genet"},{"key":"2023013112062788800_btz575-B7","doi-asserted-by":"crossref","first-page":"552","DOI":"10.1145\/1082036.1082039","article-title":"Indexing compressed text","volume":"52","author":"Ferragina","year":"2005","journal-title":"J. ACM"},{"key":"2023013112062788800_btz575-B8","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1016\/j.tcs.2017.06.016","article-title":"Wheeler graphs: a framework for BWT-based data structures","volume":"698","author":"Gagie","year":"2017","journal-title":"Theor. Comput. Sci"},{"key":"2023013112062788800_btz575-B9","first-page":"1459","volume-title":"Proceedings of SODA 2018","author":"Gagie","year":"2018"},{"key":"2023013112062788800_btz575-B10","doi-asserted-by":"crossref","first-page":"875","DOI":"10.1038\/nbt.4227","article-title":"Variation graph toolkit improves read mapping by representing genetic variation in the reference","volume":"36","author":"Garrison","year":"2018","journal-title":"Nat. Biotechnol"},{"key":"2023013112062788800_btz575-B11","first-page":"326","volume-title":"Proceedings of SEA 2014, Vol. 8504 of LNCS","author":"Gog","year":"2014"},{"key":"2023013112062788800_btz575-B12","doi-asserted-by":"crossref","first-page":"i361","DOI":"10.1093\/bioinformatics\/btt215","article-title":"Short read alignment with populations of genomes","volume":"29","author":"Huang","year":"2013","journal-title":"Bioinformatics"},{"key":"2023013112062788800_btz575-B13","first-page":"180","volume-title":"Proceedings of AAIM 2010, Vol. 6124 of LNCS","author":"Huang","year":"2010"},{"key":"2023013112062788800_btz575-B14","first-page":"158","volume-title":"Proceedings of RECOMB 2019, Vol. 11467 of LNCS","author":"Kuhnle","year":"2019"},{"key":"2023013112062788800_btz575-B15","doi-asserted-by":"crossref","first-page":"1838","DOI":"10.1093\/bioinformatics\/bts280","article-title":"Exploring single-sample SNP and INDEL calling with whole-genome de novo assembly","volume":"28","author":"Li","year":"2012","journal-title":"Bioinformatics"},{"key":"2023013112062788800_btz575-B16","doi-asserted-by":"crossref","first-page":"3274","DOI":"10.1093\/bioinformatics\/btu541","article-title":"Fast construction of FM-index for long sequence reads","volume":"30","author":"Li","year":"2014","journal-title":"Bioinformatics"},{"key":"2023013112062788800_btz575-B17","first-page":"222","volume-title":"Proceedings of WABI 2016, Vol. 9838 of LNCS","author":"Maciuca","year":"2016"},{"key":"2023013112062788800_btz575-B18","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1089\/cmb.2009.0169","article-title":"Storage and retrieval of highly repetitive sequence collections","volume":"17","author":"M\u00e4kinen","year":"2010","journal-title":"J. Comput. Biol"},{"key":"2023013112062788800_btz575-B19","author":"Mokveld","year":"2018"},{"key":"2023013112062788800_btz575-B20","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1016\/j.tcs.2015.08.008","article-title":"FM-index of alignment: a compressed index for similar strings","volume":"638","author":"Na","year":"2016","journal-title":"Theor. Comp. Sci"},{"key":"2023013112062788800_btz575-B21","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1016\/j.tcs.2017.02.020","article-title":"FM-index of alignment with gaps","volume":"710","author":"Na","year":"2018","journal-title":"Theor. Comp. Sci"},{"key":"2023013112062788800_btz575-B22","doi-asserted-by":"crossref","first-page":"18.","DOI":"10.1186\/s13015-017-0109-9","article-title":"A graph extension of the positional Burrows-Wheeler transform and its applications","volume":"12","author":"Novak","year":"2017","journal-title":"Algorith. Mol. Biol"},{"key":"2023013112062788800_btz575-B23","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1101\/gr.214155.116","article-title":"Genome graphs and the evolution of genome inference","volume":"27","author":"Paten","year":"2017","journal-title":"Genome Res"},{"key":"2023013112062788800_btz575-B24","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/s41588-018-0316-4","article-title":"Fast and accurate genomic analyses using genome graphs","volume":"51","author":"Rakocevic","year":"2019","journal-title":"Nat. Genet"},{"key":"2023013112062788800_btz575-B25","doi-asserted-by":"crossref","first-page":"i118","DOI":"10.1093\/bioinformatics\/btx236","article-title":"Modelling haplotypes with respect to reference cohort variation graphs","volume":"33","author":"Rosen","year":"2017","journal-title":"Bioinformatics"},{"key":"2023013112062788800_btz575-B26","doi-asserted-by":"crossref","first-page":"R98.","DOI":"10.1186\/gb-2009-10-9-r98","article-title":"Simultaneous alignment of short reads against multiple genomes","volume":"10","author":"Schneeberger","year":"2009","journal-title":"Genome Biol"},{"key":"2023013112062788800_btz575-B27","first-page":"211","volume-title":"Proceedings of DCC 2016","author":"Sir\u00e9n","year":"2016"},{"key":"2023013112062788800_btz575-B28","first-page":"13","volume-title":"Proceedings of ALENEX 2017","author":"Sir\u00e9n","year":"2017"},{"key":"2023013112062788800_btz575-B29","doi-asserted-by":"crossref","first-page":"375","DOI":"10.1109\/TCBB.2013.2297101","article-title":"Indexing graphs for path queries with applications in genome research","volume":"11","author":"Sir\u00e9n","year":"2014","journal-title":"IEEE\/ACM Trans. Comput. Biol. Bioinform"},{"key":"2023013112062788800_btz575-B30","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1038\/nature15393","article-title":"A global reference for human genetic variation","volume":"526","year":"2015","journal-title":"Nature"},{"key":"2023013112062788800_btz575-B31","first-page":"118","article-title":"Computational pan-genomics: status, promises and challenges","volume":"19","year":"2018","journal-title":"Brief. Bioinform"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btz575\/29154761\/btz575.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/2\/400\/48990879\/btz575.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/2\/400\/48990879\/btz575.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T16:22:42Z","timestamp":1675182162000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/36\/2\/400\/5538990"}},"subtitle":[],"editor":[{"given":"Alfonso","family":"Valencia","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2019,7,26]]},"references-count":31,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2020,1,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btz575","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/559583","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2020,1,15]]},"published":{"date-parts":[[2019,7,26]]}}}