{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,17]],"date-time":"2026-06-17T02:59:45Z","timestamp":1781665185649,"version":"3.54.5"},"reference-count":12,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2020,9,8]],"date-time":"2020-09-08T00:00:00Z","timestamp":1599523200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/100006483","name":"AbbVie","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006483","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,5]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>Phenome-wide association studies (PheWASs) are known to be a powerful tool in discovery and replication of genetic association studies. To reduce the computational burden of PheWAS in the large cohorts, such as the UK Biobank, the SAIGE method has been proposed to control for case\u2013control imbalance and sample relatedness in a tractable manner. However, SAIGE is still computationally intensive when deployed in analyzing the associations of thousands of ICD10-coded phenotypes with whole-genome imputed genotype data. Here, we present a new high-performance statistical R package (SAIGEgds) for large-scale PheWAS using generalized linear mixed models. The package implements the SAIGE method in optimized C++ codes, taking advantage of sparse genotype dosages and integrating the efficient genomic data structure file format. Benchmarks using the UK Biobank White British genotype data (N\u2009\u2248\u2009430\u00a0K) with coronary heart disease and simulated cases show that the implementation in SAIGEgds is 5\u20136 times faster than the SAIGE R package. When used in conjunction with high-performance computing clusters, SAIGEgds provides an efficient analysis pipeline for biobank-scale PheWAS.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>https:\/\/bioconductor.org\/packages\/SAIGEgds; vignettes included.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa731","type":"journal-article","created":{"date-parts":[[2020,9,3]],"date-time":"2020-09-03T03:21:58Z","timestamp":1599103318000},"page":"728-730","source":"Crossref","is-referenced-by-count":40,"title":["SAIGEgds\u2014an efficient statistical tool for large-scale PheWAS with mixed models"],"prefix":"10.1093","volume":"37","author":[{"given":"Xiuwen","family":"Zheng","sequence":"first","affiliation":[{"name":"Genomics Research Center, AbbVie Inc ., North Chicago, IL 60064, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"J Wade","family":"Davis","sequence":"additional","affiliation":[{"name":"Genomics Research Center, AbbVie Inc ., North Chicago, IL 60064, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"286","published-online":{"date-parts":[[2020,9,8]]},"reference":[{"key":"2023051704102722300_btaa731-B1","doi-asserted-by":"crossref","first-page":"e56","DOI":"10.1161\/CIR.0000000000000659","article-title":"Heart disease and stroke statistics\u20142019 update: a report from the American Heart Association","volume":"139","author":"Benjamin","year":"2019","journal-title":"Circulation"},{"key":"2023051704102722300_btaa731-B2","doi-asserted-by":"crossref","first-page":"1560","DOI":"10.1038\/ng.3968","article-title":"Analysis commons, a team approach to discovery in a big-data environment for genetic epidemiology","volume":"49","author":"Brody","year":"2017","journal-title":"Nat. Genet"},{"key":"2023051704102722300_btaa731-B3","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1038\/s41586-018-0579-z","article-title":"The UK Biobank resource with deep phenotyping and genomic data","volume":"562","author":"Bycroft","year":"2018","journal-title":"Nature"},{"key":"2023051704102722300_btaa731-B4","doi-asserted-by":"crossref","first-page":"1102","DOI":"10.1038\/nbt.2749","article-title":"Systematic comparison of phenome-wide association study of electronic medical record data and genome-wide association study data","volume":"31","author":"Denny","year":"2013","journal-title":"Nat. Biotechnol"},{"key":"2023051704102722300_btaa731-B5","doi-asserted-by":"crossref","first-page":"3329","DOI":"10.1093\/bioinformatics\/bts610","article-title":"GWASTools: an R\/Bioconductor package for quality control and analysis of genome-wide association studies","volume":"28","author":"Gogarten","year":"2012","journal-title":"Bioinformatics"},{"key":"2023051704102722300_btaa731-B6","doi-asserted-by":"crossref","first-page":"5346","DOI":"10.1093\/bioinformatics\/btz567","article-title":"Genetic association testing using the GENESIS R\/Bioconductor package","volume":"35","author":"Gogarten","year":"2019","journal-title":"Bioinformatics"},{"key":"2023051704102722300_btaa731-B7","doi-asserted-by":"crossref","first-page":"410","DOI":"10.1016\/j.ajhg.2019.01.002","article-title":"ACAT: a fast and powerful p value combination method for rare-variant analysis in sequencing studies","volume":"104","author":"Liu","year":"2019","journal-title":"Am. J. Hum. Genet"},{"key":"2023051704102722300_btaa731-B8","doi-asserted-by":"crossref","first-page":"284","DOI":"10.1038\/ng.3190","article-title":"Efficient Bayesian mixed-model analysis increases association power in large cohorts","volume":"47","author":"Loh","year":"2015","journal-title":"Nat. Genet"},{"key":"2023051704102722300_btaa731-B9","doi-asserted-by":"crossref","first-page":"906","DOI":"10.1038\/s41588-018-0144-6","article-title":"Mixed-model association for biobank-scale datasets","volume":"50","author":"Loh","year":"2018","journal-title":"Nat. Genet"},{"key":"2023051704102722300_btaa731-B10","doi-asserted-by":"crossref","first-page":"3326","DOI":"10.1093\/bioinformatics\/bts606","article-title":"A high-performance computing toolset for relatedness and principal component analysis of SNP data","volume":"28","author":"Zheng","year":"2012","journal-title":"Bioinformatics"},{"key":"2023051704102722300_btaa731-B11","doi-asserted-by":"crossref","first-page":"2251","DOI":"10.1093\/bioinformatics\/btx145","article-title":"SeqArray \u2013 a storage-efficient high-performance data format for WGS variant calls","volume":"33","author":"Zheng","year":"2017","journal-title":"Bioinformatics"},{"key":"2023051704102722300_btaa731-B12","doi-asserted-by":"crossref","first-page":"1335","DOI":"10.1038\/s41588-018-0184-y","article-title":"Efficiently controlling for case-control imbalance and sample relatedness in large-scale genetic association studies","volume":"50","author":"Zhou","year":"2018","journal-title":"Nat. Genet"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa731\/34194886\/btaa731.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/5\/728\/50356942\/btaa731.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/5\/728\/50356942\/btaa731.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,17]],"date-time":"2023-05-17T04:10:50Z","timestamp":1684296650000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/37\/5\/728\/5902828"}},"subtitle":[],"editor":[{"given":"Russell","family":"Schwartz","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"editor"}]}],"short-title":[],"issued":{"date-parts":[[2020,9,8]]},"references-count":12,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,5,5]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa731","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2021,3,1]]},"published":{"date-parts":[[2020,9,8]]}}}