{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T16:44:16Z","timestamp":1774111456483,"version":"3.50.1"},"reference-count":6,"publisher":"Oxford University Press (OUP)","issue":"4","license":[{"start":{"date-parts":[[2016,11,29]],"date-time":"2016-11-29T00:00:00Z","timestamp":1480377600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["P20GM103534"],"award-info":[{"award-number":["P20GM103534"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,2,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Checking concordance between reported sex and genotype-inferred sex is a crucial quality control measure in genome-wide association studies (GWAS). However, limited insights exist regarding the true accuracy of software that infer sex from genotype array data.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We present seXY, a logistic regression model trained on both X chromosome heterozygosity and Y chromosome missingness, that consistently demonstrated\u2009&gt;99.5% sex inference accuracy in cross-validation for 889 males and 5,361 females enrolled in prostate cancer and ovarian cancer GWAS. 
Compared to PLINK, one of the most popular tools for sex inference in GWAS that assesses only X chromosome heterozygosity, seXY achieved marginally better male classification and 3% more accurate female classification.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and Implementation<\/jats:title>\n                  <jats:p>https:\/\/github.com\/Christopher-Amos-Lab\/seXY<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btw696","type":"journal-article","created":{"date-parts":[[2016,11,3]],"date-time":"2016-11-03T12:05:32Z","timestamp":1478174732000},"page":"561-563","source":"Crossref","is-referenced-by-count":6,"title":["seXY: a tool for sex inference from genotype arrays"],"prefix":"10.1093","volume":"33","author":[{"given":"David C","family":"Qian","sequence":"first","affiliation":[{"name":"Department of Biomedical Data Science, Dartmouth Geisel School of Medicine, Lebanon, NH, USA"}]},{"given":"Jonathan A","family":"Busam","sequence":"additional","affiliation":[{"name":"Department of Biological Sciences, Dartmouth College, Hanover, NH, USA"}]},{"given":"Xiangjun","family":"Xiao","sequence":"additional","affiliation":[{"name":"Department of Biomedical Data Science, Dartmouth Geisel School of Medicine, Lebanon, NH, USA"}]},{"given":"Tracy A","family":"O\u2019Mara","sequence":"additional","affiliation":[{"name":"Department of Genetics and Computational Biology, QIMR Berghofer Medical Research Institute, Brisbane, QLD, Australia"}]},{"given":"Rosalind A","family":"Eeles","sequence":"additional","affiliation":[{"name":"Division of Genetics and Epidemiology, Institute of Cancer Research, London, UK"}]},{"given":"Frederick 
R","family":"Schumacher","sequence":"additional","affiliation":[{"name":"Department of Epidemiology and Biostatistics, Case Western Reserve University, Cleveland, OH, USA"}]},{"given":"Catherine M","family":"Phelan","sequence":"additional","affiliation":[{"name":"Department of Cancer Epidemiology, Moffitt Cancer Center, Tampa, FL, USA"}]},{"given":"Christopher I","family":"Amos","sequence":"additional","affiliation":[{"name":"Department of Biomedical Data Science, Dartmouth Geisel School of Medicine, Lebanon, NH, USA"}]}],"member":"286","published-online":{"date-parts":[[2016,11,29]]},"reference":[{"key":"2023020204424079100_btw696-B1","article-title":"The OncoArray Consortium: a Network for Understanding the Genetic Architecture of Common Cancers","author":"Amos","year":"2016","journal-title":"Cancer Epidemiol. Biomarkers Prev"},{"key":"2023020204424079100_btw696-B2","doi-asserted-by":"crossref","first-page":"3777","DOI":"10.1093\/nar\/gkr1255","article-title":"Comprehensive literature review and statistical considerations for GWAS meta-analysis","volume":"40","author":"Begum","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2023020204424079100_btw696-B3","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1016\/j.cca.2015.03.009","article-title":"Detection of sex chromosome aneuploidies using quantitative fluorescent PCR in the Hungarian population","volume":"445","author":"Nagy","year":"2015","journal-title":"Clin. Chim. Acta"},{"key":"2023020204424079100_btw696-B4","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1086\/519795","article-title":"PLINK: a tool set for whole-genome association and population-based linkage analyses","volume":"81","author":"Purcell","year":"2007","journal-title":"Am. J. Hum. 
Genet"},{"key":"2023020204424079100_btw696-B5","doi-asserted-by":"crossref","first-page":"31","DOI":"10.3389\/fgene.2011.00031","article-title":"Cost-effective prediction of gender-labeling errors and estimation of gender-labeling error rates in candidate-gene association studies","volume":"2","author":"Qu","year":"2011","journal-title":"Front. Genet"},{"key":"2023020204424079100_btw696-B6","doi-asserted-by":"crossref","first-page":"e67863","DOI":"10.1371\/journal.pone.0067863","article-title":"The role of balanced training and testing data sets for binary classifiers in bioinformatics","volume":"8","author":"Wei","year":"2013","journal-title":"PLoS One"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/4\/561\/49037806\/bioinformatics_33_4_561.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/4\/561\/49037806\/bioinformatics_33_4_561.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,2]],"date-time":"2023-02-02T04:47:18Z","timestamp":1675313238000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/33\/4\/561\/2666346"}},"subtitle":[],"editor":[{"given":"John","family":"Hancock","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2016,11,29]]},"references-count":6,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2017,2,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btw696","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2017,2,15]]},"published":{"date-parts":[[2016,11,29]]}}}