{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T11:44:22Z","timestamp":1753875862838,"version":"3.41.2"},"reference-count":24,"publisher":"Oxford University Press (OUP)","issue":"6","license":[{"start":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T00:00:00Z","timestamp":1749772800000},"content-version":"vor","delay-in-days":12,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>HAP-SAMPLE2 extends the functionality of the original HAP-SAMPLE tool for simulating genotype-phenotype data, now with features to handle population admixture and rare variant analysis. It allows users to define parameters such as disease prevalence and allele effect sizes for both common and rare variant simulations.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>HAP-SAMPLE2 provides an efficient means for simulating complex datasets, suitable for large-scale projects like the 1000 Genomes Project. Its capabilities for population admixture allow users to create admixed populations or preserve substructures while introducing novel variation through artificial recombination. Additionally, the tool supports burden testing for rare variants using fixed and Madsen-Browning weighting schemes.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>The software, along with a detailed vignette, is available on GitHub: https:\/\/github.com\/M3dical\/HAPSAMPLE2.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaf333","type":"journal-article","created":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T13:26:55Z","timestamp":1749821215000},"source":"Crossref","is-referenced-by-count":0,"title":["HAP-SAMPLE2: data-based resampling for association studies with admixture"],"prefix":"10.1093","volume":"41","author":[{"given":"George","family":"Sun","sequence":"first","affiliation":[{"name":"Bioinformatics Research Center, North Carolina State University , Raleigh, NC 27695,","place":["United States"]}]},{"given":"Bryan W","family":"Ting","sequence":"additional","affiliation":[{"name":"Bioinformatics Research Center, North Carolina State University , Raleigh, NC 27695,","place":["United States"]}]},{"given":"Fred A","family":"Wright","sequence":"additional","affiliation":[{"name":"Bioinformatics Research Center, North Carolina State University , Raleigh, NC 27695,","place":["United States"]},{"name":"Department of Biological Sciences, North Carolina State University , Raleigh, NC 27695,","place":["United States"]},{"name":"Department of Statistics, North Carolina State University , Raleigh, NC 27695,","place":["United States"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4092-7463","authenticated-orcid":false,"given":"Yi-Hui","family":"Zhou","sequence":"additional","affiliation":[{"name":"Bioinformatics Research Center, North Carolina State University , Raleigh, NC 27695,","place":["United States"]},{"name":"Department of Biological Sciences, North Carolina State University , Raleigh, NC 27695,","place":["United States"]},{"name":"Department of Statistics, North Carolina State University , Raleigh, NC 27695,","place":["United States"]}]}],"member":"286","published-online":{"date-parts":[[2025,6,13]]},"reference":[{"key":"2025070408272992900_btaf333-B1","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1186\/s13073-015-0138-2","article-title":"Rare variant association studies: considerations, challenges and opportunities","volume":"7","author":"Auer","year":"2015","journal-title":"Genome Med"},{"key":"2025070408272992900_btaf333-B2","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1038\/nature15393","article-title":"A global reference for human genetic variation","volume":"526","author":"Auton","year":"2015","journal-title":"Nature"},{"key":"2025070408272992900_btaf333-B3","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1038\/nrg3931","article-title":"Estimating the mutation load in human genomes","volume":"16","author":"Henn","year":"2015","journal-title":"Nat Rev Genet"},{"year":"2015","author":"Howie","key":"2025070408272992900_btaf333-B4"},{"key":"2025070408272992900_btaf333-B5","doi-asserted-by":"crossref","first-page":"1497","DOI":"10.1080\/02664763.2023.2208773","article-title":"Combining phenotypic and genomic data to improve prediction of binary traits","volume":"51","author":"Jarquin","year":"2024","journal-title":"J Appl Stat"},{"key":"2025070408272992900_btaf333-B6","doi-asserted-by":"crossref","first-page":"e1000831","DOI":"10.1371\/journal.pgen.1000831","article-title":"Genetic crossovers are predicted accurately by the computed human recombination map","volume":"6","author":"Khil","year":"2010","journal-title":"PLoS Genet"},{"key":"2025070408272992900_btaf333-B7","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/j.ajhg.2014.06.009","article-title":"Rare-variant association analysis: study designs and statistical tests","volume":"95","author":"Lee","year":"2014","journal-title":"Am J Hum Genet"},{"key":"2025070408272992900_btaf333-B8","doi-asserted-by":"crossref","first-page":"140","DOI":"10.1093\/bioinformatics\/btm549","article-title":"Gwasimulator: a rapid whole-genome simulation program","volume":"24","author":"Li","year":"2008","journal-title":"Bioinformatics"},{"key":"2025070408272992900_btaf333-B9","doi-asserted-by":"crossref","first-page":"e1000384","DOI":"10.1371\/journal.pgen.1000384","article-title":"A groupwise association test for rare mutations using a weighted sum statistic","volume":"5","author":"Madsen","year":"2009","journal-title":"PLoS Genet"},{"key":"2025070408272992900_btaf333-B10","doi-asserted-by":"crossref","first-page":"442","DOI":"10.1186\/1471-2105-11-442","article-title":"Forward-time simulation of realistic samples for genome-wide association studies","volume":"11","author":"Peng","year":"2010","journal-title":"BMC Bioinformatics"},{"key":"2025070408272992900_btaf333-B11","doi-asserted-by":"crossref","first-page":"3686","DOI":"10.1093\/bioinformatics\/bti584","article-title":"Simupop: a forward-time population genetics simulation environment","volume":"21","author":"Peng","year":"2005","journal-title":"Bioinformatics"},{"key":"2025070408272992900_btaf333-B12","doi-asserted-by":"crossref","first-page":"1101","DOI":"10.1093\/bioinformatics\/btt094","article-title":"Genetic simulation resources: a website for the registration and discovery of genetic data simulators","volume":"29","author":"Peng","year":"2013","journal-title":"Bioinformatics"},{"key":"2025070408272992900_btaf333-B13","first-page":"20","article-title":"Seqsimla2: simulating correlated quantitative traits accounting for shared environmental effects in user-specified pedigree structure","volume":"39","author":"Ren-Hua","year":"2014","journal-title":"Genetic Epidemiol"},{"key":"2025070408272992900_btaf333-B14","doi-asserted-by":"crossref","first-page":"e2319496121","DOI":"10.1073\/pnas.2319496121","article-title":"Heritability within groups is uninformative about differences among groups: cases from behavioral, evolutionary, and statistical genetics","volume":"121","author":"Schraiber","year":"2024","journal-title":"Proc Natl Acad Sci USA"},{"key":"2025070408272992900_btaf333-B15","doi-asserted-by":"crossref","first-page":"2304","DOI":"10.1093\/bioinformatics\/btr341","article-title":"Hapgen2: simulation of multiple disease SNPs","volume":"27","author":"Su","year":"2011","journal-title":"Bioinformatics"},{"key":"2025070408272992900_btaf333-B16","doi-asserted-by":"crossref","first-page":"btad535","DOI":"10.1093\/bioinformatics\/btad535","article-title":"Hapnest: efficient, large-scale generation and evaluation of synthetic datasets for genotypes and phenotypes","volume":"39","author":"Wharrie","year":"2023","journal-title":"Bioinformatics"},{"volume-title":"Molecular Biology of the Cell, 4th Edition: A Problems Approach","year":"2002","author":"Wilson","key":"2025070408272992900_btaf333-B17"},{"key":"2025070408272992900_btaf333-B18","doi-asserted-by":"crossref","first-page":"2581","DOI":"10.1093\/bioinformatics\/btm386","article-title":"Simulating association studies: a data-based resampling method for candidate regions or whole genome scans","volume":"23","author":"Wright","year":"2007","journal-title":"Bioinformatics"},{"key":"2025070408272992900_btaf333-B19","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1002\/gepi.21696","article-title":"Simulating realistic genomic data with rare variants","volume":"37","author":"Xu","year":"2013","journal-title":"Genet Epidemiol"},{"key":"2025070408272992900_btaf333-B20","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1016\/j.ajhg.2010.11.011","article-title":"Gcta: a tool for genome-wide complex trait analysis","volume":"88","author":"Yang","year":"2011","journal-title":"The Am J Human Gene"},{"key":"2025070408272992900_btaf333-B21","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1089\/cmb.2010.0188","article-title":"An overview of population genetic data simulation","volume":"19","author":"Yuan","year":"2012","journal-title":"J Comput Biol"},{"key":"2025070408272992900_btaf333-B22","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1186\/1471-2105-9-331","article-title":"Hapsimu: a genetic simulation platform for population-based association studies","volume":"9","author":"Zhang","year":"2008","journal-title":"BMC Bioinfo"},{"key":"2025070408272992900_btaf333-B23","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1186\/s12863-015-0173-4","article-title":"Gpopsim: a simulation tool for whole-genome genetic data","volume":"16","author":"Zhang","year":"2015","journal-title":"BMC Genet"},{"key":"2025070408272992900_btaf333-B24","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1111\/biom.12708","article-title":"Computation of ancestry scores with mixed families and unrelated individuals","volume":"74","author":"Zhou","year":"2018","journal-title":"Biometrics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaf333\/63484360\/btaf333.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/6\/btaf333\/63484360\/btaf333.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/6\/btaf333\/63484360\/btaf333.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T12:27:35Z","timestamp":1751632055000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btaf333\/8161565"}},"subtitle":[],"editor":[{"given":"Janet","family":"Kelso","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":24,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,6,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaf333","relation":{},"ISSN":["1367-4811"],"issn-type":[{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2025,6]]},"published":{"date-parts":[[2025,6]]},"article-number":"btaf333"}}