{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:52:05Z","timestamp":1774353125301,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,4,19]],"date-time":"2018-04-19T00:00:00Z","timestamp":1524096000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"},{"start":{"date-parts":[[2018,4,19]],"date-time":"2018-04-19T00:00:00Z","timestamp":1524096000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["AI116794"],"award-info":[{"award-number":["AI116794"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["DK112217"],"award-info":[{"award-number":["DK112217"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["ES013508"],"award-info":[{"award-number":["ES013508"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BioData Mining"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1186\/s13040-018-0167-7","type":"journal-article","created":{"date-parts":[[2018,4,19]],"date-time":"2018-04-19T13:31:40Z","timestamp":1524144700000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Improving machine learning reproducibility in genetic association studies with proportional instance cross validation (PICV)"],"prefix":"10.1186","volume":"11","author":[{"given":"Elizabeth R.","family":"Piette","sequence":"first","affiliation":[]},{"given":"Jason H.","family":"Moore","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,4,19]]},"reference":[{"issue":"7265","key":"167_CR1","doi-asserted-by":"publisher","first-page":"747","DOI":"10.1038\/nature08494","volume":"461","author":"TA Manolio","year":"2009","unstructured":"Manolio TA, et al. Finding the missing heritability of complex diseases. Nature. 2009;461(7265):747.","journal-title":"Nature"},{"issue":"1","key":"167_CR2","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1016\/j.ajhg.2011.11.029","volume":"90","author":"PM Visscher","year":"2012","unstructured":"Visscher PM, et al. Five years of GWAS discovery. Am J Hum Genet. 2012;90(1):7\u201324.","journal-title":"Am J Hum Genet"},{"issue":"4","key":"167_CR3","doi-asserted-by":"publisher","first-page":"1193","DOI":"10.1073\/pnas.1119675109","volume":"109","author":"O Zuk","year":"2012","unstructured":"Zuk O, et al. The mystery of missing heritability: genetic interactions create phantom heritability. P Natl Acad Sci. 2012;109(4):1193\u20138.","journal-title":"P Natl Acad Sci"},{"issue":"5950","key":"167_CR4","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1126\/science.1181369","volume":"326","author":"E Lieberman-Aiden","year":"2009","unstructured":"Lieberman-Aiden E, et al. Comprehensive mapping of long-range interactions reveals folding principles of the human genome. Science. 2009;326(5950):289\u201393.","journal-title":"Science"},{"issue":"6235","key":"167_CR5","doi-asserted-by":"publisher","first-page":"648","DOI":"10.1126\/science.1262110","volume":"348","author":"GTEx Consortium","year":"2015","unstructured":"GTEx Consortium. The genotype-tissue expression (GTEx) pilot analysis: multitissue gene regulation in humans. Science. 2015;348(6235):648\u201360.","journal-title":"Science"},{"issue":"4","key":"167_CR6","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1002\/gepi.20492","volume":"34","author":"MC Cornelis","year":"2010","unstructured":"Cornelis MC, et al. The gene, environment association studies consortium (GENEVA): maximizing the knowledge obtained from GWAS by collaboration across studies of multiple conditions. Genet Epidemiol. 2010;34(4):364\u201372.","journal-title":"Genet Epidemiol"},{"key":"167_CR7","unstructured":"Bush WS, Dudek SM, Ritchie MD. Biofilter: a knowledge-integration system for the multi-locus analysis of genome-wide association studies. Pac Symp Biocomput. NIH Public Access. 2009;"},{"issue":"2","key":"167_CR8","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1038\/nrg3868","volume":"16","author":"MD Ritchie","year":"2015","unstructured":"Ritchie MD, et al. Methods of integrating data to uncover genotype-phenotype interactions. Nat Rev Gen. 2015;16(2):85.","journal-title":"Nat Rev Gen"},{"issue":"6","key":"167_CR9","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1038\/nrg3920","volume":"16","author":"MW Libbrecht","year":"2015","unstructured":"Libbrecht MW, Noble WS. Machine learning in genetics and genomics. Nat Rev Gen. 2015;16(6):321.","journal-title":"Nat Rev Gen"},{"key":"167_CR10","doi-asserted-by":"crossref","unstructured":"Larranaga P, et al. Machine learning in bioinformatics. Brief Bioinform. 2006;7:86\u2013112.","DOI":"10.1093\/bib\/bbk007"},{"issue":"6","key":"167_CR11","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1002\/bies.20236","volume":"27","author":"JH Moore","year":"2005","unstructured":"Moore JH, Williams SM. Traversing the conceptual divide between biological and statistical epistasis: systems biology and a more modern synthesis. BioEssays. 2005;27(6):637\u201346.","journal-title":"BioEssays"},{"issue":"6","key":"167_CR12","doi-asserted-by":"publisher","first-page":"e5639","DOI":"10.1371\/journal.pone.0005639","volume":"4","author":"CS Greene","year":"2009","unstructured":"Greene CS, et al. Failure to replicate a genetic association may provide important clues about genetic architecture. PLoS One. 2009;4(6):e5639.","journal-title":"PLoS One"},{"issue":"4","key":"167_CR13","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1093\/bioinformatics\/btp713","volume":"26","author":"JH Moore","year":"2010","unstructured":"Moore JH, Asselbergs FW, Williams SM. Bioinformatics challenges for genome-wide association studies. Bioinformatics. 2010;26(4):445\u201355.","journal-title":"Bioinformatics"},{"key":"167_CR14","unstructured":"Longo DL, Drazen JM. Data sharing. N Engl J Med. 2016;7:276\u20137."},{"key":"167_CR15","unstructured":"Kluyver T, et al. Jupyter notebooks-a publishing format for reproducible computational workflows: ELPUB; 2016. \n                    http:\/\/ebooks.iospress.nl\/publication\/42900\n                    \n                  ."},{"issue":"2014","key":"167_CR16","first-page":"2","volume":"239","author":"D Merkel","year":"2014","unstructured":"Merkel D. Docker: lightweight linux containers for consistent development and deployment. Linux Journal. 2014;239(2014):2.","journal-title":"Linux Journal"},{"issue":"4","key":"167_CR17","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1038\/nbt.3780","volume":"35","author":"BK Beaulieu-Jones","year":"2017","unstructured":"Beaulieu-Jones BK, Greene CS. Reproducibility of computational workflows is automated using continuous analysis. Nat Biotechnol. 2017;35(4):342\u20136.","journal-title":"Nat Biotechnol"},{"issue":"5","key":"167_CR18","doi-asserted-by":"publisher","first-page":"779","DOI":"10.1016\/j.celrep.2014.02.021","volume":"6","author":"WC Hines","year":"2014","unstructured":"Hines WC, et al. Sorting out the FACS: a devil in the details. Cell Rep. 2014;6(5):779\u201381.","journal-title":"Cell Rep"},{"issue":"7668","key":"167_CR19","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1038\/548387a","volume":"548","author":"GJ Lithgow","year":"2017","unstructured":"Lithgow GJ, Driscoll M, Phillips P. A long journey to reproducible results. Nature News. 2017;548(7668):387.","journal-title":"Nature News"},{"issue":"1-3","key":"167_CR20","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1159\/000073735","volume":"56","author":"JH Moore","year":"2003","unstructured":"Moore JH. The ubiquitous nature of epistasis in determining susceptibility to common human diseases. Hum Hered. 2003;56(1-3):73\u201382.","journal-title":"Hum Hered"},{"issue":"8","key":"167_CR21","doi-asserted-by":"publisher","first-page":"618","DOI":"10.1038\/nrg1407","volume":"5","author":"\u00d6 Carlborg","year":"2004","unstructured":"Carlborg \u00d6, Haley CS. Epistasis: too often neglected in complex trait studies? Nat Rev Gen. 2004;5(8):618.","journal-title":"Nat Rev Gen"},{"issue":"4","key":"167_CR22","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1038\/ng1537","volume":"37","author":"J Marchini","year":"2005","unstructured":"Marchini J, Donnelly P, Cardon LR. Genome-wide strategies for detecting multiple loci that influence complex diseases. Nat Genet. 2005;37(4):413.","journal-title":"Nat Genet"},{"key":"167_CR23","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1214\/09-SS054","volume":"4","author":"S Arlot","year":"2010","unstructured":"Arlot S, Celisse A. A survey of cross-validation procedures for model selection. Stat Surv. 2010;4:40\u201379.","journal-title":"Stat Surv"},{"key":"167_CR24","doi-asserted-by":"crossref","unstructured":"van Hulse J, Khoshgoftaar TM, Napolitano A. Experimental perspectives on learning from imbalanced data. In: Proceedings of the 24th international conference on machine learning: ACM; 2007. \n                    http:\/\/ebooks.iospress.nl\/publication\/42900\n                    \n                  .","DOI":"10.1145\/1273496.1273614"},{"issue":"9","key":"167_CR25","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He H, Garcia EA. Learning from imbalanced data. IEEE T Knowl Data En. 2009;21(9):1263\u201384.","journal-title":"IEEE T Knowl Data En"},{"issue":"1","key":"167_CR26","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1111\/j.0824-7935.2004.t01-1-00228.x","volume":"20","author":"A Estabrooks","year":"2004","unstructured":"Estabrooks A, Jo T, Japkowicz N. A multiple resampling method for learning from imbalanced data sets. Comput Intell. 2004;20(1):18\u201336.","journal-title":"Comput Intell"},{"issue":"1","key":"167_CR27","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1186\/1756-0381-5-16","volume":"5","author":"RJ Urbanowicz","year":"2012","unstructured":"Urbanowicz RJ, et al. GAMETES: a fast, direct algorithm for generating pure, strict, epistatic models with random architectures. BioDat Min. 2012;5(1):16.","journal-title":"BioDat Min"},{"key":"167_CR28","unstructured":"Centers for Disease Control and Prevention. Chronic Disease Prevention and Health Promotion. \n                    https:\/\/www.cdc.gov\/chronicdisease\/overview\/index.htm\n                    \n                  . Accessed 05 Sep 2017."},{"key":"167_CR29","unstructured":"National Center for Health Statistics. (2017). Heart Disease. Retrieved December 19, 2017, from \n                    https:\/\/www.cdc.gov\/nchs\/fastats\/heart-disease.htm"},{"issue":"4","key":"167_CR30","doi-asserted-by":"publisher","first-page":"532","DOI":"10.1001\/archopht.122.4.532","volume":"122","author":"DS Friedman","year":"2004","unstructured":"Friedman DS, Wolfs RCW, O\u2019Colmain BJ, Klein BE, Taylor HR, West S, et al. Prevalence of open-angle glaucoma among adults in the United States. Arch Ophthalmol (Chicago, Ill : 1960). 2004;122(4):532\u20138. \n                    https:\/\/doi.org\/10.1001\/archopht.122.4.532","journal-title":"Arch Ophthalmol (Chicago, Ill : 1960)"},{"key":"167_CR31","doi-asserted-by":"publisher","unstructured":"Verma SS, Cooke Bailey JN, Lucas A, Bradford Y, Linneman JG, Hauser MA, et al. Epistatic gene-based interaction analyses for Glaucoma in eMERGE and NEIGHBOR consortium. PLoS Genet. 2016;12(9) \n                    https:\/\/doi.org\/10.1371\/journal.pgen.1006186","DOI":"10.1371\/journal.pgen.1006186"},{"issue":"1","key":"167_CR32","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1093\/bib\/bbs006","volume":"14","author":"W-J Lin","year":"2012","unstructured":"Lin W-J, Chen JJ. Class-imbalanced classifiers for high-dimensional data. Brief Bioinform. 2012;14(1):13\u201326.","journal-title":"Brief Bioinform"},{"issue":"7","key":"167_CR33","doi-asserted-by":"publisher","first-page":"10206","DOI":"10.1016\/j.eswa.2009.02.037","volume":"36","author":"TS Guzella","year":"2009","unstructured":"Guzella TS, Caminhas WM. A review of machine learning approaches to spam filtering. Expert Syst Appl. 2009;36(7):10206\u201322.","journal-title":"Expert Syst Appl"},{"issue":"4","key":"167_CR34","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1109\/TSMCC.2011.2161285","volume":"42","author":"M Galar","year":"2012","unstructured":"Galar M, et al. A review on ensembles for the class imbalance problem: bagging-, boosting-, and hybrid-based approaches. IEEE T Syst Man Cyb C. 2012;42(4):463\u201384.","journal-title":"IEEE T Syst Man Cyb C"},{"issue":"2","key":"167_CR35","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1109\/TSMCB.2008.2007853","volume":"39","author":"X-Y Liu","year":"2009","unstructured":"Liu X-Y, Wu J, Zhou Z-H. Exploratory undersampling for class-imbalance learning. IEEE T Syst Man Cyb B. 2009;39(2):539\u201350.","journal-title":"IEEE T Syst Man Cyb B"},{"key":"167_CR36","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/j.ins.2013.07.007","volume":"250","author":"V L\u00f3pez","year":"2013","unstructured":"L\u00f3pez V, et al. An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics. Inform Sci. 2013;250:113\u201341.","journal-title":"Inform Sci"},{"issue":"4","key":"167_CR37","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1002\/gepi.20211","volume":"31","author":"DR Velez","year":"2007","unstructured":"Velez DR, et al. A balanced accuracy function for epistasis modeling in imbalanced datasets using multifactor dimensionality reduction. Genet Epi. 2007;31(4):306\u201315.","journal-title":"Genet Epi"},{"key":"167_CR38","doi-asserted-by":"crossref","unstructured":"Quionero-Candela J, et al. Dataset shift in machine learning: The MIT Press; 2009. \n                    http:\/\/ebooks.iospress.nl\/publication\/42900\n                    \n                  .","DOI":"10.7551\/mitpress\/9780262170055.001.0001"},{"key":"167_CR39","unstructured":"Sugiyama M, et al. Direct importance estimation with model selection and its application to covariate shift adaptation. Adv Neur In Process Syst. 2008;20:1433\u201340."},{"issue":"2","key":"167_CR40","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/S0378-3758(00)00115-4","volume":"90","author":"H Shimodaira","year":"2000","unstructured":"Shimodaira H. Improving predictive inference under covariate shift by weighting the log-likelihood function. J Stat Plan Infer. 2000;90(2):227\u201344.","journal-title":"J Stat Plan Infer"}],"container-title":["BioData Mining"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13040-018-0167-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13040-018-0167-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13040-018-0167-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,14]],"date-time":"2020-05-14T12:58:13Z","timestamp":1589461093000},"score":1,"resource":{"primary":{"URL":"https:\/\/biodatamining.biomedcentral.com\/articles\/10.1186\/s13040-018-0167-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4,19]]},"references-count":40,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["167"],"URL":"https:\/\/doi.org\/10.1186\/s13040-018-0167-7","relation":{},"ISSN":["1756-0381"],"issn-type":[{"value":"1756-0381","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,4,19]]},"assertion":[{"value":"9 October 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 April 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 April 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Not applicable.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Publisher\u2019s Note"}}],"article-number":"6"}}