{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T20:34:56Z","timestamp":1772138096277,"version":"3.50.1"},"reference-count":27,"publisher":"Oxford University Press (OUP)","issue":"24","license":[{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["R01CA216265"],"award-info":[{"award-number":["R01CA216265"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["R01DE022772"],"award-info":[{"award-number":["R01DE022772"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["P20GM104416"],"award-info":[{"award-number":["P20GM104416"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Dartmouth College Neukom Institute for Computational Science CompX award","award":["T32LM012204"],"award-info":[{"award-number":["T32LM012204"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Summary<\/jats:title>\n                    <jats:p>Performing highly parallelized preprocessing of methylation array data using Python can accelerate data preparation for downstream methylation analyses, including large scale production-ready machine learning pipelines. We present a highly reproducible, scalable pipeline (PyMethylProcess) that can be quickly set-up and deployed through Docker and PIP.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>Project Home Page: https:\/\/github.com\/Christensen-Lab-Dartmouth\/PyMethylProcess. Available on PyPI (pymethylprocess), Docker (joshualevy44\/pymethylprocess).<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btz594","type":"journal-article","created":{"date-parts":[[2019,7,26]],"date-time":"2019-07-26T07:10:22Z","timestamp":1564125022000},"page":"5379-5381","source":"Crossref","is-referenced-by-count":12,"title":["PyMethylProcess\u2014convenient high-throughput preprocessing workflow for DNA methylation data"],"prefix":"10.1093","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8050-1291","authenticated-orcid":false,"given":"Joshua J","family":"Levy","sequence":"first","affiliation":[{"name":"Department of Epidemiology, Geisel School of Medicine at Dartmouth"},{"name":"Program in Quantitative Biomedical Sciences, Geisel School of Medicine at Dartmouth , Hanover, NH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0145-9564","authenticated-orcid":false,"given":"Alexander J","family":"Titus","sequence":"additional","affiliation":[{"name":"Department of Epidemiology, Geisel School of Medicine at Dartmouth"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2279-4097","authenticated-orcid":false,"given":"Lucas A","family":"Salas","sequence":"additional","affiliation":[{"name":"Department of Epidemiology, Geisel School of Medicine at Dartmouth"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3022-426X","authenticated-orcid":false,"given":"Brock C","family":"Christensen","sequence":"additional","affiliation":[{"name":"Department of Epidemiology, Geisel School of Medicine at Dartmouth"},{"name":"Department of Molecular and Systems Biology , Hanover, NH, USA"}]}],"member":"286","published-online":{"date-parts":[[2019,8,1]]},"reference":[{"key":"2023013108403668400_btz594-B1","first-page":"265","author":"Abadi","year":"2016"},{"key":"2023013108403668400_btz594-B2","author":"Amstutz","year":"2016"},{"key":"2023013108403668400_btz594-B3","doi-asserted-by":"crossref","first-page":"1363","DOI":"10.1093\/bioinformatics\/btu049","article-title":"Minfi: a flexible and comprehensive Bioconductor package for the analysis of Infinium DNA methylation microarrays","volume":"30","author":"Aryee","year":"2014","journal-title":"Bioinformatics"},{"key":"2023013108403668400_btz594-B4","doi-asserted-by":"crossref","first-page":"177","DOI":"10.2217\/epi.09.14","article-title":"Genome-wide DNA methylation profiling using Infinium\u00ae assay","volume":"1","author":"Bibikova","year":"2009","journal-title":"Epigenomics"},{"key":"2023013108403668400_btz594-B5","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1145\/2723872.2723882","article-title":"An introduction to Docker for reproducible research","volume":"49","author":"Boettiger","year":"2015","journal-title":"SIGOPS Oper. Syst. Rev"},{"key":"2023013108403668400_btz594-B6","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1007\/978-3-642-37456-2_14","volume-title":"Advances in Knowledge Discovery and Data Mining, Lecture Notes in Computer Science","author":"Campello","year":"2013"},{"key":"2023013108403668400_btz594-B7","doi-asserted-by":"crossref","first-page":"469","DOI":"10.1038\/nature26000","article-title":"DNA methylation-based classification of central nervous system tumours","volume":"555","author":"Capper","year":"2018","journal-title":"Nature"},{"key":"2023013108403668400_btz594-B8","doi-asserted-by":"crossref","first-page":"S11","DOI":"10.1186\/1471-2105-11-S12-S11","article-title":"An intuitive Python interface for Bioconductor libraries demonstrates the utility of language translators","volume":"11","author":"Gautier","year":"2010","journal-title":"BMC Bioinformatics"},{"key":"2023013108403668400_btz594-B9","doi-asserted-by":"crossref","first-page":"981","DOI":"10.1093\/bioinformatics\/bty713","article-title":"Bigmelon: tools for analysing large DNA methylation datasets","volume":"6","author":"Gorrie-Stone","year":"2019","journal-title":"Bioinformatics"},{"key":"2023013108403668400_btz594-B10","doi-asserted-by":"crossref","first-page":"86.","DOI":"10.1186\/1471-2105-13-86","article-title":"DNA methylation arrays as surrogate measures of cell mixture distribution","volume":"13","author":"Houseman","year":"2012","journal-title":"BMC Bioinformatics"},{"key":"2023013108403668400_btz594-B11","doi-asserted-by":"crossref","first-page":"R31.","DOI":"10.1186\/gb-2014-15-2-r31","article-title":"Accounting for cellular heterogeneity is critical in epigenome-wide association studies","volume":"15","author":"Jaffe","year":"2014","journal-title":"Genome Biol"},{"key":"2023013108403668400_btz594-B12","doi-asserted-by":"crossref","first-page":"e67378.","DOI":"10.1371\/journal.pone.0067378","article-title":"Continuous aging of the human DNA methylome throughout the human lifespan","volume":"8","author":"Johansson","year":"2013","journal-title":"PLoS One"},{"key":"2023013108403668400_btz594-B13","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1186\/s13148-016-0230-5","article-title":"Peripheral blood methylation profiling of female Crohn\u2019s disease patients","volume":"8","author":"Li Yim","year":"2016","journal-title":"Clin. Epigenet"},{"key":"2023013108403668400_btz594-B14","author":"Mahpour","year":"2016"},{"key":"2023013108403668400_btz594-B15","author":"McInnes","year":"2018"},{"key":"2023013108403668400_btz594-B16","doi-asserted-by":"crossref","first-page":"3983","DOI":"10.1093\/bioinformatics\/bty476","article-title":"Meffil: efficient normalization and analysis of very large DNA methylation datasets","volume":"34","author":"Min","year":"2018","journal-title":"Bioinformatics"},{"key":"2023013108403668400_btz594-B17","doi-asserted-by":"crossref","first-page":"389","DOI":"10.2217\/epi.15.114","article-title":"Validation of a DNA methylation microarray for 850,000 CpG sites of the human genome enriched in enhancer sequences","volume":"8","author":"Moran","year":"2016","journal-title":"Epigenomics"},{"key":"2023013108403668400_btz594-B18","author":"Pai","year":"2019"},{"key":"2023013108403668400_btz594-B19","first-page":"2825","article-title":"Scikit-learn: machine learning in Python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J. Mach. Learn. Res"},{"key":"2023013108403668400_btz594-B20","doi-asserted-by":"crossref","first-page":"293.","DOI":"10.1186\/1471-2164-14-293","article-title":"A data-driven approach to preprocessing Illumina 450K methylation array data","volume":"14","author":"Pidsley","year":"2013","journal-title":"BMC Genomics"},{"key":"2023013108403668400_btz594-B21","doi-asserted-by":"crossref","first-page":"1870","DOI":"10.1093\/bioinformatics\/btx059","article-title":"GLINT: a user-friendly toolset for the analysis of high-throughput DNA-methylation array data","volume":"33","author":"Rahmani","year":"2017","journal-title":"Bioinformatics"},{"key":"2023013108403668400_btz594-B22","doi-asserted-by":"crossref","DOI":"10.1186\/s13059-018-1448-7","article-title":"An optimized library for reference-based deconvolution of whole-blood biospecimens assayed using the Illumina HumanMethylationEPIC BeadArray","volume":"19","author":"Salas","year":"2018","journal-title":"Genome Biol"},{"key":"2023013108403668400_btz594-B23","doi-asserted-by":"crossref","first-page":"561","DOI":"10.1080\/15592294.2017.1319043","article-title":"Integrative epigenetic and genetic pan-cancer somatic alteration portraits","volume":"12","author":"Salas","year":"2017","journal-title":"Epigenetics"},{"key":"2023013108403668400_btz594-B24","doi-asserted-by":"crossref","first-page":"692","DOI":"10.4161\/epi.6.6.16196","article-title":"Validation of a DNA methylation microarray for 450,000 CpG sites in the human genome","volume":"6","author":"Sandoval","year":"2011","journal-title":"Epigenetics"},{"key":"2023013108403668400_btz594-B25","doi-asserted-by":"crossref","first-page":"166","DOI":"10.1109\/ICDCSW.2011.20","article-title":"Finding a \u2018Kneedle\u2019 in a haystack: detecting knee points in system behavior","author":"Satopaa","year":"2011","journal-title":"2011 31st International Conference on Distributed Computing Systems Workshops"},{"key":"2023013108403668400_btz594-B26","doi-asserted-by":"crossref","first-page":"4148.","DOI":"10.1038\/s41598-018-22579-0","article-title":"Biological age is a predictor of mortality in ischemic stroke","volume":"8","author":"Soriano-T\u00e1rraga","year":"2018","journal-title":"Sci. Rep"},{"key":"2023013108403668400_btz594-B27","doi-asserted-by":"crossref","first-page":"e20.","DOI":"10.1093\/nar\/gkv907","article-title":"ENmix: a novel background correction method for Illumina HumanMethylation450 BeadChip","volume":"44","author":"Xu","year":"2016","journal-title":"Nucleic Acids Res"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btz594\/29026289\/btz594.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/24\/5379\/48977658\/bioinformatics_35_24_5379.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/24\/5379\/48977658\/bioinformatics_35_24_5379.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T13:08:38Z","timestamp":1675170518000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/35\/24\/5379\/5542385"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2019,8,1]]},"references-count":27,"journal-issue":{"issue":"24","published-print":{"date-parts":[[2019,12,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btz594","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/604496","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2019,12,15]]},"published":{"date-parts":[[2019,8,1]]}}}