{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T10:33:34Z","timestamp":1774002814584,"version":"3.50.1"},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T00:00:00Z","timestamp":1772496000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T00:00:00Z","timestamp":1772582400000},"content-version":"vor","delay-in-days":1,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"DOI":"10.1186\/s12859-025-06365-0","type":"journal-article","created":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T09:56:27Z","timestamp":1772531787000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DNA sequence contamination analyzer (DNASCAN): a supervised analysis toolkit for detecting and removing DNA contaminants"],"prefix":"10.1186","volume":"27","author":[{"given":"John Stephen","family":"Malamon","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,3,3]]},"reference":[{"issue":"17","key":"6365_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2024.e36914","volume":"10","author":"JS Malamon","year":"2024","unstructured":"Malamon JS. DNA N-gram analysis framework (DNAnamer): a generalized N-gram frequency analysis framework for the supervised classification of DNA sequences. Heliyon. 2024;10(17):e36914. 
https:\/\/doi.org\/10.1016\/j.heliyon.2024.e36914.","journal-title":"Heliyon"},{"issue":"2","key":"6365_CR2","doi-asserted-by":"publisher","first-page":"87","DOI":"10.2144\/000114320","volume":"59","author":"P Korlevic","year":"2015","unstructured":"Korlevic P, Gerber T, Gansauge MT, Hajdinjak M, Nagel S, Aximu-Petri A, et al. Reducing microbial and human contamination in DNA extractions from ancient bones and teeth. Biotechniques. 2015;59(2):87\u201393. https:\/\/doi.org\/10.2144\/000114320.","journal-title":"Biotechniques"},{"issue":"3","key":"6365_CR3","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1089\/bio.2010.0012","volume":"8","author":"JE Lee","year":"2010","unstructured":"Lee JE, Hong EJ, Shim SM, Kim JW, Bae GR, Cho YS, et al. Bacterial contamination of blood DNA samples is associated with donor\u2019s health condition. Biopreserv Biobank. 2010;8(3):127\u201331. https:\/\/doi.org\/10.1089\/bio.2010.0012.","journal-title":"Biopreserv Biobank"},{"issue":"11-12","key":"6365_CR4","doi-asserted-by":"publisher","first-page":"2134","DOI":"10.1101\/gr.276794.122","volume":"32","author":"CJ Yoon","year":"2022","unstructured":"Yoon CJ, Kim SY, Nam CH, Lee J, Park JW, Mun J, et al. Estimation of intrafamilial DNA contamination in family trio genome sequencing using deviation from Mendelian inheritance. Genome Res. 2022;32(11\u201312):2134\u201344. https:\/\/doi.org\/10.1101\/gr.276794.122.","journal-title":"Genome Res"},{"issue":"6","key":"6365_CR5","doi-asserted-by":"publisher","first-page":"954","DOI":"10.1101\/gr.245373.118","volume":"29","author":"FP Breitwieser","year":"2019","unstructured":"Breitwieser FP, Pertea M, Zimin AV, Salzberg SL. Human contamination in bacterial genomes has created thousands of spurious proteins. Genome Res. 2019;29(6):954\u201360. 
https:\/\/doi.org\/10.1101\/gr.245373.118.","journal-title":"Genome Res"},{"issue":"1","key":"6365_CR6","doi-asserted-by":"publisher","DOI":"10.1186\/s12915-020-0748-z","volume":"18","author":"GA Goig","year":"2020","unstructured":"Goig GA, Blanco S, Garcia-Basteiro AL, Comas I. Contaminant DNA in bacterial sequencing experiments is a major source of false genetic variability. BMC Biol. 2020;18(1):24. https:\/\/doi.org\/10.1186\/s12915-020-0748-z.","journal-title":"BMC Biol"},{"issue":"5","key":"6365_CR7","doi-asserted-by":"publisher","first-page":"839","DOI":"10.1016\/j.ajhg.2012.09.004","volume":"91","author":"G Jun","year":"2012","unstructured":"Jun G, Flickinger M, Hetrick KN, Romm JM, Doheny KF, Abecasis GR, et al. Detecting and estimating contamination of human DNA samples in sequencing and array-based genotype data. Am J Hum Genet. 2012;91(5):839\u201348. https:\/\/doi.org\/10.1016\/j.ajhg.2012.09.004.","journal-title":"Am J Hum Genet"},{"issue":"24","key":"6365_CR8","doi-asserted-by":"publisher","first-page":"5296","DOI":"10.1021\/ac9805854","volume":"70","author":"T Nogami","year":"1998","unstructured":"Nogami T, Ohto T, Kawaguchi O, Zaitsu Y, Sasaki S. Estimation of bacterial contamination in ultrapure water: application of the anti-DNA antibody. Anal Chem. 1998;70(24):5296\u2013301. https:\/\/doi.org\/10.1021\/ac9805854. (PubMed PMID: 9868920).","journal-title":"Anal Chem"},{"issue":"3-4","key":"6365_CR9","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1016\/j.mcp.2005.09.007","volume":"20","author":"H Shen","year":"2006","unstructured":"Shen H, Rogelj S, Kieft TL. Sensitive, real-time PCR detects low-levels of contamination by Legionella pneumophila in commercial reagents. Mol Cell Probes. 2006;20(3\u20134):147\u201353. 
https:\/\/doi.org\/10.1016\/j.mcp.2005.09.007.","journal-title":"Mol Cell Probes"},{"issue":"1","key":"6365_CR10","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1006\/mcpr.1994.1002","volume":"8","author":"M Maiwald","year":"1994","unstructured":"Maiwald M, Ditton HJ, Sonntag HG, von Knebel Doeberitz M. Characterization of contaminating DNA in Taq polymerase which occurs during amplification with a primer set for Legionella 5S ribosomal RNA. Mol Cell Probes. 1994;8(1):11\u20134. https:\/\/doi.org\/10.1006\/mcpr.1994.1002.","journal-title":"Mol Cell Probes"},{"issue":"5","key":"6365_CR11","doi-asserted-by":"publisher","first-page":"2264","DOI":"10.1128\/JCM.42.5.2264-2267.2004","volume":"42","author":"T Newsome","year":"2004","unstructured":"Newsome T, Li BJ, Zou N, Lo SC. Presence of bacterial phage-like DNA sequences in commercial Taq DNA polymerase reagents. J Clin Microbiol. 2004;42(5):2264\u20137. https:\/\/doi.org\/10.1128\/JCM.42.5.2264-2267.2004.","journal-title":"J Clin Microbiol"},{"issue":"6","key":"6365_CR12","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1016\/0890-8508(90)90003-i","volume":"4","author":"KH Rand","year":"1990","unstructured":"Rand KH, Houck H. Taq polymerase contains bacterial DNA of unknown origin. Mol Cell Probes. 1990;4(6):445\u201350. https:\/\/doi.org\/10.1016\/0890-8508(90)90003-i.","journal-title":"Mol Cell Probes"},{"key":"6365_CR13","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/978-3-540-35306-5_10","volume-title":"Structural approaches to sequence evolution: molecules, networks, populations","author":"D Charif","year":"2007","unstructured":"Charif D, Lobry JR. SeqinR 1.0\u20132: a contributed package to the R project for statistical computing devoted to biological sequences retrieval and analysis. In: Bastolla U, Porto M, Roman HE, Vendruscolo M, editors. Structural approaches to sequence evolution: molecules, networks, populations. Berlin: Springer; 2007. p. 
207\u201332."},{"issue":"1","key":"6365_CR14","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-020-58586-3","volume":"10","author":"E Jue","year":"2020","unstructured":"Jue E, Witters D, Ismagilov RF. Two-phase wash to solve the ubiquitous contaminant-carryover problem in commercial nucleic-acid extraction kits. Sci Rep. 2020;10(1):1940. https:\/\/doi.org\/10.1038\/s41598-020-58586-3.","journal-title":"Sci Rep"},{"issue":"2","key":"6365_CR15","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1016\/j.mimet.2004.11.018","volume":"61","author":"T Mohammadi","year":"2005","unstructured":"Mohammadi T, Reesink HW, Vandenbroucke-Grauls CM, Savelkoul PH. Removal of contaminating DNA from commercial nucleic acid extraction kit reagents. J Microbiol Methods. 2005;61(2):285\u20138. https:\/\/doi.org\/10.1016\/j.mimet.2004.11.018.","journal-title":"J Microbiol Methods"},{"key":"6365_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.forsciint.2019.06.025","volume":"302","author":"K Toole","year":"2019","unstructured":"Toole K, Roffey P, Young E, Cho K, Shaw T, Smith M, et al. Evaluation of commercial forensic DNA extraction kits for decontamination and extraction of DNA from biological samples contaminated with radionuclides. For Sci Int. 2019;302:109867. https:\/\/doi.org\/10.1016\/j.forsciint.2019.06.025.","journal-title":"For Sci Int"},{"issue":"1","key":"6365_CR17","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-022-13269-z","volume":"12","author":"B Chrisman","year":"2022","unstructured":"Chrisman B, He C, Jung JY, Stockham N, Paskov K, Washington P, et al. The human \u201ccontaminome\u201d: bacterial, viral, and computational contamination in whole genome sequences from 1000 families. Sci Rep. 2022;12(1):9863. 
https:\/\/doi.org\/10.1038\/s41598-022-13269-z.","journal-title":"Sci Rep"},{"issue":"5","key":"6365_CR18","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0097876","volume":"9","author":"M Laurence","year":"2014","unstructured":"Laurence M, Hatzis C, Brash DE. Common contaminants in next-generation sequencing that hinder discovery of low-abundance microbes. PLoS ONE. 2014;9(5):e97876. https:\/\/doi.org\/10.1371\/journal.pone.0097876.","journal-title":"PLoS ONE"},{"issue":"2","key":"6365_CR19","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1016\/j.femsim.2004.05.009","volume":"42","author":"RP Peters","year":"2004","unstructured":"Peters RP, Mohammadi T, Vandenbroucke-Grauls CM, Danner SA, van Agtmael MA, Savelkoul PH. Detection of bacterial DNA in blood samples from febrile patients: underestimated infection or emerging contamination? FEMS Immunol Med Microbiol. 2004;42(2):249\u201353. https:\/\/doi.org\/10.1016\/j.femsim.2004.05.009.","journal-title":"FEMS Immunol Med Microbiol"},{"issue":"3-4","key":"6365_CR20","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1007\/s12079-008-0034-2","volume":"2","author":"F Chen","year":"2008","unstructured":"Chen F, Mackerell AD Jr., Luo Y, Shapiro P. Using Caenorhabditis elegans as a model organism for evaluating extracellular signal-regulated kinase docking domain inhibitors. J Cell Commun Signal. 2008;2(3\u20134):81\u201392. https:\/\/doi.org\/10.1007\/s12079-008-0034-2.","journal-title":"J Cell Commun Signal"},{"issue":"7","key":"6365_CR21","doi-asserted-by":"publisher","first-page":"2075","DOI":"10.1128\/AEM.07486-11","volume":"78","author":"EK Marsh","year":"2012","unstructured":"Marsh EK, May RC. Caenorhabditis elegans, a model organism for investigating immunity. Appl Environ Microbiol. 2012;78(7):2075\u201381. 
https:\/\/doi.org\/10.1128\/AEM.07486-11.","journal-title":"Appl Environ Microbiol"},{"issue":"1","key":"6365_CR22","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/BF02700625","volume":"35","author":"S Murakami","year":"2007","unstructured":"Murakami S. Caenorhabditis elegans as a model system to study aging of learning and memory. Mol Neurobiol. 2007;35(1):85\u201394. https:\/\/doi.org\/10.1007\/BF02700625. (PubMed PMID: 17519507).","journal-title":"Mol Neurobiol"},{"key":"6365_CR23","doi-asserted-by":"publisher","DOI":"10.3390\/biom13030478","author":"A Roussos","year":"2023","unstructured":"Roussos A, Kitopoulou K, Borbolis F, Palikaras K. Caenorhabditis elegans as a model system to study human neurodegenerative disorders. Biomolecules. 2023. https:\/\/doi.org\/10.3390\/biom13030478.","journal-title":"Biomolecules"},{"key":"6365_CR24","doi-asserted-by":"publisher","DOI":"10.1186\/s12263-019-0637-7","volume":"14","author":"A Zecic","year":"2019","unstructured":"Zecic A, Dhondt I, Braeckman BP. The nutritional requirements of Caenorhabditis elegans. Genes Nutr. 2019;14:15. https:\/\/doi.org\/10.1186\/s12263-019-0637-7.","journal-title":"Genes Nutr"},{"key":"6365_CR25","doi-asserted-by":"publisher","DOI":"10.1101\/2023.01.13.523974","author":"ZD Bush","year":"2023","unstructured":"Bush ZD, Naftaly AFS, Dinwiddie D, Albers C, Hillers KJ, Libuda DE. Comprehensive detection of structural variation and transposable element differences between wild type laboratory lineages of C. elegans. bioRxiv. 2023. https:\/\/doi.org\/10.1101\/2023.01.13.523974.","journal-title":"bioRxiv"},{"issue":"1","key":"6365_CR26","doi-asserted-by":"publisher","DOI":"10.1038\/ncomms2071","volume":"3","author":"H Liu","year":"2012","unstructured":"Liu H, Wang X, Wang HD, Wu J, Ren J, Meng L, et al. Escherichia coli noncoding RNAs can affect gene expression and physiology of Caenorhabditis elegans. Nat Commun. 2012;3(1):1073. 
https:\/\/doi.org\/10.1038\/ncomms2071.","journal-title":"Nat Commun"},{"issue":"1","key":"6365_CR27","doi-asserted-by":"publisher","DOI":"10.1186\/s13059-020-02023-1","volume":"21","author":"M Steinegger","year":"2020","unstructured":"Steinegger M, Salzberg SL. Terminating contamination: large-scale search identifies more than 2,000,000 contaminated entries in GenBank. Genome Biol. 2020;21(1):115. https:\/\/doi.org\/10.1186\/s13059-020-02023-1.","journal-title":"Genome Biol"},{"issue":"1","key":"6365_CR28","doi-asserted-by":"publisher","DOI":"10.1038\/s41392-022-00974-4","volume":"7","author":"K Hou","year":"2022","unstructured":"Hou K, Wu ZX, Chen XY, Wang JQ, Zhang D, Xiao C, et al. Microbiota in health and diseases. Signal Transduct Target Ther. 2022;7(1):135. https:\/\/doi.org\/10.1038\/s41392-022-00974-4.","journal-title":"Signal Transduct Target Ther"},{"issue":"1","key":"6365_CR29","doi-asserted-by":"publisher","DOI":"10.1038\/s41522-025-00683-0","volume":"11","author":"J Rodriguez","year":"2025","unstructured":"Rodriguez J, Cordaillat-Simmons M, Pot B, Druart C. The regulatory framework for microbiome-based therapies: insights into European regulatory developments. NPJ Biofilms Microbiomes. 2025;11(1):53. https:\/\/doi.org\/10.1038\/s41522-025-00683-0.","journal-title":"NPJ Biofilms Microbiomes"},{"issue":"6","key":"6365_CR30","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1038\/s41579-021-00667-9","volume":"20","author":"MT Sorbara","year":"2022","unstructured":"Sorbara MT, Pamer EG. Microbiome-based therapeutics. Nat Rev Microbiol. 2022;20(6):365\u201380. https:\/\/doi.org\/10.1038\/s41579-021-00667-9.","journal-title":"Nat Rev Microbiol"},{"issue":"1","key":"6365_CR31","doi-asserted-by":"publisher","DOI":"10.1186\/s13099-016-0103-7","volume":"8","author":"A Glassing","year":"2016","unstructured":"Glassing A, Dowd SE, Galandiuk S, Davis B, Chiodini RJ. 
Inherent bacterial DNA contamination of extraction and sequencing reagents may affect interpretation of microbiota in low bacterial biomass samples. Gut Pathog. 2016;8(1):24. https:\/\/doi.org\/10.1186\/s13099-016-0103-7.","journal-title":"Gut Pathog"},{"issue":"1","key":"6365_CR32","doi-asserted-by":"publisher","DOI":"10.1186\/s13059-019-1843-8","volume":"20","author":"KL Greathouse","year":"2019","unstructured":"Greathouse KL, Sinha R, Vogtmann E. DNA extraction for human microbiome studies: the issue of standardization. Genome Biol. 2019;20(1):212. https:\/\/doi.org\/10.1186\/s13059-019-1843-8.","journal-title":"Genome Biol"},{"issue":"1","key":"6365_CR33","doi-asserted-by":"publisher","DOI":"10.1186\/s12915-014-0087-z","volume":"12","author":"SJ Salter","year":"2014","unstructured":"Salter SJ, Cox MJ, Turek EM, Calus ST, Cookson WO, Moffatt MF, et al. Reagent and laboratory contamination can critically impact sequence-based microbiome analyses. BMC Biol. 2014;12(1):87. https:\/\/doi.org\/10.1186\/s12915-014-0087-z.","journal-title":"BMC Biol"},{"issue":"12","key":"6365_CR34","doi-asserted-by":"publisher","DOI":"10.1186\/s13059-014-0564-2","volume":"15","author":"S Weiss","year":"2014","unstructured":"Weiss S, Amir A, Hyde ER, Metcalf JL, Song SJ, Knight R. Tracking down the sources of experimental contamination in microbiome studies. Genome Biol. 2014;15(12):564. https:\/\/doi.org\/10.1186\/s13059-014-0564-2.","journal-title":"Genome Biol"},{"issue":"1","key":"6365_CR35","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-019-13036-1","volume":"10","author":"JS Johnson","year":"2019","unstructured":"Johnson JS, Spakowicz DJ, Hong BY, Petersen LM, Demkowicz P, Chen L, et al. Evaluation of 16S rRNA gene sequencing for species and strain-level microbiome analysis. Nat Commun. 2019;10(1):5029. 
https:\/\/doi.org\/10.1038\/s41467-019-13036-1.","journal-title":"Nat Commun"},{"issue":"3","key":"6365_CR36","doi-asserted-by":"publisher","first-page":"592","DOI":"10.1520\/JFS14515J","volume":"44","author":"DM Kupfer","year":"1999","unstructured":"Kupfer DM, Chaturvedi AK, Canfield DV, Roe BA. PCR-based identification of postmortem microbial contaminants\u2013a preliminary study. J Forensic Sci. 1999;44(3):592\u20136 (PubMed PMID: 10408116).","journal-title":"J Forensic Sci"},{"issue":"1","key":"6365_CR37","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-34409-z","volume":"13","author":"Y Liu","year":"2022","unstructured":"Liu Y, Elworth RAL, Jochum MD, Aagaard KM, Treangen TJ. De novo identification of microbial contaminants in low microbial biomass microbiomes with Squeegee. Nat Commun. 2022;13(1):6799. https:\/\/doi.org\/10.1038\/s41467-022-34409-z.","journal-title":"Nat Commun"},{"issue":"8","key":"6365_CR38","doi-asserted-by":"publisher","first-page":"1216","DOI":"10.1093\/bioinformatics\/btw821","volume":"33","author":"HG Drost","year":"2017","unstructured":"Drost HG, Paszkowski J. Biomartr: genomic data retrieval with R. Bioinformatics. 2017;33(8):1216\u20137. https:\/\/doi.org\/10.1093\/bioinformatics\/btw821.","journal-title":"Bioinformatics"},{"issue":"6","key":"6365_CR39","doi-asserted-by":"publisher","first-page":"1023","DOI":"10.1101\/gr.246082.118","volume":"29","author":"C Kim","year":"2019","unstructured":"Kim C, Kim J, Kim S, Cook DE, Evans KS, Andersen EC, et al. Long-read sequencing reveals intra-species tolerance of substantial structural variations and new subtelomere formation in C. elegans. Genome Res. 2019;29(6):1023\u201335. https:\/\/doi.org\/10.1101\/gr.246082.118.","journal-title":"Genome Res"},{"issue":"1","key":"6365_CR40","doi-asserted-by":"publisher","first-page":"74","DOI":"10.3414\/ME00-01-0052","volume":"51","author":"JD Malley","year":"2012","unstructured":"Malley JD, Kruppa J, Dasgupta A, Malley KG, Ziegler A. 
Probability machines: consistent probability estimation using nonparametric learning machines. Methods Inf Med. 2012;51(1):74\u201381. https:\/\/doi.org\/10.3414\/ME00-01-0052.","journal-title":"Methods Inf Med"},{"key":"6365_CR41","unstructured":"Team RC. R: A language and environment for statistical computing (Version 4.0. 2). R Foundation for Statistical Computing. 2020."},{"issue":"5","key":"6365_CR42","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v028.i05","volume":"28","author":"M Kuhn","year":"2008","unstructured":"Kuhn M. Building predictive models in R using the caret package. J Stat Softw. 2008;28(5):1\u201326. https:\/\/doi.org\/10.18637\/jss.v028.i05.","journal-title":"J Stat Softw"},{"key":"6365_CR43","unstructured":"Liaw AaW, Matthew. Classification and Regression by randomForest. R News. 2002;2:18\u201322."},{"key":"6365_CR44","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-7-3","volume":"7","author":"R Diaz-Uriarte","year":"2006","unstructured":"Diaz-Uriarte R, de Alvarez Andres S. Gene selection and classification of microarray data using random forest. BMC Bioinform. 2006;7:3. https:\/\/doi.org\/10.1186\/1471-2105-7-3.","journal-title":"BMC Bioinform"},{"issue":"1","key":"6365_CR45","doi-asserted-by":"publisher","DOI":"10.2202\/1544-6115.1691","volume":"10","author":"BA Goldstein","year":"2011","unstructured":"Goldstein BA, Polley EC, Briggs FB. Random forests for genetic association studies. Stat Appl Genet Mol Biol. 2011;10(1):32. https:\/\/doi.org\/10.2202\/1544-6115.1691.","journal-title":"Stat Appl Genet Mol Biol"},{"issue":"1","key":"6365_CR46","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-021-01253-y","volume":"11","author":"E Pellegrino","year":"2021","unstructured":"Pellegrino E, Jacques C, Beaufils N, Nanni I, Carlioz A, Metellus P, et al. Machine learning random forest for predicting oncosomatic variant NGS analysis. Sci Rep. 2021;11(1):21820. 
https:\/\/doi.org\/10.1038\/s41598-021-01253-y.","journal-title":"Sci Rep"},{"issue":"1","key":"6365_CR47","doi-asserted-by":"publisher","DOI":"10.1186\/s13148-019-0736-8","volume":"11","author":"R Toth","year":"2019","unstructured":"Toth R, Schiffmann H, Hube-Magg C, Buscheck F, Hoflmayer D, Weidemann S, et al. Random forest-based modelling to detect biomarkers for prostate cancer progression. Clin Epigenetics. 2019;11(1):148. https:\/\/doi.org\/10.1186\/s13148-019-0736-8.","journal-title":"Clin Epigenetics"},{"issue":"1","key":"6365_CR48","doi-asserted-by":"publisher","DOI":"10.1186\/s12863-018-0710-z","volume":"20","author":"PK Meher","year":"2019","unstructured":"Meher PK, Sahu TK, Gahoi S, Tomar R, Rao AR. funbarRF: DNA barcode-based fungal species prediction using multiclass Random Forest supervised learning model. BMC Genet. 2019;20(1):2. https:\/\/doi.org\/10.1186\/s12863-018-0710-z.","journal-title":"BMC Genet"},{"issue":"2","key":"6365_CR49","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1016\/j.gene.2016.07.010","volume":"592","author":"PK Meher","year":"2016","unstructured":"Meher PK, Sahu TK, Rao AR. Identification of species based on DNA barcode using k-mer feature vector and Random forest classifier. Gene. 2016;592(2):316\u201324. https:\/\/doi.org\/10.1016\/j.gene.2016.07.010.","journal-title":"Gene"},{"issue":"10","key":"6365_CR50","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2023.e20161","volume":"9","author":"LS Riza","year":"2023","unstructured":"Riza LS, Zain MI, Izzuddin A, Prasetyo Y, Hidayat T, Abu Samah KAF. Implementation of machine learning in DNA barcoding for determining the plant family taxonomy. Heliyon. 2023;9(10):e20161. https:\/\/doi.org\/10.1016\/j.heliyon.2023.e20161.","journal-title":"Heliyon"},{"key":"6365_CR51","doi-asserted-by":"crossref","unstructured":"Oshiro TM, Perez PS, Baranauskas JA, editors. 
How Many Trees in a Random Forest?2012; Berlin, Heidelberg: Springer Berlin Heidelberg.","DOI":"10.1007\/978-3-642-31537-4_13"},{"key":"6365_CR52","doi-asserted-by":"publisher","DOI":"10.1016\/j.rse.2019.04.034","volume":"232","author":"K Zhao","year":"2019","unstructured":"Zhao K, Wulder MA, Hu T, Bright R, Wu Q, Qin H, et al. Detecting change-point, trend, and seasonality in satellite time series data to track abrupt changes and nonlinear dynamics: a Bayesian ensemble algorithm. Remote Sens Environ. 2019;232:111181. https:\/\/doi.org\/10.1016\/j.rse.2019.04.034.","journal-title":"Remote Sens Environ"},{"issue":"1","key":"6365_CR53","doi-asserted-by":"publisher","DOI":"10.1186\/s13073-020-00791-w","volume":"12","author":"DC Koboldt","year":"2020","unstructured":"Koboldt DC. Best practices for variant calling in clinical sequencing. Genome Med. 2020;12(1):91. https:\/\/doi.org\/10.1186\/s13073-020-00791-w.","journal-title":"Genome Med"},{"key":"6365_CR54","doi-asserted-by":"publisher","DOI":"10.26508\/lsa.202302181","author":"JS Malamon","year":"2024","unstructured":"Malamon JS, Farrell JJ, Xia LC, Dombroski BA, Das RG, Way J, et al. A comparative study of structural variant calling in WGS from Alzheimer\u2019s disease families. Life Sci Alliance. 2024. https:\/\/doi.org\/10.26508\/lsa.202302181.","journal-title":"Life Sci Alliance"},{"issue":"4","key":"6365_CR55","doi-asserted-by":"publisher","first-page":"808","DOI":"10.1016\/j.ygeno.2018.05.004","volume":"111","author":"AC Naj","year":"2019","unstructured":"Naj AC, Lin H, Vardarajan BN, White S, Lancour D, Ma Y, et al. Quality control and integration of genotypes from two calling pipelines for whole genome sequence data in the Alzheimer\u2019s disease sequencing project. Genomics. 2019;111(4):808\u201318. 
https:\/\/doi.org\/10.1016\/j.ygeno.2018.05.004.","journal-title":"Genomics"},{"issue":"2","key":"6365_CR56","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1093\/bioinformatics\/btu591","volume":"31","author":"V Trubetskoy","year":"2015","unstructured":"Trubetskoy V, Rodriguez A, Dave U, Campbell N, Crawford EL, Cook EH, et al. Consensus genotyper for exome sequencing (CGES): improving the quality of exome variant genotypes. Bioinformatics. 2015;31(2):187\u201393. https:\/\/doi.org\/10.1093\/bioinformatics\/btu591.","journal-title":"Bioinformatics"},{"key":"6365_CR57","unstructured":"Andrews S. FastQC: A Quality Control Tool for High Throughput Sequence Data. . Scientific Research. 2010."},{"key":"6365_CR58","doi-asserted-by":"publisher","DOI":"10.12688\/f1000research.21142.2","volume":"8","author":"G de Sena Brandine","year":"2019","unstructured":"de Sena Brandine G, Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Res. 2019;8:1874. https:\/\/doi.org\/10.12688\/f1000research.21142.2.","journal-title":"F1000Res"},{"issue":"17","key":"6365_CR59","doi-asserted-by":"publisher","first-page":"i884","DOI":"10.1093\/bioinformatics\/bty560","volume":"34","author":"S Chen","year":"2018","unstructured":"Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018;34(17):i884\u201390. https:\/\/doi.org\/10.1093\/bioinformatics\/bty560.","journal-title":"Bioinformatics"},{"issue":"3","key":"6365_CR60","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0017288","volume":"6","author":"R Schmieder","year":"2011","unstructured":"Schmieder R, Edwards R. Fast identification and removal of sequence contamination from genomic and metagenomic datasets. PLoS ONE. 2011;6(3):e17288. 
https:\/\/doi.org\/10.1371\/journal.pone.0017288.","journal-title":"PLoS ONE"},{"issue":"10","key":"6365_CR61","doi-asserted-by":"publisher","first-page":"2826","DOI":"10.1016\/j.ymthe.2023.07.025","volume":"31","author":"MA Brimble","year":"2023","unstructured":"Brimble MA, Winston SM, Davidoff AM. Stowaways in the cargo: contaminating nucleic acids in rAAV preparations for gene therapy. Mol Ther. 2023;31(10):2826\u201338. https:\/\/doi.org\/10.1016\/j.ymthe.2023.07.025.","journal-title":"Mol Ther"},{"issue":"11-12","key":"6365_CR62","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1089\/hum.2023.006","volume":"34","author":"K Higashiyama","year":"2023","unstructured":"Higashiyama K, Yuan Y, Hashiba N, Masumi-Koizumi K, Yusa K, Uchida K. Quantitation of residual host cell DNA in recombinant adeno-associated virus using droplet digital polymerase chain reaction. Hum Gene Ther. 2023;34(11\u201312):578\u201385. https:\/\/doi.org\/10.1089\/hum.2023.006.","journal-title":"Hum Gene Ther"},{"issue":"4","key":"6365_CR63","doi-asserted-by":"publisher","DOI":"10.1016\/j.omtm.2024.101334","volume":"32","author":"CR Luthers","year":"2024","unstructured":"Luthers CR, Ha SM, Mittelhauser A, Morselli M, Long JD, Kuo CY, et al. DNA contamination within recombinant adeno-associated virus preparations correlates with decreased CD34(+) cell clonogenic potential. Mol Ther Methods Clin Dev. 2024;32(4):101334. 
https:\/\/doi.org\/10.1016\/j.omtm.2024.101334.","journal-title":"Mol Ther Methods Clin Dev"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s12859-025-06365-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06365-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06365-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T08:06:23Z","timestamp":1773993983000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1186\/s12859-025-06365-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,3]]},"references-count":63,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["6365"],"URL":"https:\/\/doi.org\/10.1186\/s12859-025-06365-0","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,3]]},"assertion":[{"value":"23 October 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2026","order":6,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Update","order":7,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article 
History"}},{"value":"The link was incorrect in the original publication. The article has been updated to rectify the error.","order":8,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Human (hg38):\n                      \n                      . E. coli (ASM584v2):\n                      \n                      . C. elegans (WBcel235):\n                      \n                      . Links to NCBI Whole Genomes: C. elegans (CB4856):\n                      \n                      .","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Links to NCBI Reference Genomes"}}],"article-number":"57"}}