{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:01:46Z","timestamp":1774944106349,"version":"3.50.1"},"reference-count":37,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T00:00:00Z","timestamp":1725321600000},"content-version":"vor","delay-in-days":2,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Tracking SARS-CoV-2 variants through genomic sequencing has been an important part of the global response to the pandemic and remains a useful tool for surveillance of the virus. As well as whole-genome sequencing of clinical samples, this surveillance effort has been aided by amplicon sequencing of wastewater samples, which proved effective in real case studies. Because of its relevance to public healthcare decisions, testing and benchmarking wastewater sequencing analysis methods is also crucial, which necessitates a simulator. Although metagenomic simulators exist, none is fit for the purpose of simulating the metagenomes produced through amplicon sequencing of wastewater.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Our new simulation tool, SWAMPy (Simulating SARS-CoV-2 Wastewater Amplicon Metagenomes with Python), is intended to provide realistic simulated SARS-CoV-2 wastewater sequencing datasets with which other programs that rely on this type of data can be evaluated and improved. Our tool is suitable for simulating Illumina short-read RT\u2013PCR amplified metagenomes.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>The code for this project is available at https:\/\/github.com\/goldman-gp-ebi\/SWAMPy. It can be installed on any Unix-based operating system and is available under the GPL-v3 license.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae532","type":"journal-article","created":{"date-parts":[[2024,8,31]],"date-time":"2024-08-31T22:51:04Z","timestamp":1725144664000},"source":"Crossref","is-referenced-by-count":4,"title":["SWAMPy: simulating SARS-CoV-2 wastewater amplicon metagenomes"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8258-4673","authenticated-orcid":false,"given":"William","family":"Boulton","sequence":"first","affiliation":[{"name":"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI) , Hinxton, Cambs CB10 1SD, United Kingdom"},{"name":"Department of Computing Sciences, University of East Anglia , Norwich, Norfolk NR4 7TJ, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5877-7945","authenticated-orcid":false,"given":"Fatma Rabia","family":"Fidan","sequence":"additional","affiliation":[{"name":"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI) , Hinxton, Cambs CB10 1SD, United Kingdom"},{"name":"Department of Biological Sciences, Middle East Technical University , Ankara 06800, Turkey"},{"name":"Cancer Dynamics Laboratory, Francis Crick Institute , London NW1 1AT, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9862-5890","authenticated-orcid":false,"given":"Hubert","family":"Denise","sequence":"additional","affiliation":[{"name":"Department of Health and Social Care, UK Health Security Agency , London SW1P 3HX, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1776-8564","authenticated-orcid":false,"given":"Nicola","family":"De Maio","sequence":"additional","affiliation":[{"name":"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI) , Hinxton, Cambs CB10 1SD, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8486-2211","authenticated-orcid":false,"given":"Nick","family":"Goldman","sequence":"additional","affiliation":[{"name":"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI) , Hinxton, Cambs CB10 1SD, United Kingdom"}]}],"member":"286","published-online":{"date-parts":[[2024,9,3]]},"reference":[{"key":"2024091502104144500_btae532-B1","author":"Anaconda, Inc. Conda","year":"2023"},{"key":"2024091502104144500_btae532-B2","doi-asserted-by":"crossref","first-page":"e94","DOI":"10.1093\/nar\/gks251","article-title":"Grinder: a versatile amplicon and shotgun sequence simulator","volume":"40","author":"Angly","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2024091502104144500_btae532-B3","doi-asserted-by":"crossref","first-page":"e75448","DOI":"10.1371\/journal.pone.0075448","article-title":"NeSSM: a next-generation sequencing simulator for metagenomics","volume":"8","author":"Jia","year":"2013","journal-title":"PLoS One"},{"key":"2024091502104144500_btae532-B4","doi-asserted-by":"crossref","first-page":"236","DOI":"10.1186\/s13059-022-02805-9","article-title":"Lineage abundance estimation for SARS-CoV-2 in wastewater using transcriptome quantification techniques","volume":"23","author":"Baaijens","year":"2022","journal-title":"Genome Biol"},{"key":"2024091502104144500_btae532-B5","article-title":"Wastewater monitoring of SARS-CoV-2 variants in England: demonstration case study for Bristol (Dec 2020\u2014March 2021)","author":"Brown","year":"2021","journal-title":"ePrints"},{"key":"2024091502104144500_btae532-B6","doi-asserted-by":"crossref","first-page":"104993","DOI":"10.1016\/j.jcv.2021.104993","article-title":"SARS-CoV-2 whole-genome sequencing using reverse complement PCR: for easy, fast and accurate outbreak and variant analysis","volume":"144","author":"Coolen","year":"2021","journal-title":"J Clin Virol"},{"key":"2024091502104144500_btae532-B7","author":"De Maio","year":"2020"},{"key":"2024091502104144500_btae532-B8","author":"Docker Inc. Docker","year":"2023"},{"key":"2024091502104144500_btae532-B9","doi-asserted-by":"crossref","first-page":"459","DOI":"10.1038\/nrg.2016.57","article-title":"A comparison of tools for the simulation of genomic next-generation sequencing data","volume":"17","author":"Escalona","year":"2016","journal-title":"Nat Rev Genet"},{"key":"2024091502104144500_btae532-B10","doi-asserted-by":"crossref","first-page":"551","DOI":"10.1186\/s12859-022-05100-3","article-title":"VirPool: model-based estimation of SARS-CoV-2 variant proportions in wastewater samples","volume":"23","author":"Gafurov","year":"2022","journal-title":"BMC Bioinformatics"},{"key":"2024091502104144500_btae532-B11","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1093\/bioinformatics\/bty630","article-title":"Simulating Illumina metagenomic data with InSilicoSeq","volume":"35","author":"Gourl\u00e9","year":"2018","journal-title":"Bioinformatics"},{"key":"2024091502104144500_btae532-B12","doi-asserted-by":"crossref","first-page":"1647","DOI":"10.3390\/v13081647","article-title":"Monitoring SARS-CoV-2 populations in wastewater by amplicon sequencing and using the novel program SAM Refiner","volume":"13","author":"Gregory","year":"2021","journal-title":"Viruses"},{"key":"2024091502104144500_btae532-B13","author":"Hietaniemi","year":"2023"},{"key":"2024091502104144500_btae532-B14","doi-asserted-by":"crossref","first-page":"571328","DOI":"10.3389\/fmicb.2020.571328","article-title":"Rapid genomic characterization of SARS-CoV-2 by direct amplicon-based sequencing through comparison of MinION and Illumina iSeq100TM system","volume":"11","author":"Hourdel","year":"2020","journal-title":"Front Microbiol"},{"key":"2024091502104144500_btae532-B15","doi-asserted-by":"crossref","first-page":"1533","DOI":"10.1093\/bioinformatics\/bts187","article-title":"pIRS: profile-based Illumina pair-end reads simulator","volume":"28","author":"Hu","year":"2012","journal-title":"Bioinformatics"},{"key":"2024091502104144500_btae532-B16","doi-asserted-by":"crossref","first-page":"593","DOI":"10.1093\/bioinformatics\/btr708","article-title":"ART: a next-generation sequencing read simulator","volume":"28","author":"Huang","year":"2011","journal-title":"Bioinformatics"},{"key":"2024091502104144500_btae532-B17","doi-asserted-by":"crossref","first-page":"e00944-21","DOI":"10.1128\/JCM.00944-21","article-title":"Assessment of SARS-CoV-2 genome sequencing: quality criteria and low-frequency variants","volume":"59","author":"Jacot","year":"2021","journal-title":"J Clin Microbiol"},{"key":"2024091502104144500_btae532-B18","doi-asserted-by":"crossref","first-page":"1151","DOI":"10.1038\/s41564-022-01185-x","article-title":"Early detection and surveillance of SARS-CoV-2 genomic variants in wastewater using COJAC","volume":"7","author":"Jahn","year":"2022","journal-title":"Nat Microbiol"},{"key":"2024091502104144500_btae532-B19","doi-asserted-by":"crossref","first-page":"S14","DOI":"10.1186\/1471-2105-15-S9-S14","article-title":"A better sequence-read simulator program for metagenomics","volume":"15","author":"Johnson","year":"2014","journal-title":"BMC Bioinformatics"},{"key":"2024091502104144500_btae532-B20","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1038\/s41586-022-05049-6","article-title":"Wastewater sequencing reveals early cryptic SARS-CoV-2 variant transmission","volume":"609","author":"Karthikeyan","year":"2022","journal-title":"Nature"},{"key":"2024091502104144500_btae532-B21","doi-asserted-by":"crossref","first-page":"e14596","DOI":"10.7717\/peerj.14596","article-title":"Performance of methods for SARS-CoV-2 variant detection and abundance estimation within mixed population samples","volume":"11","author":"Kayikcioglu","year":"2023","journal-title":"PeerJ"},{"key":"2024091502104144500_btae532-B22","doi-asserted-by":"crossref","first-page":"524","DOI":"10.1002\/bies.200900181","article-title":"High-throughput DNA sequencing\u2014concepts and limitations","volume":"32","author":"Kircher","year":"2010","journal-title":"Bioessays"},{"key":"2024091502104144500_btae532-B23","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1038\/nmeth.1923","article-title":"Fast gapped-read alignment with Bowtie 2","volume":"9","author":"Langmead","year":"2012","journal-title":"Nat Methods"},{"key":"2024091502104144500_btae532-B24","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1186\/1471-2164-13-74","article-title":"GemSIM: general, error-model based simulator of next-generation sequencing data","volume":"13","author":"McElroy","year":"2012","journal-title":"BMC Genomics"},{"key":"2024091502104144500_btae532-B25","doi-asserted-by":"crossref","first-page":"1687","DOI":"10.1093\/nar\/18.7.1687","article-title":"DNA recombination during PCR","volume":"18","author":"Meyerhans","year":"1990","journal-title":"Nucleic Acids Res"},{"key":"2024091502104144500_btae532-B26","author":"pip developers. Pip","year":"2023"},{"key":"2024091502104144500_btae532-B27","doi-asserted-by":"crossref","first-page":"e0169774","DOI":"10.1371\/journal.pone.0169774","article-title":"Examining sources of error in PCR by single-molecule sequencing","volume":"12","author":"Potapov","year":"2017","journal-title":"PLoS One"},{"key":"2024091502104144500_btae532-B28","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1038\/nbt.1754","article-title":"Integrative genomics viewer","volume":"29","author":"Robinson","year":"2011","journal-title":"Nat Biotechnol"},{"key":"2024091502104144500_btae532-B29","doi-asserted-by":"crossref","first-page":"2834","DOI":"10.1038\/s41467-023-38184-3","article-title":"Enabling accurate and early detection of recently emerged SARS-CoV-2 variants of concern in wastewater","volume":"14","author":"Sapoval","year":"2023","journal-title":"Nat Commun"},{"key":"2024091502104144500_btae532-B30","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1186\/1756-0500-7-533","article-title":"FASTQSim: platform-independent data characterization and in silico read generation for NGS datasets","volume":"7","author":"Shcherbina","year":"2014","journal-title":"BMC Res Notes"},{"key":"2024091502104144500_btae532-B31","doi-asserted-by":"crossref","first-page":"e59","DOI":"10.1002\/cpmb.59","article-title":"Overview of next-generation sequencing technologies","volume":"122","author":"Slatko","year":"2018","journal-title":"Curr Protoc Mol Biol"},{"key":"2024091502104144500_btae532-B32","author":"The Debian Project","year":"2023"},{"key":"2024091502104144500_btae532-B33","doi-asserted-by":"crossref","first-page":"e1009175","DOI":"10.1371\/journal.pgen.1009175","article-title":"Stability of SARS-CoV-2 phylogenies","volume":"16","author":"Turakhia","year":"2020","journal-title":"PLoS Genet"},{"key":"2024091502104144500_btae532-B34","author":"Tyson","year":"2020"},{"key":"2024091502104144500_btae532-B35","doi-asserted-by":"crossref","first-page":"1809","DOI":"10.1093\/bioinformatics\/btac047","article-title":"A mixture model for determining SARS-CoV-2 variant composition in pooled samples","volume":"38","author":"Valieris","year":"2022","journal-title":"Bioinformatics"},{"key":"2024091502104144500_btae532-B36","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1038\/s41586-020-2008-3","article-title":"A new coronavirus associated with human respiratory disease in China","volume":"579","author":"Wu","year":"2020","journal-title":"Nature"},{"key":"2024091502104144500_btae532-B37","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/978-1-4939-3578-9_1","volume-title":"Statistical Genomics: Methods and Protocols","author":"Zhang","year":"2016"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae532\/59004810\/btae532.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae532\/59124395\/btae532.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae532\/59124395\/btae532.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,15]],"date-time":"2024-09-15T02:11:01Z","timestamp":1726366261000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae532\/7748403"}},"subtitle":[],"editor":[{"given":"Janet","family":"Kelso","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":37,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,9,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae532","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,9]]},"published":{"date-parts":[[2024,9]]},"article-number":"btae532"}}