{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T18:57:05Z","timestamp":1774724225105,"version":"3.50.1"},"reference-count":37,"publisher":"Oxford University Press (OUP)","issue":"6","license":[{"start":{"date-parts":[[2024,5,26]],"date-time":"2024-05-26T00:00:00Z","timestamp":1716681600000},"content-version":"vor","delay-in-days":1,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R03OD034499"],"award-info":[{"award-number":["R03OD034499"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["U01HL157989"],"award-info":[{"award-number":["U01HL157989"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Additional Ventures, and Gladstone Institutes"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>The increasing development of sequence-based machine learning models has raised the demand for manipulating sequences for this application. However, existing approaches to edit and evaluate genome sequences using models have limitations, such as incompatibility with structural variants, challenges in identifying responsible sequence perturbations, and the need for vcf file inputs and phased data. To address these bottlenecks, we present Sequence Mutator for Predictive Models (SuPreMo), a scalable and comprehensive tool for performing and supporting in silico mutagenesis experiments. We then demonstrate how pairs of reference and perturbed sequences can be used with machine learning models to prioritize pathogenic variants or discover new functional sequences.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>SuPreMo was written in Python, and can be run using only one line of code to generate both sequences and 3D genome disruption scores. The codebase, instructions for installation and use, and tutorials are on the GitHub page: https:\/\/github.com\/ketringjoni\/SuPreMo.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae340","type":"journal-article","created":{"date-parts":[[2024,5,26]],"date-time":"2024-05-26T03:27:21Z","timestamp":1716694041000},"source":"Crossref","is-referenced-by-count":10,"title":["SuPreMo: a computational tool for streamlining <i>in silico<\/i> perturbation using sequence-based predictive models"],"prefix":"10.1093","volume":"40","author":[{"given":"Ketrin","family":"Gjoni","sequence":"first","affiliation":[{"name":"Institute of Data Science and Biotechnology, Gladstone Institutes , 1650 Owens Street , San Francisco, CA 94158, United States"},{"name":"Department of Epidemiology & Biostatistics, University of California , San Francisco, CA 94158, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9870-6196","authenticated-orcid":false,"given":"Katherine S","family":"Pollard","sequence":"additional","affiliation":[{"name":"Institute of Data Science and Biotechnology, Gladstone Institutes , 1650 Owens Street , San Francisco, CA 94158, United States"},{"name":"Department of Epidemiology & Biostatistics, University of California , San Francisco, CA 94158, United States"},{"name":"Chan Zuckerberg Biohub , San Francisco, CA 94158, United States"}]}],"member":"286","published-online":{"date-parts":[[2024,5,25]]},"reference":[{"key":"2024060606105148700_btae340-B1","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1038\/nature15393","article-title":"A global reference for human genetic variation","volume":"526","author":"Auton","year":"2015","journal-title":"Nature"},{"key":"2024060606105148700_btae340-B2","doi-asserted-by":"crossref","first-page":"107663","DOI":"10.1016\/j.celrep.2020.107663","article-title":"Predicting mRNA abundance directly from genomic sequence using deep convolutional neural networks","volume":"31","author":"Agarwal","year":"2020","journal-title":"Cell Rep"},{"key":"2024060606105148700_btae340-B3","doi-asserted-by":"crossref","first-page":"1196","DOI":"10.1038\/s41592-021-01252-x","article-title":"Effective gene expression prediction from sequence by integrating long-range interactions","volume":"18","author":"Avsec","year":"2021","journal-title":"Nat Methods"},{"key":"2024060606105148700_btae340-B4","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/s41588-021-00782-6","article-title":"Base-resolution models of transcription-factor binding reveal soft motif syntax","volume":"53","author":"Avsec","year":"2021","journal-title":"Nat Genet"},{"key":"2024060606105148700_btae340-B5","author":"Benegas","year":"2023"},{"key":"2024060606105148700_btae340-B6","author":"Bushnell","year":"2014"},{"key":"2024060606105148700_btae340-B7","doi-asserted-by":"crossref","first-page":"940","DOI":"10.1038\/s41588-022-01102-2","article-title":"A sequence-based global map of regulatory activity for deciphering human genetics","volume":"54","author":"Chen","year":"2022","journal-title":"Nat Genet"},{"key":"2024060606105148700_btae340-B8","author":"Chen","year":"2022"},{"key":"2024060606105148700_btae340-B9","doi-asserted-by":"crossref","first-page":"1220","DOI":"10.1093\/bioinformatics\/btv710","article-title":"Manta: rapid detection of structural variants and indels for germline and cancer sequencing applications","volume":"32","author":"Chen","year":"2016","journal-title":"Bioinformatics"},{"key":"2024060606105148700_btae340-B10","doi-asserted-by":"crossref","first-page":"2156","DOI":"10.1093\/bioinformatics\/btr330","article-title":"The variant call format and VCFtools","volume":"27","author":"Danecek","year":"2011","journal-title":"Bioinformatics"},{"key":"2024060606105148700_btae340-B11","doi-asserted-by":"crossref","first-page":"e20","DOI":"10.7717\/peerj-cs.20","article-title":"CFSAN SNP pipeline: an automated method for constructing SNP matrices from next-generation sequence data","volume":"1","author":"Davis","year":"2015","journal-title":"PeerJ Comput Sci"},{"key":"2024060606105148700_btae340-B12","author":"Deng","year":"2024"},{"key":"2024060606105148700_btae340-B13","doi-asserted-by":"crossref","first-page":"1111","DOI":"10.1038\/s41592-020-0958-x","article-title":"Predicting 3D genome folding from DNA sequence with Akita","volume":"17","author":"Fudenberg","year":"2020","journal-title":"Nat Methods"},{"key":"2024060606105148700_btae340-B14","doi-asserted-by":"crossref","first-page":"3572","DOI":"10.1093\/bioinformatics\/bty304","article-title":"AnnotSV: an integrated tool for structural variations annotation","volume":"34","author":"Geoffroy","year":"2018","journal-title":"Bioinformatics"},{"key":"2024060606105148700_btae340-B15","author":"Gosai","year":"2023"},{"key":"2024060606105148700_btae340-B16","doi-asserted-by":"crossref","first-page":"100410","DOI":"10.1016\/j.xgen.2023.100410","article-title":"In silico discovery of repetitive elements as key sequence determinants of 3D genome folding","volume":"3","author":"Gunsalus","year":"2023","journal-title":"Cell Genom"},{"key":"2024060606105148700_btae340-B17","author":"Gunsalus","year":"2023"},{"key":"2024060606105148700_btae340-B18","doi-asserted-by":"crossref","first-page":"10597","DOI":"10.1093\/nar\/gkz808","article-title":"Functional interpretation of genetic variants using deep learning predicts impact on chromatin accessibility and histone modification","volume":"47","author":"Hoffman","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2024060606105148700_btae340-B19","doi-asserted-by":"crossref","first-page":"100316","DOI":"10.1016\/j.xgen.2023.100316","article-title":"Genome-wide structural variant analysis identifies risk loci for non-Alzheimer\u2019s dementias","volume":"3","author":"Kaivola","year":"2023","journal-title":"Cell Genom"},{"key":"2024060606105148700_btae340-B20","doi-asserted-by":"crossref","first-page":"e1008050","DOI":"10.1371\/journal.pcbi.1008050","article-title":"Cross-species regulatory sequence activity prediction","volume":"16","author":"Kelley","year":"2020","journal-title":"PLoS Comput Biol"},{"key":"2024060606105148700_btae340-B21","doi-asserted-by":"crossref","first-page":"990","DOI":"10.1101\/gr.200535.115","article-title":"Basset: learning the regulatory code of the accessible genome with deep convolutional neural networks","volume":"26","author":"Kelley","year":"2016","journal-title":"Genome Res"},{"key":"2024060606105148700_btae340-B22","doi-asserted-by":"crossref","first-page":"eabm1696","DOI":"10.1126\/science.abm1696","article-title":"Three-dimensional genome rewiring in loci with human accelerated regions","volume":"380","author":"Keough","year":"2023","journal-title":"Science"},{"key":"2024060606105148700_btae340-B23","volume-title":"Biorxiv","author":"Kuang","year":"2023"},{"key":"2024060606105148700_btae340-B24","doi-asserted-by":"crossref","first-page":"2987","DOI":"10.1093\/bioinformatics\/btr509","article-title":"A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data","volume":"27","author":"Li","year":"2011","journal-title":"Bioinformatics"},{"key":"2024060606105148700_btae340-B25","doi-asserted-by":"crossref","first-page":"246","DOI":"10.1186\/s13059-019-1828-7","article-title":"Structural variant calling: the long and the short of it","volume":"20","author":"Mahmoud","year":"2019","journal-title":"Genome Biol"},{"key":"2024060606105148700_btae340-B26","doi-asserted-by":"crossref","first-page":"4481","DOI":"10.1038\/s41467-021-24582-y","article-title":"Quantifying the contribution of neanderthal introgression to the heritability of complex traits","volume":"12","author":"McArthur","year":"2021","journal-title":"Nat Commun"},{"key":"2024060606105148700_btae340-B27","author":"Nguyen","year":"2023"},{"key":"2024060606105148700_btae340-B28","doi-asserted-by":"crossref","first-page":"326","DOI":"10.1139\/gen-2020-0097","article-title":"Dysregulation of chromatin organization in pediatric and adult brain tumors: oncoepigenomic contributions to tumorigenesis and cancer stem cell properties","volume":"64","author":"Paik","year":"2021","journal-title":"Genome"},{"key":"2024060606105148700_btae340-B29","doi-asserted-by":"crossref","first-page":"3427","DOI":"10.1093\/bioinformatics\/btr578","article-title":"Mapping personal functional data to personal genomes","volume":"27","author":"Rivas-Astroza","year":"2011","journal-title":"Bioinformatics"},{"key":"2024060606105148700_btae340-B30","doi-asserted-by":"crossref","first-page":"1118","DOI":"10.1038\/s41592-020-0960-3","article-title":"DeepC: predicting 3D genome folding using megabase-scale transfer learning","volume":"17","author":"Schwessinger","year":"2020","journal-title":"Nat Methods"},{"key":"2024060606105148700_btae340-B31","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1186\/s13059-022-02816-6","article-title":"Structural variant analysis of a cancer reference cell line sample using multiple sequencing technologies","volume":"23","author":"Talsania","year":"2022","journal-title":"Genome Biol"},{"key":"2024060606105148700_btae340-B32","doi-asserted-by":"crossref","first-page":"1140","DOI":"10.1038\/s41587-022-01612-8","article-title":"Cell-type-specific prediction of 3D chromatin organization enables high-throughput in silico genetic screening","volume":"41","author":"Tan","year":"2023","journal-title":"Nat Biotechnol"},{"key":"2024060606105148700_btae340-B33","volume-title":"Genomics in the Cloud: Using docker, GATK, and WDL in Terra","author":"Van der Auwera","year":"2020"},{"key":"2024060606105148700_btae340-B34","doi-asserted-by":"crossref","first-page":"725","DOI":"10.1038\/s41588-022-01065-4","article-title":"Sequence-based modeling of three-dimensional genome architecture from kilobase to chromosome scale","volume":"54","author":"Zhou","year":"2022","journal-title":"Nat Genet"},{"key":"2024060606105148700_btae340-B35","doi-asserted-by":"crossref","first-page":"1171","DOI":"10.1038\/s41588-018-0160-6","article-title":"Deep learning sequence-based ab initio prediction of variant effects on expression and disease risk","volume":"50","author":"Zhou","year":"2018","journal-title":"Nat Genet"},{"key":"2024060606105148700_btae340-B36","doi-asserted-by":"crossref","first-page":"931","DOI":"10.1038\/nmeth.3547","article-title":"Predicting effects of noncoding variants with deep learning\u2013based sequence model","volume":"12","author":"Zhou","year":"2015","journal-title":"Nat Methods"},{"key":"2024060606105148700_btae340-B37","doi-asserted-by":"crossref","first-page":"1347","DOI":"10.1038\/s41587-020-0538-8","article-title":"A robust benchmark for detection of germline large deletions and insertions","volume":"38","author":"Zook","year":"2020","journal-title":"Nat Biotechnol"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae340\/57955135\/btae340.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/6\/btae340\/58120348\/btae340.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/6\/btae340\/58120348\/btae340.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T09:33:50Z","timestamp":1717666430000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae340\/7682378"}},"subtitle":[],"editor":[{"given":"Pier Luigi","family":"Martelli","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,5,25]]},"references-count":37,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,6,3]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae340","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,6,1]]},"published":{"date-parts":[[2024,5,25]]},"article-number":"btae340"}}