{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T18:10:35Z","timestamp":1772129435533,"version":"3.50.1"},"reference-count":41,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T00:00:00Z","timestamp":1725580800000},"content-version":"vor","delay-in-days":5,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Structural variants (SVs) play an important role in genetic research and precision medicine. As existing SV detection methods usually contain a substantial number of false positive calls, approaches to filter the detection results are needed.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We developed a novel deep learning-based SV filtering tool, CSV-Filter, for both short and long reads. CSV-Filter uses a novel multi-level grayscale image encoding method based on CIGAR strings of the alignment results and employs image augmentation techniques to improve SV feature extraction. CSV-Filter also utilizes self-supervised learning networks for transfer as classification models, and employs mixed-precision operations to accelerate training. The experiments showed that the integration of CSV-Filter with popular SV detection tools could considerably reduce false positive SVs for short and long reads, while maintaining true positive SVs almost unchanged. Compared with DeepSVFilter, a SV filtering tool for short reads, CSV-Filter could recognize more false positive calls and support long reads as an additional feature.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>https:\/\/github.com\/xzyschumacher\/CSV-Filter<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae539","type":"journal-article","created":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T01:09:47Z","timestamp":1725671387000},"source":"Crossref","is-referenced-by-count":7,"title":["CSV-Filter: a deep learning-based comprehensive structural variant filtering method for both short and long reads"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2816-1805","authenticated-orcid":false,"given":"Zeyu","family":"Xia","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]}]},{"given":"Weiming","family":"Xiang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Electronic Engineering, Hunan University , Hunan 410082,","place":["P. R. China"]}]},{"given":"Qingzhe","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]}]},{"given":"Xingze","family":"Li","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]}]},{"given":"Yilin","family":"Li","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]}]},{"given":"Junyu","family":"Gao","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]}]},{"given":"Tao","family":"Tang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]}]},{"given":"Canqun","family":"Yang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]},{"name":"National Supercomputer Center in Tianjin , Tianjin, 300457,","place":["P. R. China"]},{"name":"Haihe Lab of ITAI , Tianjin, 300457,","place":["P. R. China"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4000-4957","authenticated-orcid":false,"given":"Yingbo","family":"Cui","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology , Hunan 410073,","place":["P. R. China"]}]}],"member":"286","published-online":{"date-parts":[[2024,9,6]]},"reference":[{"key":"2024100917405547700_btae539-B1","author":"Bardes","year":"2021"},{"key":"2024100917405547700_btae539-B2","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1186\/s13059-021-02380-5","article-title":"Samplot: A platform for structural variant visual validation and automated filtering","volume":"22","author":"Belyeu","year":"2021","journal-title":"Genome Biol"},{"key":"2024100917405547700_btae539-B3","doi-asserted-by":"crossref","first-page":"602","DOI":"10.1038\/nn.4524","article-title":"Whole genome sequencing resource identifies 18 new candidate genes for autism spectrum disorder","volume":"20","author":"C Yuen","year":"2017","journal-title":"Nat Neurosci"},{"key":"2024100917405547700_btae539-B4","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1101\/gr.162883.113","article-title":"Tigra: A targeted iterative graph routing assembler for breakpoint assembly","volume":"24","author":"Chen","year":"2014","journal-title":"Genome Res"},{"key":"2024100917405547700_btae539-B5","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1038\/nmeth.1363","article-title":"Breakdancer: An algorithm for high-resolution mapping of genomic structural variation","volume":"6","author":"Chen","year":"2009","journal-title":"Nat Methods"},{"key":"2024100917405547700_btae539-B6","doi-asserted-by":"crossref","first-page":"1220","DOI":"10.1093\/bioinformatics\/btv710","article-title":"Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications","volume":"32","author":"Chen","year":"2016","journal-title":"Bioinformatics"},{"key":"2024100917405547700_btae539-B7","doi-asserted-by":"crossref","first-page":"692","DOI":"10.1038\/ng.3834","article-title":"The impact of structural variation on human gene expression","volume":"49","author":"Chiang","year":"2017","journal-title":"Nat Genet"},{"key":"2024100917405547700_btae539-B8","doi-asserted-by":"crossref","DOI":"10.1093\/gigascience\/giab008","article-title":"Twelve years of SAMtools and BCFtools","volume":"10","author":"Danecek","year":"2021","journal-title":"Gigascience"},{"key":"2024100917405547700_btae539-B9","first-page":"248","author":"Deng","year":"2009"},{"key":"2024100917405547700_btae539-B10","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1186\/s13059-022-02840-6","article-title":"Truvari: Refined structural variant comparison preserves allelic diversity","volume":"23","author":"English","year":"2022","journal-title":"Genome Biol"},{"key":"2024100917405547700_btae539-B11","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1038\/nrg1767","article-title":"Structural variation in the human genome","volume":"7","author":"Feuk","year":"2006","journal-title":"Nat Rev Genet"},{"key":"2024100917405547700_btae539-B12","doi-asserted-by":"crossref","first-page":"3181","DOI":"10.1093\/bioinformatics\/btac306","article-title":"Detection of oncogenic and clinically actionable mutations in cancer genomes critically depends on variant calling tools","volume":"38","author":"Garcia-Prieto","year":"2022","journal-title":"Bioinformatics"},{"key":"2024100917405547700_btae539-B13","doi-asserted-by":"crossref","first-page":"2907","DOI":"10.1093\/bioinformatics\/btz041","article-title":"Svim: Structural variant identification using mapped long reads","volume":"35","author":"Heller","year":"2019","journal-title":"Bioinformatics"},{"key":"2024100917405547700_btae539-B14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12859-018-2425-6","article-title":"Tigmint: Correcting assembly errors using linked reads from large molecules","volume":"19","author":"Jackman","year":"2018","journal-title":"BMC Bioinformatics"},{"key":"2024100917405547700_btae539-B15","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1186\/s13059-020-02107-y","article-title":"Long-read-based human genomic structural variation detection with cutesv","volume":"21","author":"Jiang","year":"2020","journal-title":"Genome Biol"},{"key":"2024100917405547700_btae539-B16","author":"Kingma","year":"2014"},{"key":"2024100917405547700_btae539-B17","doi-asserted-by":"crossref","first-page":"E69","DOI":"10.1093\/nar\/gks003","article-title":"Ulrich bodenhofer, and sepp hochreiter. cn. mops: Mixture of poissons for discovering copy number variations in next-generation sequencing data with a low false discovery rate","volume":"40","author":"Klambauer","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2024100917405547700_btae539-B18","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1186\/s13059-019-1720-5","article-title":"Comprehensive evaluation of structural variation detection algorithms for whole genome sequencing","volume":"20","author":"Kosugi","year":"2019","journal-title":"Genome Biol"},{"key":"2024100917405547700_btae539-B19","doi-asserted-by":"crossref","first-page":"R84","DOI":"10.1186\/gb-2014-15-6-r84","article-title":"Lumpy: A probabilistic framework for structural variant discovery","volume":"15","author":"Layer","year":"2014","journal-title":"Genome Biol"},{"key":"2024100917405547700_btae539-B20","author":"Li","year":"2013"},{"key":"2024100917405547700_btae539-B21","doi-asserted-by":"crossref","first-page":"595","DOI":"10.1038\/s41592-018-0054-7","article-title":"A synthetic-diploid benchmark for accurate variant-calling evaluation","volume":"15","author":"Li","year":"2018","journal-title":"Nat Methods"},{"key":"2024100917405547700_btae539-B22","doi-asserted-by":"crossref","first-page":"1230","DOI":"10.1038\/s41592-022-01609-w","article-title":"Svision: A deep learning approach to resolve complex structural variants","volume":"19","author":"Lin","year":"2022","journal-title":"Nat Methods"},{"key":"2024100917405547700_btae539-B23","doi-asserted-by":"crossref","first-page":"bbaa370","DOI":"10.1093\/bib\/bbaa370","article-title":"A deep learning approach for filtering structural variants in short read sequencing data","volume":"22","author":"Liu","year":"2021","journal-title":"Brief Bioinform"},{"key":"2024100917405547700_btae539-B24","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1186\/s12859-023-05243-x","article-title":"Cnnlsv: Detecting structural variants by encoding long-read alignment information and convolutional neural network","volume":"24","author":"Ma","year":"2023","journal-title":"BMC Bioinformatics"},{"key":"2024100917405547700_btae539-B25","author":"Pacific Biosciences","year":"2021"},{"key":"2024100917405547700_btae539-B26","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1038\/s41592-023-01799-x","article-title":"Cue: A deep-learning framework for structural variant discovery and genotyping","volume":"20","author":"Popic","year":"2023","journal-title":"Nat Methods"},{"key":"2024100917405547700_btae539-B27","doi-asserted-by":"crossref","first-page":"983","DOI":"10.1038\/nbt.4235","article-title":"A universal snp and small-indel variant caller using deep neural networks","volume":"36","author":"Poplin","year":"2018","journal-title":"Nat Biotechnol"},{"key":"2024100917405547700_btae539-B28","doi-asserted-by":"crossref","first-page":"I333","DOI":"10.1093\/bioinformatics\/bts378","article-title":"Delly: Structural variant discovery by integrated paired-end and split-read analysis","volume":"28","author":"Rausch","year":"2012","journal-title":"Bioinformatics"},{"key":"2024100917405547700_btae539-B29","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1038\/nbt.1754","article-title":"Integrative genomics viewer","volume":"29","author":"Robinson","year":"2011","journal-title":"Nat Biotechnol"},{"key":"2024100917405547700_btae539-B30","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1038\/s41592-018-0001-7","article-title":"Accurate detection of complex structural variations using single-molecule sequencing","volume":"15","author":"Sedlazeck","year":"2018","journal-title":"Nat Methods"},{"key":"2024100917405547700_btae539-B31","doi-asserted-by":"crossref","first-page":"1215","DOI":"10.1038\/s41588-019-0459-y","article-title":"Long-read sequencing identifies ggc repeat expansions in notch2nlc associated with neuronal intranuclear inclusion disease","volume":"51","author":"Sone","year":"2019","journal-title":"Nat Genet"},{"key":"2024100917405547700_btae539-B32","doi-asserted-by":"crossref","first-page":"3994","DOI":"10.1093\/bioinformatics\/btv478","article-title":"Svviz: A read viewer for validating structural variants","volume":"31","author":"Spies","year":"2015","journal-title":"Bioinformatics"},{"key":"2024100917405547700_btae539-B33","first-page":"2818","author":"Szegedy","year":"2016"},{"key":"2024100917405547700_btae539-B34","doi-asserted-by":"crossref","first-page":"581","DOI":"10.1101\/gr.221028.117","article-title":"Svaba: Genome-wide detection of structural variants and indels by local assembly","volume":"28","author":"Wala","year":"2018","journal-title":"Genome Res"},{"key":"2024100917405547700_btae539-B35","doi-asserted-by":"crossref","first-page":"1122","DOI":"10.1038\/s41592-021-01205-4","article-title":"Dome: Recommendations for supervised machine learning validation in biology","volume":"18","author":"Walsh","year":"2021","journal-title":"Nature Methods"},{"key":"2024100917405547700_btae539-B36","first-page":"293","author":"Xiang","year":"2022"},{"key":"2024100917405547700_btae539-B37","doi-asserted-by":"crossref","first-page":"2865","DOI":"10.1093\/bioinformatics\/btp394","article-title":"Pindel: A pattern growth approach to detect break points of large deletions and medium sized insertions from paired-end short reads","volume":"25","author":"Ye","year":"2009","journal-title":"Bioinformatics"},{"key":"2024100917405547700_btae539-B38","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1186\/s12859-023-05324-x","article-title":"Svcnn: An accurate deep learning-based method for detecting structural variation based on long-read data","volume":"24","author":"Zheng","year":"2023","journal-title":"BMC Bioinformatics"},{"key":"2024100917405547700_btae539-B39","doi-asserted-by":"crossref","first-page":"797","DOI":"10.1038\/s43588-022-00387-x","article-title":"Symphonizing pileup and full-alignment for deep learning-based long-read variant calling","volume":"2","author":"Zheng","year":"2022","journal-title":"Nat Comput Sci"},{"key":"2024100917405547700_btae539-B40","doi-asserted-by":"crossref","first-page":"246","DOI":"10.1038\/nbt.2835","article-title":"Integrating human sequence data sets provides a resource of benchmark snp and indel genotype calls","volume":"32","author":"Zook","year":"2014","journal-title":"Nat Biotechnol"},{"key":"2024100917405547700_btae539-B41","doi-asserted-by":"crossref","first-page":"1347","DOI":"10.1038\/s41587-020-0538-8","article-title":"A robust benchmark for detection of germline large deletions and insertions","volume":"38","author":"Zook","year":"2020","journal-title":"Nat Biotechnol"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae539\/59034352\/btae539.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae539\/59648945\/btae539.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae539\/59648945\/btae539.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,9]],"date-time":"2024-10-09T17:41:13Z","timestamp":1728495673000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae539\/7750355"}},"subtitle":[],"editor":[{"given":"Inanc","family":"Birol","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":41,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,9,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae539","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,9]]},"published":{"date-parts":[[2024,9]]},"article-number":"btae539"}}