{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T03:46:25Z","timestamp":1772250385772,"version":"3.50.1"},"reference-count":35,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2019,10,8]],"date-time":"2019-10-08T00:00:00Z","timestamp":1570492800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"name":"American Lebanese Syrian Associated Charities of St. Jude Children's Research Hospital"},{"DOI":"10.13039\/100000057","name":"National Institute of General Medical Sciences","doi-asserted-by":"publisher","award":["P50GM115279-03"],"award-info":[{"award-number":["P50GM115279-03"]}],"id":[{"id":"10.13039\/100000057","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,3,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Reliable identification of expressed somatic insertions\/deletions (indels) is an unmet need due to artifacts generated in PCR-based RNA-Seq library preparation and the lack of normal RNA-Seq data, presenting analytical challenges for discovery of somatic indels in tumor transcriptome.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We present RNAIndel, a tool for predicting somatic, germline and artifact indels from tumor RNA-Seq data. RNAIndel leverages features derived from indel sequence context and biological effect in a machine-learning framework. Except for tumor samples with microsatellite instability, RNAIndel robustly predicts 88\u2013100% of somatic indels in five diverse test datasets of pediatric and adult cancers, even recovering subclonal (VAF range 0.01\u20130.15) driver indels missed by targeted deep-sequencing, outperforming the current best-practice for RNA-Seq variant calling which had 57% sensitivity but with 14 times more false positives.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>RNAIndel is freely available at https:\/\/github.com\/stjude\/RNAIndel.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btz753","type":"journal-article","created":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T15:27:59Z","timestamp":1569943679000},"page":"1382-1390","source":"Crossref","is-referenced-by-count":19,"title":["RNAIndel: discovering somatic coding indels from tumor RNA-Seq data"],"prefix":"10.1093","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7787-2008","authenticated-orcid":false,"given":"Kohei","family":"Hagiwara","sequence":"first","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Ding","sequence":"additional","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael N","family":"Edmonson","sequence":"additional","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stephen V","family":"Rice","sequence":"additional","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Scott","family":"Newman","sequence":"additional","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John","family":"Easton","sequence":"additional","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Juncheng","family":"Dai","sequence":"additional","affiliation":[{"name":"Department of Epidemiology, Nanjing Medical University School of Public Health , Jiangning District, Nanjing, 211166, People\u2019s Republic of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Soheil","family":"Meshinchi","sequence":"additional","affiliation":[{"name":"Clinical Research Division, Fred Hutchinson Cancer Research Center , Seattle, WA 98109, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rhonda E","family":"Ries","sequence":"additional","affiliation":[{"name":"Clinical Research Division, Fred Hutchinson Cancer Research Center , Seattle, WA 98109, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5363-1848","authenticated-orcid":false,"given":"Michael","family":"Rusch","sequence":"additional","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinghui","family":"Zhang","sequence":"additional","affiliation":[{"name":"Computational Biology, St Jude Children\u2019s Research Hospital , Memphis, TN 38105, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2019,10,8]]},"reference":[{"key":"2023060910383285000_btz753-B1","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1038\/nm.4439","article-title":"The molecular landscape of pediatric acute myeloid leukemia reveals recurrent structural alterations and age-specific mutational interactions","volume":"1","author":"Bolouri","year":"2018","journal-title":"Nat. Med"},{"key":"2023060910383285000_btz753-B2","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","article-title":"Random forests","volume":"45","author":"Breiman","year":"2001","journal-title":"Mach. Learn"},{"key":"2023060910383285000_btz753-B3","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1038\/nbt.2514","article-title":"Sensitive detection of somatic point mutations in impure and heterogeneous cancer samples","volume":"31","author":"Cibulskis","year":"2013","journal-title":"Nat. Biotechnol"},{"key":"2023060910383285000_btz753-B4","doi-asserted-by":"crossref","DOI":"10.1038\/ncomms15180","article-title":"A molecular portrait of microsatellite instability across multiple cancers","volume":"8","author":"Cortes-Ciriano","year":"2017","journal-title":"Nat. Commun"},{"key":"2023060910383285000_btz753-B5","doi-asserted-by":"crossref","first-page":"2156","DOI":"10.1093\/bioinformatics\/btr330","article-title":"The variant call format and VCFtools","volume":"27","author":"Danecek","year":"2011","journal-title":"Bioinformatics"},{"key":"2023060910383285000_btz753-B6","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1038\/ng.806","article-title":"A framework for variation discovery and genotyping using next-generation DNA sequencing data","volume":"43","author":"DePristo","year":"2011","journal-title":"Nat Genet"},{"key":"2023060910383285000_btz753-B7","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1093\/bioinformatics\/bts635","article-title":"STAR: ultrafast universal RNA-seq aligner","volume":"29","author":"Dobin","year":"2013","journal-title":"Bioinformatics"},{"key":"2023060910383285000_btz753-B8","doi-asserted-by":"crossref","first-page":"865","DOI":"10.1093\/bioinformatics\/btr032","article-title":"Bambino: a variant detector and alignment viewer for next-generation sequencing data in the SAM\/BAM format","volume":"27","author":"Edmonson","year":"2011","journal-title":"Bioinformatics"},{"key":"2023060910383285000_btz753-B9","doi-asserted-by":"crossref","DOI":"10.1101\/gr.250357.119","article-title":"Pediatric cancer variant pathogenicity information exchange (PeCanPIE): a cloud-based platform for curating and classifying germline variants","author":"Edmonson","year":"2019","journal-title":"Genome Res"},{"key":"2023060910383285000_btz753-B10","doi-asserted-by":"crossref","DOI":"10.1186\/s13073-014-0089-z","article-title":"Reducing INDEL calling errors in whole genome and exome sequencing data","volume":"6","author":"Fang","year":"2014","journal-title":"Genome Med"},{"key":"2023060910383285000_btz753-B11","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1016\/j.tibs.2006.02.004","article-title":"Mechanism of a genetic glissando: structural biology of indel mutations","volume":"31","author":"Garcia-Diaz","year":"2006","journal-title":"Trends Biochem. Sci"},{"key":"2023060910383285000_btz753-B12","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1023\/A:1010920819831","article-title":"A simple generalisation of the area under the ROC curve for multiple class classification problems","volume":"45","author":"Hand","year":"2001","journal-title":"Mach. Learn"},{"key":"2023060910383285000_btz753-B13","author":"Karczewski","year":"2019"},{"key":"2023060910383285000_btz753-B14","first-page":"656","article-title":"BLAT\u2013the BLAST-like alignment tool","volume":"12","author":"Kent","year":"2002","journal-title":"Genome Res"},{"key":"2023060910383285000_btz753-B15","doi-asserted-by":"crossref","first-page":"e12433","DOI":"10.1371\/journal.pone.0012433","article-title":"A phenomenological model for predicting melting temperatures of DNA sequences","volume":"5","author":"Khandelwal","year":"2010","journal-title":"PLoS One"},{"key":"2023060910383285000_btz753-B16","doi-asserted-by":"crossref","first-page":"D980","DOI":"10.1093\/nar\/gkt1113","article-title":"ClinVar: public archive of relationships among sequence variation and human phenotype","volume":"42","author":"Landrum","year":"2014","journal-title":"Nucleic Acids Res"},{"key":"2023060910383285000_btz753-B17","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1038\/nature25795","article-title":"Pan-cancer genome and transcriptome analyses of 1,699 paediatric leukaemias and solid tumours","volume":"555","author":"Ma","year":"2018","journal-title":"Nature"},{"key":"2023060910383285000_btz753-B18","doi-asserted-by":"crossref","first-page":"e1000160.","DOI":"10.1371\/journal.pgen.1000160","article-title":"Genetic variation in an individual human exome","volume":"4","author":"Ng","year":"2008","journal-title":"PLoS Genet"},{"key":"2023060910383285000_btz753-B19","doi-asserted-by":"crossref","DOI":"10.12688\/wellcomeopenres.10501.2","article-title":"Making the most of RNA-seq: pre-processing sequencing data with Opossum for reliable SNP variant detection","author":"Oikkonen","year":"2017","journal-title":"Wellcome Open Res"},{"key":"2023060910383285000_btz753-B20","doi-asserted-by":"crossref","first-page":"641","DOI":"10.1016\/j.ajhg.2013.08.008","article-title":"Reliable identification of genomic variants from RNA-Seq data","volume":"93","author":"Piskol","year":"2013","journal-title":"Am. J. Hum. Genet"},{"key":"2023060910383285000_btz753-B21","doi-asserted-by":"crossref","first-page":"1319","DOI":"10.1016\/j.cell.2016.05.053","article-title":"Leveraging rules of nonsense-mediated mRNA decay for genome engineering and personalized medicine","volume":"165","author":"Popp","year":"2016","journal-title":"Cell"},{"key":"2023060910383285000_btz753-B22","doi-asserted-by":"crossref","first-page":"D501","DOI":"10.1093\/nar\/gki025","article-title":"NCBI Reference Sequence (RefSeq): a curated non-redundant sequence database of genomes, transcripts and proteins","volume":"33","author":"Pruitt","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"2023060910383285000_btz753-B23","doi-asserted-by":"crossref","DOI":"10.1038\/s41467-018-06485-7","article-title":"Clinical cancer genomic profiling by three-platform sequencing of whole genome, whole exome and transcriptome","volume":"9","author":"Rusch","year":"2018","journal-title":"Nat. Commun"},{"key":"2023060910383285000_btz753-B24","doi-asserted-by":"crossref","first-page":"1811","DOI":"10.1093\/bioinformatics\/bts271","article-title":"Strelka: accurate somatic small-variant calling from sequenced tumor-normal sample pairs","volume":"28","author":"Saunders","year":"2012","journal-title":"Bioinformatics"},{"key":"2023060910383285000_btz753-B25","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1093\/nar\/29.1.308","article-title":"dbSNP: the NCBI database of genetic variation","volume":"29","author":"Sherry","year":"2001","journal-title":"Nucleic Acids Res"},{"key":"2023060910383285000_btz753-B26","first-page":"973","article-title":"Indel detection from RNA-seq data: tool evaluation and strategies for accurate detection of actionable mutations","volume":"18","author":"Sun","year":"2017","journal-title":"Brief. Bioinform"},{"key":"2023060910383285000_btz753-B27","doi-asserted-by":"crossref","first-page":"2202","DOI":"10.1093\/bioinformatics\/btv112","article-title":"Unified representation of genetic variants","volume":"31","author":"Tan","year":"2015","journal-title":"Bioinformatics"},{"key":"2023060910383285000_btz753-B28","doi-asserted-by":"crossref","first-page":"e172.","DOI":"10.1093\/nar\/gku1005","article-title":"The eSNV-detect: a computational system to identify expressed single nucleotide variants from transcriptome sequencing data","volume":"42","author":"Tang","year":"2014","journal-title":"Nucleic Acids Res"},{"key":"2023060910383285000_btz753-B29","doi-asserted-by":"crossref","first-page":"D941","DOI":"10.1093\/nar\/gky1015","article-title":"COSMIC: the catalogue of somatic mutations in cancer","volume":"47","author":"Tate","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2023060910383285000_btz753-B30","first-page":"330","article-title":"Comprehensive molecular characterization of human colon and rectal cancer","volume-title":"Nature","year":"2012"},{"key":"2023060910383285000_btz753-B31","doi-asserted-by":"crossref","first-page":"3414","DOI":"10.1093\/bioinformatics\/btu577","article-title":"RVboost: RNA-seq variant prioritization using a boosting method","volume":"30","author":"Wang","year":"2014","journal-title":"Bioinformatics"},{"key":"2023060910383285000_btz753-B32","article-title":"Whole-genome sequencing reveals genomic signatures associated with the inflammatory microenvironments in Chinese NSCLC patients","author":"Wang","year":"2018","journal-title":"Nat. Commun"},{"key":"2023060910383285000_btz753-B33","doi-asserted-by":"crossref","first-page":"854","DOI":"10.1086\/342727","article-title":"Human diallelic insertion\/deletion polymorphisms","volume":"71","author":"Weber","year":"2002","journal-title":"Am. J. Hum. Genet"},{"key":"2023060910383285000_btz753-B34","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1038\/nm.4002","article-title":"Systematic discovery of complex insertions and deletions in human cancers","volume":"22","author":"Ye","year":"2016","journal-title":"Nat. Med"},{"key":"2023060910383285000_btz753-B35","doi-asserted-by":"crossref","first-page":"5338","DOI":"10.1093\/nar\/gkg745","article-title":"Patterns of nucleotide substitution, insertion and deletion in the human genome inferred from pseudogenes","volume":"15","author":"Zhang","year":"2003","journal-title":"Nucleic Acids Res"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btz753\/30458576\/btz753.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/5\/1382\/50553117\/btz753.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/5\/1382\/50553117\/btz753.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,9]],"date-time":"2023-06-09T06:39:53Z","timestamp":1686292793000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/36\/5\/1382\/5583731"}},"subtitle":[],"editor":[{"given":"Yann","family":"Ponty","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2019,10,8]]},"references-count":35,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2020,3,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btz753","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/512749","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2020,3]]},"published":{"date-parts":[[2019,10,8]]}}}