{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:35:41Z","timestamp":1773840941854,"version":"3.50.1"},"reference-count":31,"publisher":"Oxford University Press (OUP)","issue":"2","license":[{"start":{"date-parts":[[2024,2,3]],"date-time":"2024-02-03T00:00:00Z","timestamp":1706918400000},"content-version":"vor","delay-in-days":2,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"General Program of Guangxi Natural Science Foundation","award":["2023GXNSFAA026410"],"award-info":[{"award-number":["2023GXNSFAA026410"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["11931008"],"award-info":[{"award-number":["11931008"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12101368"],"award-info":[{"award-number":["12101368"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61771009"],"award-info":[{"award-number":["61771009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272268"],"award-info":[{"award-number":["62272268"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2021M701998"],"award-info":[{"award-number":["2021M701998"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007129","name":"Natural Science Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ZR2021QA013"],"award-info":[{"award-number":["ZR2021QA013"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007129","name":"Natural Science Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ZR2023QA059"],"award-info":[{"award-number":["ZR2023QA059"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,2,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>The advancement of long-read RNA sequencing technologies leads to a bright future for transcriptome analysis, in which clustering long reads according to their gene family of origin is of great importance. However, existing de novo clustering algorithms require plenty of computing resources.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We developed a new algorithm GeLuster for clustering long RNA-seq reads. Based on our tests on one simulated dataset and nine real datasets, GeLuster exhibited superior performance. On the tested Nanopore datasets it ran 2.9\u201317.5 times as fast as the second-fastest method with less than one-seventh of memory consumption, while achieving higher clustering accuracy. And on the PacBio data, GeLuster also had a similar performance. It sets the stage for large-scale transcriptome study in future.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>GeLuster is freely available at https:\/\/github.com\/yutingsdu\/GeLuster.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae059","type":"journal-article","created":{"date-parts":[[2024,2,4]],"date-time":"2024-02-04T05:13:49Z","timestamp":1707023629000},"source":"Crossref","is-referenced-by-count":3,"title":["Highly efficient clustering of long-read transcriptomic data with GeLuster"],"prefix":"10.1093","volume":"40","author":[{"given":"Junchi","family":"Ma","sequence":"first","affiliation":[{"name":"Research Center for Mathematics and Interdisciplinary Sciences (Frontiers Science Center for Nonlinear Expectations), Shandong University , Qingdao 266237, China"},{"name":"School of Mathematics, Shandong University , Jinan, Shandong 250100, China"}]},{"given":"Xiaoyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Mathematics, Shandong University , Jinan, Shandong 250100, China"}]},{"given":"Enfeng","family":"Qi","sequence":"additional","affiliation":[{"name":"School of Mathematics and Statistics, Guangxi Normal University , Guilin 541000, China"}]},{"given":"Renmin","family":"Han","sequence":"additional","affiliation":[{"name":"Research Center for Mathematics and Interdisciplinary Sciences (Frontiers Science Center for Nonlinear Expectations), Shandong University , Qingdao 266237, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0073-9644","authenticated-orcid":false,"given":"Ting","family":"Yu","sequence":"additional","affiliation":[{"name":"Research Center for Mathematics and Interdisciplinary Sciences (Frontiers Science Center for Nonlinear Expectations), Shandong University , Qingdao 266237, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1581-5897","authenticated-orcid":false,"given":"Guojun","family":"Li","sequence":"additional","affiliation":[{"name":"Research Center for Mathematics and Interdisciplinary Sciences (Frontiers Science Center for Nonlinear Expectations), Shandong University , Qingdao 266237, China"}]}],"member":"286","published-online":{"date-parts":[[2024,2,3]]},"reference":[{"key":"2024022120062375700_btae059-B1","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1186\/s13059-020-1935-5","article-title":"Opportunities and challenges in long-read sequencing data analysis","volume":"21","author":"Amarasinghe","year":"2020","journal-title":"Genome Biol"},{"key":"2024022120062375700_btae059-B2","doi-asserted-by":"crossref","first-page":"16027","DOI":"10.1038\/ncomms16027","article-title":"Nanopore long-read RNAseq reveals widespread transcriptional variation among the surface receptors of individual B cells","volume":"8","author":"Byrne","year":"2017","journal-title":"Nat Commun"},{"key":"2024022120062375700_btae059-B3","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1186\/s13059-022-02715-w","article-title":"RATTLE: reference-free reconstruction and quantification of transcriptomes from nanopore sequencing","volume":"23","author":"de la Rubia","year":"2022","journal-title":"Genome Biol"},{"key":"2024022120062375700_btae059-B4","doi-asserted-by":"crossref","first-page":"eabq5072","DOI":"10.1126\/sciadv.abq5072","article-title":"ESPRESSO: robust discovery and quantification of transcript isoforms from error-prone long-read RNA-seq data","volume":"9","author":"Gao","year":"2023","journal-title":"Sci Adv"},{"key":"2024022120062375700_btae059-B5","doi-asserted-by":"crossref","first-page":"giaa061","DOI":"10.1093\/gigascience\/giaa061","article-title":"Trans-NanoSim characterizes and simulates nanopore RNA-sequencing data","volume":"9","author":"Hafezqorani","year":"2020","journal-title":"Gigascience"},{"key":"2024022120062375700_btae059-B6","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1186\/s12864-017-3757-8","article-title":"A survey of the complex transcriptome from the highly polyploid sugarcane genome using full-length isoform sequencing and de novo assembly from short read sequencing","volume":"18","author":"Hoang","year":"2017","journal-title":"BMC Genomics"},{"key":"2024022120062375700_btae059-B7","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1038\/s41592-022-01716-8","article-title":"Approaching complete genomes, transcriptomes and epi-omes with accurate long-read sequencing","volume":"20","author":"Kovaka","year":"2023","journal-title":"Nat Methods"},{"key":"2024022120062375700_btae059-B8","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1186\/s12864-017-3691-9","article-title":"Normalized long read RNA sequencing in chicken reveals transcriptome complexity similar to human","volume":"18","author":"Kuo","year":"2017","journal-title":"BMC Genomics"},{"key":"2024022120062375700_btae059-B9","doi-asserted-by":"crossref","first-page":"2300","DOI":"10.1093\/bioinformatics\/btt396","article-title":"Inference of alternative splicing from RNA-Seq data with probabilistic splice graphs","volume":"29","author":"LeGault","year":"2013","journal-title":"Bioinformatics"},{"key":"2024022120062375700_btae059-B10","doi-asserted-by":"crossref","first-page":"2103","DOI":"10.1093\/bioinformatics\/btw152","article-title":"Minimap and miniasm: fast mapping and de novo assembly for noisy long sequences","volume":"32","author":"Li","year":"2016","journal-title":"Bioinformatics"},{"key":"2024022120062375700_btae059-B11","doi-asserted-by":"crossref","first-page":"3094","DOI":"10.1093\/bioinformatics\/bty191","article-title":"Minimap2: pairwise alignment for nucleotide sequences","volume":"34","author":"Li","year":"2018","journal-title":"Bioinformatics"},{"key":"2024022120062375700_btae059-B12","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1146\/annurev-biodatasci-072018-021156","article-title":"Sketching and sublinear data structures in genomics","volume":"2","author":"Mar\u00e7ais","year":"2019","journal-title":"Annu Rev Biomed Data Sci"},{"key":"2024022120062375700_btae059-B13","doi-asserted-by":"crossref","first-page":"e2","DOI":"10.1093\/nar\/gky834","article-title":"De novo clustering of long reads by gene from transcriptomics data","volume":"47","author":"Marchet","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2024022120062375700_btae059-B14","volume-title":"Genome Biology and Evolution","author":"Marta","year":"2023"},{"key":"2024022120062375700_btae059-B15","doi-asserted-by":"crossref","first-page":"1126","DOI":"10.1101\/gr.231100.117","article-title":"Complex rearrangements and oncogene amplifications revealed by long-read DNA and RNA sequencing of a breast cancer cell line","volume":"28","author":"Nattestad","year":"2018","journal-title":"Genome Res"},{"key":"2024022120062375700_btae059-B16","doi-asserted-by":"crossref","first-page":"i326","DOI":"10.1093\/bioinformatics\/btt219","article-title":"IDBA-tran: a more robust de novo de Bruijn graph assembler for transcriptomes with uneven expression levels","volume":"29","author":"Peng","year":"2013","journal-title":"Bioinformatics"},{"key":"2024022120062375700_btae059-B17","doi-asserted-by":"crossref","first-page":"R234","DOI":"10.1093\/hmg\/ddy177","article-title":"Long reads: their purpose and place","volume":"27","author":"Pollard","year":"2018","journal-title":"Hum Mol Genet"},{"key":"2024022120062375700_btae059-B18","doi-asserted-by":"crossref","first-page":"915","DOI":"10.1038\/s41587-022-01565-y","article-title":"Accurate isoform discovery with IsoQuant using long reads","volume":"41","author":"Prjibelski","year":"2023","journal-title":"Nat Biotechnol"},{"key":"2024022120062375700_btae059-B19","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1186\/s13059-018-1462-9","article-title":"From squiggle to basepair: computational approaches for improving nanopore sequencing read accuracy","volume":"19","author":"Rang","year":"2018","journal-title":"Genome Biol"},{"key":"2024022120062375700_btae059-B20","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1186\/s13059-019-1809-x","article-title":"When the levee breaks: a practical guide to sketching algorithms for processing the flood of genomic data","volume":"20","author":"Rowe","year":"2019","journal-title":"Genome Biol"},{"key":"2024022120062375700_btae059-B21","doi-asserted-by":"crossref","first-page":"472","DOI":"10.1089\/cmb.2019.0299","article-title":"De novo clustering of Long-Read transcriptome data using a greedy, quality Value-Based algorithm","volume":"27","author":"Sahlin","year":"2020","journal-title":"J Comput Biol"},{"key":"2024022120062375700_btae059-B22","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1038\/s41467-020-20340-8","article-title":"Error correction enables use of oxford nanopore technology for reference-free transcriptome analysis","volume":"12","author":"Sahlin","year":"2021","journal-title":"Nat Commun"},{"key":"2024022120062375700_btae059-B23","doi-asserted-by":"crossref","first-page":"4601","DOI":"10.1038\/s41467-018-06910-x","article-title":"Deciphering highly similar multigene family transcripts from Iso-Seq data with IsoCon","volume":"9","author":"Sahlin","year":"2018","journal-title":"Nat Commun"},{"key":"2024022120062375700_btae059-B24","doi-asserted-by":"crossref","first-page":"1117","DOI":"10.1016\/j.bbagrm.2017.08.007","article-title":"Altered expression of the FMR1 splicing variants landscape in premutation carriers","volume":"1860","author":"Tseng","year":"2017","journal-title":"Biochim Biophys Acta Gene Regul Mech"},{"key":"2024022120062375700_btae059-B25","doi-asserted-by":"crossref","first-page":"9726","DOI":"10.1073\/pnas.1806447115","article-title":"Improving nanopore read accuracy with the R2C2 method enables the sequencing of highly multiplexed full-length single-cell cDNA","volume":"115","author":"Volden","year":"2018","journal-title":"Proc Natl Acad Sci USA"},{"key":"2024022120062375700_btae059-B26","doi-asserted-by":"crossref","first-page":"1003771","DOI":"10.3389\/fgene.2022.1003771","article-title":"Discovering novel reproductive genes in a non-model fly using de novo GridION transcriptomics","volume":"13","author":"Walter","year":"2022","journal-title":"Front Genet"},{"key":"2024022120062375700_btae059-B27","doi-asserted-by":"crossref","first-page":"1348","DOI":"10.1038\/s41587-021-01108-x","article-title":"Nanopore sequencing technology, bioinformatics and applications","volume":"39","author":"Wang","year":"2021","journal-title":"Nat Biotechnol"},{"key":"2024022120062375700_btae059-B28","doi-asserted-by":"crossref","first-page":"100","DOI":"10.12688\/f1000research.10571.2","article-title":"Comprehensive comparison of pacific biosciences and oxford nanopore technologies and their applications to transcriptome analysis [version 2; peer review: 2 approved]","volume":"6","author":"Weirather","year":"2017","journal-title":"F1000Res"},{"key":"2024022120062375700_btae059-B29","doi-asserted-by":"crossref","first-page":"1155","DOI":"10.1038\/s41587-019-0217-9","article-title":"Accurate circular consensus long-read sequencing improves variant detection and assembly of a human genome","volume":"37","author":"Wenger","year":"2019","journal-title":"Nat Biotechnol"},{"key":"2024022120062375700_btae059-B30","doi-asserted-by":"crossref","first-page":"i283","DOI":"10.1093\/bioinformatics\/btu288","article-title":"RNA-Skim: a rapid method for RNA-Seq quantification at transcript level","volume":"30","author":"Zhang","year":"2014","journal-title":"Bioinformatics"},{"key":"2024022120062375700_btae059-B31","doi-asserted-by":"crossref","first-page":"i119","DOI":"10.1093\/bioinformatics\/btaa472","article-title":"Improved design and analysis of practical minimizers","volume":"36","author":"Zheng","year":"2020","journal-title":"Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae059\/56580308\/btae059.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/2\/btae059\/56731955\/btae059.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/2\/btae059\/56731955\/btae059.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T20:06:51Z","timestamp":1708546011000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae059\/7600422"}},"subtitle":[],"editor":[{"given":"Peter","family":"Robinson","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,2,1]]},"references-count":31,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,2,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae059","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,2,1]]},"published":{"date-parts":[[2024,2,1]]},"article-number":"btae059"}}