{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T04:10:49Z","timestamp":1772165449626,"version":"3.50.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"1","content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1186\/s12859-017-1881-8","type":"journal-article","created":{"date-parts":[[2017,11,3]],"date-time":"2017-11-03T08:41:06Z","timestamp":1509698466000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["K-mer clustering algorithm using a MapReduce framework: application to the parallelization of the Inchworm module of Trinity"],"prefix":"10.1186","volume":"18","author":[{"given":"Chang Sik","family":"Kim","sequence":"first","affiliation":[]},{"given":"Martyn D.","family":"Winn","sequence":"additional","affiliation":[]},{"given":"Vipin","family":"Sachdeva","sequence":"additional","affiliation":[]},{"given":"Kirk E.","family":"Jordan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,11,3]]},"reference":[{"key":"1881_CR1","doi-asserted-by":"crossref","first-page":"203","DOI":"10.13070\/mm.en.3.203","volume":"3","author":"DC Corney","year":"2013","unstructured":"Corney DC. RNA-Seq using next generation sequencing. Materials and Methods. 2013;3:203.","journal-title":"Materials and Methods"},{"key":"1881_CR2","doi-asserted-by":"crossref","first-page":"220","DOI":"10.3389\/fpls.2012.00220","volume":"3","author":"S Schliesky","year":"2012","unstructured":"Schliesky S, Gowik U, Weber APM, Brautigam A. RNA-Seq assembly - are we there yet? Front Plant Sci. 2012;3:220.","journal-title":"Front Plant Sci"},{"key":"1881_CR3","doi-asserted-by":"crossref","first-page":"220","DOI":"10.1186\/gb-2010-11-12-220","volume":"11","author":"A Oshlack","year":"2010","unstructured":"Oshlack A, Robinson MD, Young MD. From RNA-Seq reads to differential expression results. Genome Biol. 2010;11:220.","journal-title":"Genome Biol"},{"key":"1881_CR4","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/978-1-61779-427-8_19","volume":"822","author":"PH Gunaratne","year":"2012","unstructured":"Gunaratne PH, Coarfa C, Soibam B, Tandon A. miRNA data analysis: next-gen sequencing. Methods Mol Biol. 2012;822:273\u201388.","journal-title":"Methods Mol Biol"},{"key":"1881_CR5","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1016\/j.cell.2013.06.020","volume":"154","author":"I Ulitsky","year":"2013","unstructured":"Ulitsky I, Bartel DP. lincRNAs: genomics, evolution, and mechanisms. Cell. 2013;154:26\u201346.","journal-title":"Cell"},{"key":"1881_CR6","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1038\/nature11928","volume":"495","author":"S Memczak","year":"2013","unstructured":"Memczak S, Jens M, Elefsinioti A, Torti F, Krueger J, Rybak A, Maier L, Mackowiak SD, Gregersen LH, Munschauer M, et al. Circular RNAs are a large class of animal RNAs with regulatory potency. Nature. 2013;495:333\u20138.","journal-title":"Nature"},{"key":"1881_CR7","doi-asserted-by":"crossref","unstructured":"Reddy ASN, Rogers MF, Richardson DN, Hamilton M, Ben-Hur A. Deciphering the plant splicing code: experimental and computational approaches for predicting alternative splicing and splicing regulatory elements. Front Plant Sci. 2012;3","DOI":"10.3389\/fpls.2012.00018"},{"key":"1881_CR8","doi-asserted-by":"crossref","unstructured":"Hooper JE. A survey of software for genome-wide discovery of differential splicing in RNA-Seq data. Human Genomics. 2014;8","DOI":"10.1186\/1479-7364-8-3"},{"key":"1881_CR9","doi-asserted-by":"crossref","first-page":"81","DOI":"10.2144\/000112900","volume":"45","author":"RD Morin","year":"2008","unstructured":"Morin RD, Bainbridge M, Fejes A, Hirst M, Krzywinski M, Pugh TJ, McDonald H, Varhol R, Jones SJM, Marra MA. Profiling the HeLa S3 transcriptome using randomly primed cDNA and massively parallel short-read sequencing. BioTechniques. 2008;45:81\u201394.","journal-title":"BioTechniques"},{"key":"1881_CR10","doi-asserted-by":"crossref","first-page":"470","DOI":"10.1038\/nature07509","volume":"456","author":"ET Wang","year":"2008","unstructured":"Wang ET, Sandberg R, Luo S, Khrebtukova I, Zhang L, Mayr C, Kingsmore SF, Schroth GP, Burge CB. Alternative isoform regulation in human tissue transcriptions. Nature. 2008;456:470\u20136.","journal-title":"Nature"},{"key":"1881_CR11","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1101\/gr.074906.107","volume":"4","author":"MJ Fullwood","year":"2009","unstructured":"Fullwood MJ, Wei C-L, Liu ET, Ruan Y. Next-generation DNA sequencing of paired-end tags (PET) for transcriptome and genome analyses. Genome Res. 2009;4:521\u201332.","journal-title":"Genome Res"},{"key":"1881_CR12","doi-asserted-by":"crossref","first-page":"3264","DOI":"10.1073\/pnas.0812841106","volume":"9","author":"M Yassour","year":"2009","unstructured":"Yassour M, Kaplan T, Fraser HB, Levin JZ, Pfiffner J, Adiconis X, Schroth G, Luo S, Khrebtukova I, Gnirke A, et al. Ab initio construction of a eukaryotic transcriptome by massively parallel mRNA sequencing. Proc Natl Acad Sci U S A. 2009;9:3264\u20139.","journal-title":"Proc Natl Acad Sci U S A"},{"key":"1881_CR13","doi-asserted-by":"crossref","first-page":"503","DOI":"10.1038\/nbt.1633","volume":"28","author":"M Guttman","year":"2010","unstructured":"Guttman M, Garber M, Levin JZ, Donaghey J, Robinson J, Adiconis X, Fan L, Koziol MJ, Gnirke A, Nusbaum C, et al. Ab initio reconstruction of cell type-specific transcriptomes in mouse reveals the conserved multi-exomic structure of LineRNA. Nat Biotechnol. 2010;28:503\u201310.","journal-title":"Nat Biotechnol"},{"key":"1881_CR14","doi-asserted-by":"crossref","first-page":"511","DOI":"10.1038\/nbt.1621","volume":"28","author":"C Trapnell","year":"2010","unstructured":"Trapnell C, Williams BA, Pertea G, Mortazavi A, Kwan G, van Baren MJ, Salzberg SL, Wold BJ, Pachter L. Transcripts assembly and quantification by RNA-Seq reveals unannotated transcripts and isoform switching during cell differentiation. Nat Biotechnol. 2010;28:511\u20135.","journal-title":"Nat Biotechnol"},{"key":"1881_CR15","doi-asserted-by":"crossref","first-page":"1086","DOI":"10.1093\/bioinformatics\/bts094","volume":"28","author":"MH Schulz","year":"2012","unstructured":"Schulz MH, Zerbino DR, Vingron M, Birney E. Oases: robust de novo RNA-seq assembly across the dynamic range of expression levels. Bioinformatics. 2012;28:1086\u201392.","journal-title":"Bioinformatics"},{"key":"1881_CR16","doi-asserted-by":"crossref","first-page":"S6","DOI":"10.1186\/1471-2164-15-S5-S6","volume":"15","author":"S-H Sze","year":"2014","unstructured":"Sze S-H, Tarone AM. A memory-efficient algorithm to obtain splicing graphs and de novo expression estimates from de Bruijn graphs of RNA-Seq data. BMC Genomics. 2014;15:S6.","journal-title":"BMC Genomics"},{"key":"1881_CR17","doi-asserted-by":"crossref","first-page":"821","DOI":"10.1101\/gr.074492.107","volume":"18","author":"DR Zerbino","year":"2008","unstructured":"Zerbino DR, Birney E. Velvet: algorithms for de novo short read assembly using de Bruijn graphs. Genome Res. 2008;18:821\u20139.","journal-title":"Genome Res"},{"key":"1881_CR18","doi-asserted-by":"crossref","first-page":"2872","DOI":"10.1093\/bioinformatics\/btp367","volume":"25","author":"I Birol","year":"2009","unstructured":"Birol I, Jackman SD, Nielsen CB, Qian JQ, Varhol R, Stazyk G, Morin RD, Zhao Y, Hirst M, Schein JE, et al. De novo transcriptome assembly with ABySS. Bioinformatics. 2009;25:2872\u20137.","journal-title":"Bioinformatics"},{"key":"1881_CR19","doi-asserted-by":"crossref","first-page":"1117","DOI":"10.1101\/gr.089532.108","volume":"19","author":"JT Simpson","year":"2009","unstructured":"Simpson JT, Wong K, Jackman SD, Schein JE, Jones SJM, Birol I. ABySS: a parallel assembler for short read sequence data. Genome Res. 2009;19:1117\u201323.","journal-title":"Genome Res"},{"key":"1881_CR20","doi-asserted-by":"crossref","first-page":"1660","DOI":"10.1093\/bioinformatics\/btu077","volume":"30","author":"Y Xie","year":"2014","unstructured":"Xie Y, Wu G, Tang J, Luo R, Patterson J, Liu S, Huang W, He G, Gu S, Li S, et al. SOAPdenovo-trans: de novo transcriptome assembly with short RNA-Seq reads. Bioinformatics. 2014;30:1660\u20136.","journal-title":"Bioinformatics"},{"key":"1881_CR21","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1101\/gr.097261.109","volume":"20","author":"R Li","year":"2010","unstructured":"Li R, Zhu H, Ruan J, Qian W, Fang X, Shi Z, Li Y, Li S, Shan G, Kristiansen K, et al. De novo assembly of human genomes with massively parallel short read sequencing. Genome Res. 2010;20:265\u201372.","journal-title":"Genome Res"},{"key":"1881_CR22","doi-asserted-by":"crossref","first-page":"644","DOI":"10.1038\/nbt.1883","volume":"29","author":"MG Grabherr","year":"2011","unstructured":"Grabherr MG, Haas BJ, Yassour M, Levin JZ, Thompson DA, Amit I, Adiconis X, Fan L, Raychowdhury R, Zeng Q, et al. Full-length transcriptome assembly from RNA-seq data without a reference genome. Nat Biotechnol. 2011;29:644\u201352.","journal-title":"Nat Biotechnol"},{"key":"1881_CR23","doi-asserted-by":"crossref","unstructured":"Chang Z, Li G, Liu J, Zhang Y, Ashby C, Liu D, Cramer CL, Huang X. Bridger: a new framework for de novo transcriptome assembly using RNA-seq data. Genome Biol. 2015;16","DOI":"10.1186\/s13059-015-0596-2"},{"key":"1881_CR24","volume":"12","author":"J Liu","year":"2016","unstructured":"Liu J, Li G, Chang Z, Yu T, Liu B, McMullen R, Chen P, Huang X. BinPacker: packing-based de novo Transcriptome assembly from RNA-seq data. PLoS Comput Biol. 2016;12:e1004772.","journal-title":"PLoS Comput Biol"},{"key":"1881_CR25","unstructured":"Martello S, Toth P. Knapsack problems: algorithms and computer implementations: John Wiley and Sons; 1990."},{"key":"1881_CR26","doi-asserted-by":"crossref","unstructured":"Cabau C, Escudi\u00e9 F, Djari A, Guiguen Y, Bobe J, Klopp C. Compacting and correcting trinity and oases RNA-Seq de novo assemblies. PeerJ. 2017;5","DOI":"10.7717\/peerj.2988"},{"key":"1881_CR27","doi-asserted-by":"crossref","first-page":"S2","DOI":"10.1186\/1471-2105-12-S14-S2","volume":"12","author":"Q-Y Zhao","year":"2011","unstructured":"Zhao Q-Y, Wang Y, Kong Y-M, Luo D, Li X, Hao P. Optimizing de novo transcriptome assembly from short-read RNA-Seq data: a comparative study. BMC Bioinformatics. 2011;12:S2.","journal-title":"BMC Bioinformatics"},{"key":"1881_CR28","doi-asserted-by":"crossref","first-page":"566","DOI":"10.1109\/IPDPSW.2014.67","volume-title":"Parallel & distributed processing symposium workshops (IPDPSW), 2014 IEEE international; may 19\u201323","author":"V Sachdeva","year":"2014","unstructured":"Sachdeva V, Kim CS, Jordan KE, Winn MD. Parallelization of the trinity pipeline for de novo transcriptome assembly. In: Parallel & distributed processing symposium workshops (IPDPSW), 2014 IEEE international; may 19\u201323, vol. 2014. USA: Phoenix, AZ; 2014. p. 566\u201375."},{"key":"1881_CR29","volume-title":"arXiv:12034802","author":"CT Brown","year":"2012","unstructured":"Brown CT, Howe A, Zhang Q, Pyrkosz AB, Brom TH. A reference-free algorithm for computational normalization of shotgun sequencing data. In: arXiv:12034802; 2012."},{"key":"1881_CR30","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1109\/MCSE.2009.120","volume":"11","author":"J Dean","year":"2009","unstructured":"Dean J, Ghemawat S. MapReduce: simplified data processing on large clusters. Computing in Science and Engineering. 2009;11:29\u201341.","journal-title":"Computing in Science and Engineering"},{"key":"1881_CR31","doi-asserted-by":"crossref","first-page":"1297","DOI":"10.1101\/gr.107524.110","volume":"20","author":"A McKenna","year":"2010","unstructured":"McKenna A, Hanna M, Banks E, Sivachenko A, Kristian Cibulskis AK, Garimella K, Altshuler D, Gabriel S, Daly M, DePristo MA. The genome analysis toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010;20:1297\u2013303.","journal-title":"Genome Res"},{"key":"1881_CR32","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1186\/1756-0381-7-22","volume":"7","author":"EA Mohammed","year":"2014","unstructured":"Mohammed EA, Far BH, Naugler C. Applications of the MapReduce programming framework to clinical big data analysis: current landscape and future trends. Biodata Mining. 2014;7:22.","journal-title":"Biodata Mining"},{"key":"1881_CR33","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1016\/j.bbrc.2012.08.101","volume":"426","author":"B Xu","year":"2012","unstructured":"Xu B, Gao J, Li C. An efficient algorithm for DNA fragment assembly in MapReduce. Biochem Biophys Res Commun. 2012;426:395\u20138.","journal-title":"Biochem Biophys Res Commun"},{"key":"1881_CR34","unstructured":"MapReduce-MPI Library."},{"key":"1881_CR35","unstructured":"White T. Hadoop: The Definitive Guide. O'Reilly. Media. 2009;"},{"key":"1881_CR36","doi-asserted-by":"crossref","unstructured":"Ranger C, Raghuraman R, Penmetsa A, Bradski G, Kozyrakis C. Evaluating MapReduce for multi-core and multiprocessor systems. IEEE 13th International Symposium on High Performance Computer Architecture. 2007:13\u201324.","DOI":"10.1109\/HPCA.2007.346181"},{"key":"1881_CR37","doi-asserted-by":"crossref","first-page":"372","DOI":"10.1145\/362248.362272","volume":"16","author":"JE Hopcroft","year":"1973","unstructured":"Hopcroft JE, Tarjan RE. Efficient algorithms for graph manipulation. Commun ACM. 1973;16:372\u20138.","journal-title":"Commun ACM"},{"key":"1881_CR38","doi-asserted-by":"crossref","first-page":"610","DOI":"10.1016\/j.parco.2011.02.004","volume":"37","author":"SJ Plimpton","year":"2011","unstructured":"Plimpton SJ, Devine KD. MapReduce in MPI for large-scale graph algorithms. Parallel Comput. 2011;37:610\u201332.","journal-title":"Parallel Comput"},{"key":"1881_CR39","doi-asserted-by":"crossref","first-page":"493","DOI":"10.1093\/bioinformatics\/btp692","volume":"26","author":"B Li","year":"2010","unstructured":"Li B, Ruotti V, Stewart RM, Thomson JA, Dewey CN. RNA-Seq gene expression estimation with read mapping uncertainty. Bioinformatics. 2010;26:493\u2013500.","journal-title":"Bioinformatics"},{"key":"1881_CR40","doi-asserted-by":"crossref","unstructured":"Li B, Dewey CN. RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome. BMC Bioinformatics. 2011;12","DOI":"10.1186\/1471-2105-12-323"},{"key":"1881_CR41","doi-asserted-by":"crossref","first-page":"553","DOI":"10.1186\/s13059-014-0553-5","volume":"15","author":"B Li","year":"2014","unstructured":"Li B, Fillmore N, Bai Y, Collins M, Thomson JA, Stewart R, Dewey C. Evaluation of de novo transcriptome assemblies from RNA-Seq data. Genome Biol. 2014;15:553.","journal-title":"Genome Biol"},{"key":"1881_CR42","first-page":"654","volume":"12","author":"WJ Kent","year":"2002","unstructured":"Kent WJ. BLAT-the BLAST-like alignment tool. Genome Res. 2002;12:654\u201364.","journal-title":"Genome Res"},{"key":"1881_CR43","doi-asserted-by":"crossref","first-page":"3003","DOI":"10.1093\/bioinformatics\/btt534","volume":"29","author":"BS Pedersen","year":"2013","unstructured":"Pedersen BS, Yang IV, De S. CruzDB: software for annotation of genomic intervals with UCSC genome-browser database. Bioinformatics. 2013;29:3003\u20136.","journal-title":"Bioinformatics"},{"key":"1881_CR44","doi-asserted-by":"crossref","first-page":"764","DOI":"10.1093\/bioinformatics\/btr011","volume":"27","author":"G Marcais","year":"2011","unstructured":"Marcais G, Kingsford C. A fast, lock-free approach for efficient parallel counting of occurrences of k-mers. Bioinformatics. 2011;27:764\u201370.","journal-title":"Bioinformatics"},{"key":"1881_CR45","doi-asserted-by":"crossref","first-page":"13272","DOI":"10.1073\/pnas.1121464109","volume":"109","author":"J Pell","year":"2012","unstructured":"Pell J, Hintze A, Canino-Koning R, Howe A, Tiedje JM, Brown CT. Scaling metagenome sequence assembly with probabilistic de Bruijn graphs. Proc Natl Acad Sci U S A. 2012;109:13272\u20137.","journal-title":"Proc Natl Acad Sci U S A"},{"key":"1881_CR46","doi-asserted-by":"crossref","unstructured":"Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a bloom filter. Algorithms for Molecular Biology. 2013;8","DOI":"10.1186\/1748-7188-8-22"},{"key":"1881_CR47","doi-asserted-by":"crossref","first-page":"i94","DOI":"10.1093\/bioinformatics\/btr216","volume":"27","author":"Y Peng","year":"2011","unstructured":"Peng Y, Leung HCM, Yiu SM, Chin FYL. Meta-IDBA: a de novo assembler for metagenomic data. Bioinformatics. 2011;27:i94\u2013i101.","journal-title":"Bioinformatics"},{"key":"1881_CR48","doi-asserted-by":"crossref","first-page":"540","DOI":"10.1089\/cmb.2013.0042","volume":"20","author":"HCM Leung","year":"2013","unstructured":"Leung HCM, Yiu SM, Parkinson J, Chin FYL. IDBA-MT: de novo assembler for Metatranscriptomic data generated from next-generation sequencing technology. J Comp Biol. 2013;20:540\u201350.","journal-title":"J Comp Biol"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-017-1881-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,20]],"date-time":"2020-10-20T21:41:19Z","timestamp":1603230079000},"score":1,"resource":{"primary":{"URL":"http:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-017-1881-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,11,3]]},"references-count":48,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["1881"],"URL":"https:\/\/doi.org\/10.1186\/s12859-017-1881-8","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/149948","asserted-by":"object"}]},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,11,3]]},"article-number":"467"}}