{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T08:47:44Z","timestamp":1758271664801},"reference-count":14,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2017,12,15]],"date-time":"2017-12-15T00:00:00Z","timestamp":1513296000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,5,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Despite successful applications of data clustering and visualization techniques in molecular sequence identification, current technologies still do not scale to large biological datasets.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We address this problem by a new multi-threaded tool, fMLC, primarily developed to cluster DNA sequences, that is supplemented with an interactive web-based visualization component, DiVE. fMLC enabled to compare, cluster and visualize 350K ITS fungal sequences at the species level. It took less than two hours to compare and cluster the dataset, which is twelve times faster than the time reported previously.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>https:\/\/github.com\/FastMLC\/fMLC (doi: 10.5281\/zenodo.926820)<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btx810","type":"journal-article","created":{"date-parts":[[2017,12,14]],"date-time":"2017-12-14T16:12:05Z","timestamp":1513267925000},"page":"1577-1579","source":"Crossref","is-referenced-by-count":11,"title":["fMLC: fast multi-level clustering and visualization of large molecular datasets"],"prefix":"10.1093","volume":"34","author":[{"given":"D","family":"Vu","sequence":"first","affiliation":[{"name":"Bioinformatics group, Westerdijk Fungal Biodiversity Institute, Utrecht, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S","family":"Georgievska","sequence":"additional","affiliation":[{"name":"Netherlands eScience Center, XG Amsterdam, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S","family":"Szoke","sequence":"additional","affiliation":[{"name":"Bioinformatics group, Westerdijk Fungal Biodiversity Institute, Utrecht, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"A","family":"Kuzniar","sequence":"additional","affiliation":[{"name":"Netherlands eScience Center, XG Amsterdam, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"V","family":"Robert","sequence":"additional","affiliation":[{"name":"Bioinformatics group, Westerdijk Fungal Biodiversity Institute, Utrecht, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2017,12,15]]},"reference":[{"key":"2023012713021645100_btx810-B1","doi-asserted-by":"crossref","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","article-title":"Gapped BLAST and PSI-BLAST: a new generation protein database search programs","volume":"25","author":"Altschul","year":"1997","journal-title":"Nucleic Acids Res"},{"key":"2023012713021645100_btx810-B2","doi-asserted-by":"crossref","first-page":"935","DOI":"10.1093\/bioinformatics\/17.10.935","article-title":"Clustering protein sequences- structure prediction by transitive homology","volume":"17","author":"Bolten","year":"2001","journal-title":"Bioinformatics"},{"key":"2023012713021645100_btx810-B3","doi-asserted-by":"crossref","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","article-title":"Search and clustering orders of magnitude faster than BLAST","volume":"26","author":"Edgar","year":"2010","journal-title":"Bioinformatics"},{"key":"2023012713021645100_btx810-B4","doi-asserted-by":"crossref","first-page":"3150","DOI":"10.1093\/bioinformatics\/bts565","article-title":"CD-HIT: accelerated for clustering the next-generation sequencing data","volume":"28","author":"Fu","year":"2012","journal-title":"Bioinformatics"},{"key":"2023012713021645100_btx810-B5","author":"Georgievska","year":"2017"},{"key":"2023012713021645100_btx810-B6","doi-asserted-by":"crossref","first-page":"2482","DOI":"10.1093\/bioinformatics\/btq435","article-title":"Multi-netclust: an efficient tool for finding connected clusters in multi-parametric networks","volume":"26","author":"Kuzniar","year":"2010","journal-title":"Bioinformatics"},{"key":"2023012713021645100_btx810-B7","first-page":"3221","article-title":"Accelerating t-SNE using Tree-Based Algorithms","volume":"15","author":"Maaten","year":"2014","journal-title":"J. Mach. Learn. Res"},{"key":"2023012713021645100_btx810-B8","doi-asserted-by":"crossref","first-page":"1571","DOI":"10.1093\/nar\/gkj515","article-title":"Spectral clustering of proteins sequences","volume":"34","author":"Paccanaro","year":"2006","journal-title":"Nucleic Acids Res"},{"key":"2023012713021645100_btx810-B9","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511806384","volume-title":"Experimental Design and Data Analysis for Biologists","author":"Quinn","year":"2002"},{"key":"2023012713021645100_btx810-B10","doi-asserted-by":"crossref","DOI":"10.1038\/nbt.3988","article-title":"MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets","author":"Steinegger","year":"2017","journal-title":"Nat. Biotechnol"},{"key":"2023012713021645100_btx810-B11","first-page":"287","author":"Tang","year":"2016"},{"key":"2023012713021645100_btx810-B12","doi-asserted-by":"crossref","first-page":"6837","DOI":"10.1038\/srep06837","article-title":"Massive fungal biodiversity data re-annotation with multi-level clustering","volume":"4","author":"Vu","year":"2014","journal-title":"Sci. Rep"},{"key":"2023012713021645100_btx810-B13","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1016\/j.simyco.2016.11.007","article-title":"DNA barcoding analysis of more than 9000 yeast isolates contributes to quantitative thresholds for yeast species and genera delimitation","volume":"85","author":"Vu","year":"2016","journal-title":"Stud. Mycol"},{"key":"2023012713021645100_btx810-B14","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1038\/nmeth0610-419","article-title":"Partitioning biological data with transitivity clustering","volume":"7","author":"Wittkop","year":"2010","journal-title":"Nat. Methods"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/34\/9\/1577\/48915098\/bioinformatics_34_9_1577.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/34\/9\/1577\/48915098\/bioinformatics_34_9_1577.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T13:54:02Z","timestamp":1674827642000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/34\/9\/1577\/4747887"}},"subtitle":[],"editor":[{"given":"John","family":"Hancock","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2017,12,15]]},"references-count":14,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2018,5,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btx810","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2018,5,1]]},"published":{"date-parts":[[2017,12,15]]}}}