{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T00:13:54Z","timestamp":1773274434365,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"S6","content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1186\/1471-2105-16-s6-s2","type":"journal-article","created":{"date-parts":[[2015,6,18]],"date-time":"2015-06-18T18:01:55Z","timestamp":1434650515000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":43,"title":["Probabilistic topic modeling for the analysis and classification of genomic sequences"],"prefix":"10.1186","volume":"16","author":[{"given":"Massimo","family":"La Rosa","sequence":"first","affiliation":[]},{"given":"Antonino","family":"Fiannaca","sequence":"additional","affiliation":[]},{"given":"Riccardo","family":"Rizzo","sequence":"additional","affiliation":[]},{"given":"Alfonso","family":"Urso","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,4,17]]},"reference":[{"issue":"9","key":"6922_CR1","doi-asserted-by":"publisher","first-page":"4311","DOI":"10.1128\/JCM.43.9.4311-4315.2005","volume":"43","author":"M Drancourt","year":"2005","unstructured":"Drancourt M, Raoult D: Sequence-based identification of new bacteria: a proposition for creation of an orphan bacterium repository. J Clin Microbiol. 2005, 43 (9): 4311-4315. 10.1128\/JCM.43.9.4311-4315.2005.","journal-title":"J Clin Microbiol"},{"issue":"6783","key":"6922_CR2","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1038\/35012228","volume":"405","author":"KJ Gaston","year":"2000","unstructured":"Gaston KJ: Global patterns in biodiversity. Nature. 2000, 405 (6783): 220-7. 10.1038\/35012228.","journal-title":"Nature"},{"issue":"5","key":"6922_CR3","doi-asserted-by":"publisher","first-page":"2197","DOI":"10.1128\/JCM.42.5.2197-2202.2004","volume":"42","author":"M Drancourt","year":"2004","unstructured":"Drancourt M, Berger P, Raoult D: Systematic 16S rRNA Gene Sequencing of Atypical Clinical Isolates Identified 27 New Bacterial Species Associated with Humans. Journal of Clinical Microbiology. 2004, 42 (5): 2197-2202. 10.1128\/JCM.42.5.2197-2202.2004.","journal-title":"Journal of Clinical Microbiology"},{"issue":"Suppl 1","key":"6922_CR4","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1098\/rsbl.2003.0025","volume":"270","author":"PDN Hebert","year":"2003","unstructured":"Hebert PDN, Ratnasingham S, DeWaard JR: Barcoding animal life: cytochrome c oxidase subunit 1 divergences among closely related species. Proceedings of the Royal Society Series B, Biological sciences. 2003, 270 (Suppl 1): 96-99.","journal-title":"Proceedings of the Royal Society Series B, Biological sciences"},{"key":"6922_CR5","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780195135848.001.0001","volume-title":"Molecular Evolution and Phylogenetics","author":"M Nei","year":"2000","unstructured":"Nei M, Kumar MD: Molecular Evolution and Phylogenetics. 2000, Oxford University Press, New York"},{"key":"6922_CR6","first-page":"332","volume-title":"Advances in Intelligent Data Analysis VII Lecture Notes in Computer Science","author":"M La Rosa","year":"2007","unstructured":"La Rosa M, Di Fatta G, Gaglio S, Giammanco G, Rizzo R, Urso A: Soft topographic map for clustering and classification of bacteria. Advances in Intelligent Data Analysis VII Lecture Notes in Computer Science. 2007, Springer, Berlin, Heidelberg, 4723: 332-343."},{"key":"6922_CR7","volume-title":"Advances in Artificial Neural Systems","author":"M La Rosa","year":"2011","unstructured":"La Rosa M, Rizzo R, Urso A: Soft Topographic Maps for Clustering and Classifying Bacteria Using Housekeeping Genes. Advances in Artificial Neural Systems. 2011, 2011:"},{"key":"6922_CR8","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-540-85567-5_92","volume-title":"Knowledge-Based Intelligent Information and Engineering Systems Lecture Notes in Computer Science","author":"M La Rosa","year":"2008","unstructured":"La Rosa M, Rizzo R, Urso A, Gaglio S: Comparison of genomic sequences clustering using normalized compression distance and evolutionary distance. Knowledge-Based Intelligent Information and Engineering Systems Lecture Notes in Computer Science. 2008, Springer, Berlin, Heidelberg, 5179: 740-746."},{"issue":"4","key":"6922_CR9","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1504\/IJKESDP.2009.028987","volume":"1","author":"M La Rosa","year":"2009","unstructured":"La Rosa M, Gaglio S, Rizzo R, Urso A: Normalised compression distance and evolutionary distance of genomic sequences: comparison of clustering results. International Journal of Knowledge Engineering and Soft Data Paradigms. 2009, 1 (4): 345-362. 10.1504\/IJKESDP.2009.028987.","journal-title":"International Journal of Knowledge Engineering and Soft Data Paradigms"},{"issue":"12","key":"6922_CR10","doi-asserted-by":"publisher","first-page":"3250","DOI":"10.1109\/TIT.2004.838101","volume":"50","author":"M Li","year":"2004","unstructured":"Li M, Chen X, Li X, Ma B, Vitanyi PMB: The similarity metric. IEEE Transactions on Information Theory. 2004, 50 (12): 3250-3264. 10.1109\/TIT.2004.838101.","journal-title":"IEEE Transactions on Information Theory"},{"key":"6922_CR11","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/978-3-642-38342-7_10","volume-title":"Computational Intelligence Methods for Bioinformatics and Biostatistics Lecture Notes in Computer Science","author":"M La Rosa","year":"2013","unstructured":"La Rosa M, Fiannaca A, Rizzo R, Urso A: A Study of Compression-Based Methods for the Analysis of Barcode Sequences. Computational Intelligence Methods for Bioinformatics and Biostatistics Lecture Notes in Computer Science. Edited by: Peterson, L.E., Masulli, F., Russo, G. 2013, Springer, Berlin, Heidelberg, 7845: 105-116. 10.1007\/978-3-642-38342-7_10."},{"issue":"Suppl 7","key":"6922_CR12","doi-asserted-by":"publisher","first-page":"S4","DOI":"10.1186\/1471-2105-14-S7-S4","volume":"14","author":"M La Rosa","year":"2013","unstructured":"La Rosa M, Fiannaca A, Rizzo R, Urso A: Alignment-free analysis of barcode sequences by means of compression-based methods. BMC Bioinformatics. 2013, 14 (Suppl 7): S4-10.1186\/1471-2105-14-S7-S4.","journal-title":"BMC Bioinformatics"},{"issue":"10","key":"6922_CR13","doi-asserted-by":"publisher","first-page":"R108","DOI":"10.1186\/gb-2009-10-10-r108","volume":"10","author":"B Chor","year":"2009","unstructured":"Chor B, Horn D, Goldman N, Levy Y, Massingham T: Genomic DNA k-mer spectra: models and modalities. Genome biology. 2009, 10 (10): R108-10.1186\/gb-2009-10-10-r108.","journal-title":"Genome biology"},{"key":"6922_CR14","volume-title":"Learning with Kernels","author":"B Scholkopf","year":"2002","unstructured":"Scholkopf B, Smola AJ: Learning with Kernels. 2002, MIT Press, Cambridge"},{"key":"6922_CR15","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1007\/978-3-540-74126-8_22","volume-title":"Algorithms in Bioinformatics Lecture Notes in Computer Science","author":"P Kuksa","year":"2007","unstructured":"Kuksa P, Pavlovic V: Fast Kernel Methods for SVM Sequence Classifiers. Algorithms in Bioinformatics Lecture Notes in Computer Science. Edited by: Giancarlo, R., Hannenhalli, S. 2007, Springer, Berlin, Heidelberg, 4645: 228-239. 10.1007\/978-3-540-74126-8_22."},{"issue":"Suppl 14","key":"6922_CR16","doi-asserted-by":"publisher","first-page":"S9","DOI":"10.1186\/1471-2105-10-S14-S9","volume":"10","author":"P Kuksa","year":"2009","unstructured":"Kuksa P, Pavlovic V: Efficient alignment-free DNA barcode analytics. BMC Bioinformatics. 2009, 10 (Suppl 14): S9-10.1186\/1471-2105-10-S14-S9.","journal-title":"BMC Bioinformatics"},{"issue":"4","key":"6922_CR17","doi-asserted-by":"publisher","first-page":"558","DOI":"10.1109\/72.238311","volume":"4","author":"TM Martinetz","year":"1993","unstructured":"Martinetz TM, Berkovich SG, Schulten KJ: \"Neural-gas\" network for vector quantization and its application to time-series prediction. IEEE transactions on neural networks. 1993, 4 (4): 558-569. 10.1109\/72.238311.","journal-title":"IEEE transactions on neural networks"},{"key":"6922_CR18","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1007\/978-3-642-41016-1_23","volume-title":"Engineering Applications of Neural Networks Communications in Computer and Information Science","author":"A Fiannaca","year":"2013","unstructured":"Fiannaca A, La Rosa M, Rizzo R, Urso A: Analysis of DNA Barcode Sequences Using Neural Gas and Spectral Representation. Engineering Applications of Neural Networks Communications in Computer and Information Science. Edited by: Iliadis, L., Papadopoulos, H., Jayne, C. 2013, Springer, Berlin, Heidelberg, 384: 212-221. 10.1007\/978-3-642-41016-1_23."},{"key":"6922_CR19","doi-asserted-by":"publisher","first-page":"1404","DOI":"10.1101\/gr.186401","volume":"11","author":"R Sandberg","year":"2001","unstructured":"Sandberg R, Winberg G, Br\u00e4nden C.-i, Kaske A, Ernberg I, C\u00f6ster J: Capturing Whole-Genome Characteristics in Short Sequences Using a Na\u00efve Bayesian Classifier. Genome Research. 2001, 11: 1404-1409. 10.1101\/gr.186401.","journal-title":"Genome Research"},{"issue":"16","key":"6922_CR20","doi-asserted-by":"publisher","first-page":"5261","DOI":"10.1128\/AEM.00062-07","volume":"73","author":"Q Wang","year":"2007","unstructured":"Wang Q, Garrity GM, Tiedje JM, Cole JR: Naive Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy. Applied and environmental microbiology. 2007, 73 (16): 5261-5267. 10.1128\/AEM.00062-07.","journal-title":"Applied and environmental microbiology"},{"issue":"18","key":"6922_CR21","doi-asserted-by":"publisher","first-page":"e120","DOI":"10.1093\/nar\/gkn491","volume":"36","author":"Z Liu","year":"2008","unstructured":"Liu Z, DeSantis TZ, Andersen GL, Knight R: Accurate taxonomy assignments from 16S rRNA sequences produced by highly parallel pyrosequencers. Nucleic acids research. 2008, 36 (18): e120-10.1093\/nar\/gkn491.","journal-title":"Nucleic acids research"},{"key":"6922_CR22","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1186\/1472-6785-11-11","volume":"11","author":"TZ DeSantis","year":"2011","unstructured":"DeSantis TZ, Keller K, Karaoz U, Alekseyenko AV, Singh NNS, Brodie EL, Pei Z, Andersen GL, Larsen N: Simrank: Rapid and sensitive general-purpose k-mer search tool. BMC Ecology. 2011, 11: 11-10.1186\/1472-6785-11-11.","journal-title":"BMC Ecology"},{"issue":"2-3","key":"6922_CR23","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1023\/A:1007413511361","volume":"29","author":"P Domingos","year":"1997","unstructured":"Domingos P, Pazzani M: On the optimality of the simple Bayesian classifier under zero-one loss. Machine Learning. 1997, 29 (2-3): 103-130.","journal-title":"Machine Learning"},{"key":"6922_CR24","volume-title":"Handbook of Latent Semantic Analysis","author":"M Steyvers","year":"2007","unstructured":"Steyvers M, Griffiths T: Probabilistic Topic Models. Handbook of Latent Semantic Analysis. Edited by: Landauer, T., McNamara, D.S., Dennis, S., Kintsch, W. 2007, Erlbaum, Hillsdale, NJ"},{"key":"6922_CR25","first-page":"524","volume-title":"IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'05)","author":"P Perona","year":"2005","unstructured":"Perona P: A Bayesian Hierarchical Model for Learning Natural Scene Categories. IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'05). 2005, IEEE, 2: 524-531."},{"issue":"11","key":"6922_CR26","doi-asserted-by":"publisher","first-page":"2302","DOI":"10.1109\/TPAMI.2011.79","volume":"33","author":"E Bart","year":"2011","unstructured":"Bart E, Welling M, Perona P: Unsupervised organization of image collections: taxonomies and beyond. IEEE transactions on pattern analysis and machine intelligence. 2011, 33 (11): 2302-2315.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"6922_CR27","volume-title":"Proceedings of the 26th Annual International ACM SIGIR Conference on Research and Development in Informaion Retrieval - SIGIR '03","author":"DM Blei","year":"2003","unstructured":"Blei DM, Jordan MI: Modeling annotated data. Proceedings of the 26th Annual International ACM SIGIR Conference on Research and Development in Informaion Retrieval - SIGIR '03. 2003, ACM Press, New York, New York, USA, 127:"},{"key":"6922_CR28","first-page":"441","volume-title":"10th International Society for Music Information Retrieval Conference (ISMIR 2009)","author":"DJ Hu","year":"2009","unstructured":"Hu DJ, Saul LK: A probabilistic topic model for unsupervised learning of musical key-profiles. 10th International Society for Music Information Retrieval Conference (ISMIR 2009). 2009, 441-446."},{"key":"6922_CR29","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1109\/ASPAA.2009.5346483","volume-title":"2009 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics","author":"S Kim","year":"2009","unstructured":"Kim S, Narayanan S, Sundaram S: Acoustic topic model for audio information retrieval. 2009 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics. 2009, 37-40."},{"issue":"4","key":"6922_CR30","doi-asserted-by":"crossref","first-page":"1567","DOI":"10.1093\/genetics\/164.4.1567","volume":"164","author":"D Falush","year":"2003","unstructured":"Falush D, Stephens M, Pritchard JK: Inference of Population Structure Using Multilocus Genotype Data: Linked Loci and Correlated Allele Frequencies. Genetics. 2003, 164 (4): 1567-1587.","journal-title":"Genetics"},{"key":"6922_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/BIBE.2013.6701702","volume-title":"13th IEEE International Conference on BioInformatics and BioEngineering","author":"P Pinoli","year":"2013","unstructured":"Pinoli P, Chicco D, Masseroli M: Enhanced probabilistic latent semantic analysis with weighting schemes to predict genomic annotations. 13th IEEE International Conference on BioInformatics and BioEngineering. 2013, IEEE, Los Alamitos, CA, USA, 1-4."},{"key":"6922_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/IJCNN.2012.6252767","volume-title":"The 2012 International Joint Conference on Neural Networks (IJCNN)","author":"M Masseroli","year":"2012","unstructured":"Masseroli M, Chicco D, Pinoli P: Probabilistic Latent Semantic Analysis for prediction of Gene Ontology annotations. The 2012 International Joint Conference on Neural Networks (IJCNN). 2012, IEEE, Brisbane, QLD, 1-8."},{"key":"6922_CR33","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1145\/312624.312649","volume-title":"Proceedings of the 22nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval - SIGIR '99","author":"T Hofmann","year":"1999","unstructured":"Hofmann T: Probabilistic latent semantic indexing. Proceedings of the 22nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval - SIGIR '99. 1999, ACM Press, New York, New York, USA, 50-57."},{"issue":"4","key":"6922_CR34","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1145\/2133806.2133826","volume":"55","author":"DM Blei","year":"2012","unstructured":"Blei DM: Probabilistic Topic Models. Communication of the ACM. 2012, 55 (4): 77-84. 10.1145\/2133806.2133826.","journal-title":"Communication of the ACM"},{"issue":"Suppl 1","key":"6922_CR35","doi-asserted-by":"publisher","first-page":"5228","DOI":"10.1073\/pnas.0307752101","volume":"101","author":"TL Griffiths","year":"2004","unstructured":"Griffiths TL, Steyvers M: Finding scientific topics. PNAS. 2004, 101 (Suppl 1): 5228-5235.","journal-title":"PNAS"},{"key":"6922_CR36","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI: Latent Dirichlet Allocation. J Mach Learn Res. 2003, 3: 993-1022.","journal-title":"J Mach Learn Res"},{"key":"6922_CR37","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1145\/1143844.1143917","volume-title":"Proceedings of the 23rd International Conference on Machine Learning - ICML '06","author":"W Li","year":"2006","unstructured":"Li W, McCallum A: Pachinko allocation: DAG-structured mixture models of topic correlations. Proceedings of the 23rd International Conference on Machine Learning - ICML '06. 2006, ACM Press, New York, New York, USA, 577-584."},{"issue":"476","key":"6922_CR38","doi-asserted-by":"publisher","first-page":"1566","DOI":"10.1198\/016214506000000302","volume":"101","author":"YW Teh","year":"2006","unstructured":"Teh YW, Jordan MI, Beal MJ, Blei DM: Hierarchical Dirichlet Processes. Journal of the American Statistical Association. 2006, 101 (476): 1566-1581. 10.1198\/016214506000000302.","journal-title":"Journal of the American Statistical Association"},{"key":"6922_CR39","doi-asserted-by":"crossref","unstructured":"Grun B, Hornik K: topicmodels: An R Package for Fitting Topic Models. Journal of Statistical Software. 2011, 40 (13):","DOI":"10.18637\/jss.v040.i13"},{"issue":"3","key":"6922_CR40","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1080\/00031305.1992.10475878","volume":"46","author":"G Casella","year":"1992","unstructured":"Casella G, George EI: Explaining the Gibbs Sampler. The American Statistician. 1992, 46 (3): 167-174.","journal-title":"The American Statistician"},{"issue":"Database issue","key":"6922_CR41","doi-asserted-by":"publisher","first-page":"D141","DOI":"10.1093\/nar\/gkn879","volume":"37","author":"JR Cole","year":"2009","unstructured":"Cole JR, Wang Q, Cardenas E, Fish J, Chai B, Farris RJ, Kulam-Syed-Mohideen aS, McGarrell DM, Marsh T, Garrity GM, Tiedje JM: The Ribosomal Database Project: improved alignments and new tools for rRNA analysis. Nucleic acids research. 2009, 37 (Database issue): D141-D145.","journal-title":"Nucleic acids research"},{"issue":"Database issue","key":"6922_CR42","doi-asserted-by":"publisher","first-page":"D492","DOI":"10.1093\/nar\/gkp858","volume":"38","author":"LY Geer","year":"2010","unstructured":"Geer LY, Marchler-Bauer A, Geer RC, Han L, He J, He S, Liu C, Shi W, Bryant SH: The NCBI BioSystems database. Nucleic acids research. 2010, 38 (Database issue): D492-D496.","journal-title":"Nucleic acids research"},{"key":"6922_CR43","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1007\/978-3-319-09042-9_4","volume-title":"Computational Intelligence Methods for Bioinformatics and Biostatistics Lecture Notes in Computer Science","author":"M La Rosa","year":"2014","unstructured":"La Rosa M, Fiannaca A, Rizzo R, Urso A: Genomic Sequence Classification using Probabilistic Topic Modeling. Computational Intelligence Methods for Bioinformatics and Biostatistics Lecture Notes in Computer Science. 2014, Springer, Berlin, Heidelberg, 8452: 49-61. 10.1007\/978-3-319-09042-9_4."},{"issue":"2","key":"6922_CR44","doi-asserted-by":"publisher","first-page":"e1000667","DOI":"10.1371\/journal.pcbi.1000667","volume":"6","author":"JC Wooley","year":"2010","unstructured":"Wooley JC, Godzik A, Friedberg I: A primer on metagenomics. PLoS Computat Biol. 2010, 6 (2): e1000667-10.1371\/journal.pcbi.1000667.","journal-title":"PLoS Computat Biol"},{"issue":"9","key":"6922_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v015.i09","volume":"15","author":"A Karatzoglou","year":"2006","unstructured":"Karatzoglou A, Meyer D, Hornik K: Support Vector Machines in R. Journal of Statistical Software. 2006, 15 (9): 1-28.","journal-title":"Journal of Statistical Software"},{"issue":"3","key":"6922_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1961189.1961199","volume":"2","author":"CC Chang","year":"2011","unstructured":"Chang CC, Lin CJ: LIBSVM: A library for support vector machines. ACM Transactions on Intelligent Systems and Technology. 2011, 2 (3): 1-27.","journal-title":"ACM Transactions on Intelligent Systems and Technology"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1471-2105-16-S6-S2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,9]],"date-time":"2024-06-09T15:56:54Z","timestamp":1717948614000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/1471-2105-16-S6-S2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,4,17]]},"references-count":46,"journal-issue":{"issue":"S6","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["6922"],"URL":"https:\/\/doi.org\/10.1186\/1471-2105-16-s6-s2","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,4,17]]},"assertion":[{"value":"17 April 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"S2"}}