{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:24:05Z","timestamp":1740108245242,"version":"3.37.3"},"reference-count":58,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,7,8]],"date-time":"2020-07-08T00:00:00Z","timestamp":1594166400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,7,8]],"date-time":"2020-07-08T00:00:00Z","timestamp":1594166400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Domaine d\u2019Int\u00e9r\u00eat Majeur - MathInnov","award":["DIM-MathInnov 2017"],"award-info":[{"award-number":["DIM-MathInnov 2017"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Comput Stat"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s00180-020-01008-9","type":"journal-article","created":{"date-parts":[[2020,7,8]],"date-time":"2020-07-08T21:02:53Z","timestamp":1594242173000},"page":"1-33","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Greedy clustering of count data through a mixture of multinomial PCA"],"prefix":"10.1007","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0331-1571","authenticated-orcid":false,"given":"Nicolas","family":"Jouvin","sequence":"first","affiliation":[]},{"given":"Pierre","family":"Latouche","sequence":"additional","affiliation":[]},{"given":"Charles","family":"Bouveyron","sequence":"additional","affiliation":[]},{"given":"Guillaume","family":"Bataillon","sequence":"additional","affiliation":[]},{"given":"Alain","family":"Livartowski","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,7,8]]},"reference":[{"key":"1008_CR1","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/978-1-4614-3223-4_4","volume-title":"Mining text data","author":"CC Aggarwal","year":"2012","unstructured":"Aggarwal CC, Zhai C (2012) A survey of text clustering algorithms. Mining text data. Springer, New York, pp 77\u2013128"},{"key":"1008_CR2","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1007\/978-1-4612-1694-0_15","volume-title":"Selected papers of hirotugu akaike","author":"H Akaike","year":"1998","unstructured":"Akaike H (1998) Information theory and an extension of the maximum likelihood principle. Selected papers of hirotugu akaike. Springer, New York, pp 199\u2013213"},{"issue":"10","key":"1008_CR3","doi-asserted-by":"publisher","first-page":"R106","DOI":"10.1186\/gb-2010-11-10-r106","volume":"11","author":"S Anders","year":"2010","unstructured":"Anders S, Huber W (2010) Differential expression analysis for sequence count data. Genome Biol 11(10):R106","journal-title":"Genome Biol"},{"key":"1008_CR4","doi-asserted-by":"crossref","unstructured":"Banfield JD, Raftery AE (1993) Model-based gaussian and non-gaussian clustering. Biometrics 803\u2013821","DOI":"10.2307\/2532201"},{"key":"1008_CR5","doi-asserted-by":"crossref","unstructured":"Berg\u00e9 LR, Bouveyron C, Corneli M, Latouche P (2019) The latent topic block model for the co-clustering of textual interaction data. Comput Stat Data Anal","DOI":"10.1016\/j.csda.2019.03.005"},{"issue":"7","key":"1008_CR6","doi-asserted-by":"publisher","first-page":"719","DOI":"10.1109\/34.865189","volume":"22","author":"C Biernacki","year":"2000","unstructured":"Biernacki C, Celeux G, Govaert G (2000) Assessing a mixture model for clustering with the integrated completed likelihood. IEEE Trans Pattern Anal Mach Intell 22(7):719\u2013725","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"518","key":"1008_CR7","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1080\/01621459.2017.1285773","volume":"112","author":"DM Blei","year":"2017","unstructured":"Blei DM, Kucukelbir A, McAuliffe JD (2017) Variational inference: a review for statisticians. J Am Stat Assoc 112(518):859\u2013877","journal-title":"J Am Stat Assoc"},{"issue":"Jan","key":"1008_CR8","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI (2003) Latent dirichlet allocation. J Mach Learn Res 3(Jan):993\u20131022","journal-title":"J Mach Learn Res"},{"key":"1008_CR9","unstructured":"Bouveyron C, Celeux G, Murphy TB, Raftery AE (2019) Model-based clustering and classification for data science: with applications in R. Cambridge Series in Statistical and Probabilistic Mathematics. Cambridge University Press, Cambridge"},{"issue":"1","key":"1008_CR10","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1016\/j.csda.2007.02.009","volume":"52","author":"C Bouveyron","year":"2007","unstructured":"Bouveyron C, Girard S, Schmid C (2007) High-dimensional data clustering. Comput Stat Data Anal 52(1):502\u2013519","journal-title":"Comput Stat Data Anal"},{"issue":"1","key":"1008_CR11","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/s11222-016-9713-7","volume":"28","author":"C Bouveyron","year":"2018","unstructured":"Bouveyron C, Latouche P, Zreik R (2018) The stochastic topic block model for the clustering of vertices in networks with textual edges. Stat Comput 28(1):11\u201331","journal-title":"Stat Comput"},{"key":"1008_CR12","doi-asserted-by":"crossref","unstructured":"Bui QV, Sayadi K, Amor SB, Bui M (2017) Combining latent dirichlet allocation and k-means for documents clustering: effect of probabilistic based distance measures. In: Asian conference on intelligent information and database systems. Springer, New York, pp 248\u2013257","DOI":"10.1007\/978-3-319-54472-4_24"},{"key":"1008_CR13","doi-asserted-by":"crossref","unstructured":"Buntine W (2002) Variational extensions to em and multinomial pca. In: European conference on machine learning. Springer, New York, pp 23\u201334","DOI":"10.1007\/3-540-36755-1_3"},{"key":"1008_CR14","unstructured":"Buntine WL, Perttu S (2003) Is multinomial pca multi-faceted clustering or dimensionality reduction? In AISTATS"},{"key":"1008_CR15","unstructured":"Carel L, Alquier P (2017) Simultaneous dimension reduction and clustering via the nmf-em algorithm. arXiv preprint arXiv:1709.03346"},{"issue":"3","key":"1008_CR16","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1016\/0167-9473(92)90042-E","volume":"14","author":"G Celeux","year":"1992","unstructured":"Celeux G, Govaert G (1992) A classification em algorithm for clustering and two stochastic versions. Comput Stat Data Anal 14(3):315\u2013332","journal-title":"Comput Stat Data Anal"},{"key":"1008_CR17","doi-asserted-by":"crossref","unstructured":"Chien J-T, Lee C-H, Tan Z-H (2017) Latent dirichlet mixture model. Neurocomputing","DOI":"10.1016\/j.neucom.2017.08.029"},{"issue":"4","key":"1008_CR18","doi-asserted-by":"publisher","first-page":"2674","DOI":"10.1214\/18-AOAS1177","volume":"12","author":"J Chiquet","year":"2018","unstructured":"Chiquet J, Mariadassou M, Robin S et al (2018) Variational inference for probabilistic poisson pca. Ann Appl Stat 12(4):2674\u20132698","journal-title":"Ann Appl Stat"},{"issue":"5","key":"1008_CR19","doi-asserted-by":"publisher","first-page":"1135","DOI":"10.1890\/04-0589","volume":"86","author":"RB Cunningham","year":"2005","unstructured":"Cunningham RB, Lindenmayer DB (2005) Modeling count data of rare species: some statistical issues. Ecology 86(5):1135\u20131142","journal-title":"Ecology"},{"issue":"2","key":"1008_CR20","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/s11222-007-9046-7","volume":"18","author":"J-J Daudin","year":"2008","unstructured":"Daudin J-J, Picard F, Robin S (2008) A mixture model for random graphs. Stat Comput 18(2):173\u2013183","journal-title":"Stat Comput"},{"key":"1008_CR21","unstructured":"Defossez G, Le\u00a0Guyader-Peyrou S, Uhry Z, Grosclaude P, Remontet L, Colonna M, Dantony E, Delafosse P, Molini\u00e9 F, Woronoff A-S, et\u00a0al (2019) Estimations nationales de l\u2019incidence et de la mortalit\u00e9 par cancer en france m\u00e9tropolitaine entre 1990 et 2018. R\u00e9sultats pr\u00e9liminaires. Saint-Maurice (Fra): Sant\u00e9 publique France"},{"issue":"1","key":"1008_CR22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster AP, Laird NM, Rubin DB (1977) Maximum likelihood from incomplete data via the em algorithm. J R Stat Soc: Ser B (Methodol) 39(1):1\u201322","journal-title":"J R Stat Soc: Ser B (Methodol)"},{"issue":"8","key":"1008_CR23","doi-asserted-by":"publisher","first-page":"3913","DOI":"10.1016\/j.csda.2008.01.011","volume":"52","author":"C Ding","year":"2008","unstructured":"Ding C, Li T, Peng W (2008) On the equivalence between non-negative matrix factorization and probabilistic latent semantic indexing. Comput Stat Data Anal 52(8):3913\u20133927","journal-title":"Comput Stat Data Anal"},{"issue":"3","key":"1008_CR24","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/BF02288367","volume":"1","author":"C Eckart","year":"1936","unstructured":"Eckart C, Young G (1936) The approximation of one matrix by another of lower rank. Psychometrika 1(3):211\u2013218","journal-title":"Psychometrika"},{"key":"1008_CR25","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/B978-0-443-06680-1.50026-0","volume-title":"Breast pathology","author":"IO Ellis","year":"2006","unstructured":"Ellis IO, Elston CW (2006) Histologic grade. Breast pathology. Elsevier, Amsterdam, pp 225\u2013233"},{"issue":"11","key":"1008_CR26","doi-asserted-by":"publisher","first-page":"e26785","DOI":"10.1371\/journal.pone.0026785","volume":"6","author":"JA Fordyce","year":"2011","unstructured":"Fordyce JA, Gompert Z, Forister ML, Nice CC (2011) A hierarchical bayesian approach to ecological count data: a flexible tool for ecologists. PLoS ONE 6(11):e26785","journal-title":"PLoS ONE"},{"key":"1008_CR27","volume-title":"Clustering algorithms","author":"JA Hartigan","year":"1975","unstructured":"Hartigan JA (1975) Clustering algorithms. Wiley, Hoboken"},{"key":"1008_CR28","unstructured":"Hoffman M, Bach FR, Blei DM (2010) Online learning for latent dirichlet allocation. Adv Neural Inf Process Syst 856\u2013864"},{"key":"1008_CR29","unstructured":"Hofmann T (1999) Probabilistic latent semantic analysis. In: Proceedings of the fifteenth conference on Uncertainty in artificial intelligence. Morgan Kaufmann Publishers Inc, pp 289\u2013296"},{"issue":"13","key":"1008_CR30","first-page":"1","volume":"40","author":"K Hornik","year":"2011","unstructured":"Hornik K, Gr\u00fcn B (2011) topicmodels: an r package for fitting topic models. J Stat Softw 40(13):1\u201330","journal-title":"J Stat Softw"},{"issue":"6","key":"1008_CR31","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1037\/h0071325","volume":"24","author":"H Hotelling","year":"1933","unstructured":"Hotelling H (1933) Analysis of a complex of statistical variables into principal components. J Educ Psychol 24(6):417","journal-title":"J Educ Psychol"},{"key":"1008_CR32","unstructured":"Lakhani SR (2012) WHO classification of tumours of the breast. International Agency for Research on Cancer"},{"key":"1008_CR33","doi-asserted-by":"crossref","unstructured":"Lazebnik S, Schmid C, Ponce J (2006) Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories. In: 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201906), volume\u00a02. IEEE, pp 2169\u20132178","DOI":"10.1109\/CVPR.2006.68"},{"issue":"6755","key":"1008_CR34","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1038\/44565","volume":"401","author":"DD Lee","year":"1999","unstructured":"Lee DD, Seung HS (1999) Learning the parts of objects by non-negative matrix factorization. Nature 401(6755):788","journal-title":"Nature"},{"key":"1008_CR35","unstructured":"Lee DD, Seung HS (2001) Algorithms for non-negative matrix factorization. Adv Neural Inf Process Syst 556\u2013562"},{"issue":"1","key":"1008_CR36","doi-asserted-by":"publisher","first-page":"1608","DOI":"10.1186\/s40064-016-3252-8","volume":"5","author":"L Liu","year":"2016","unstructured":"Liu L, Tang L, Dong W, Yao S, Zhou W (2016) An overview of topic modeling and its current applications in bioinformatics. SpringerPlus 5(1):1608","journal-title":"SpringerPlus"},{"key":"1008_CR37","unstructured":"Mattei P-A, Bouveyron C, Latouche P (2016) Globally sparse probabilistic pca. Artif Intell Stat 976\u2013984"},{"key":"1008_CR38","doi-asserted-by":"crossref","unstructured":"McLachlan G, Peel D (2000) Finite mixture models. Willey Series in Probability and Statistics","DOI":"10.1002\/0471721182"},{"issue":"3","key":"1008_CR39","first-page":"370","volume":"135","author":"JA Nelder","year":"1972","unstructured":"Nelder JA, Wedderburn RW (1972) Generalized linear models. J R Stat Soc: Seri A (Gen) 135(3):370\u2013384","journal-title":"J R Stat Soc: Seri A (Gen)"},{"issue":"1","key":"1008_CR40","first-page":"42","volume":"9","author":"J Osborne","year":"2005","unstructured":"Osborne J (2005) Notes on the use of data transformations. Pract Assess Res Evalu 9(1):42\u201350","journal-title":"Pract Assess Res Evalu"},{"issue":"2","key":"1008_CR41","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1111\/j.2041-210X.2010.00021.x","volume":"1","author":"RB O\u2019hara","year":"2010","unstructured":"O\u2019hara RB, Kotze DJ (2010) Do not log-transform count data. Methods Ecol Evol 1(2):118\u2013122","journal-title":"Methods Ecol Evol"},{"key":"1008_CR42","unstructured":"R Core Team (2019) R: a language and environment for statistical computing organization. R Foundation for Statistical Computing, Vienna, Austria. https:\/\/www.R-project.org\/"},{"key":"1008_CR43","unstructured":"Ramos J et\u00a0al (2003) Using tf-idf to determine word relevance in document queries. In: Proceedings of the first instructional conference on machine learning, volume 242, Piscataway, pp 133\u2013142"},{"issue":"336","key":"1008_CR44","doi-asserted-by":"publisher","first-page":"846","DOI":"10.1080\/01621459.1971.10482356","volume":"66","author":"WM Rand","year":"1971","unstructured":"Rand WM (1971) Objective criteria for the evaluation of clustering methods. J Am Stat Assoc 66(336):846\u2013850","journal-title":"J Am Stat Assoc"},{"key":"1008_CR45","unstructured":"Rau A, Celeux G, Martin-Magniette M-L, Maugis-Rabusseau C (2011) Clustering high-throughput sequencing data with Poisson mixture models. Research Report RR-7786, INRIA"},{"issue":"5","key":"1008_CR46","doi-asserted-by":"publisher","first-page":"1260","DOI":"10.1016\/j.ipm.2006.11.001","volume":"43","author":"L Rigouste","year":"2007","unstructured":"Rigouste L, Capp\u00e9 O, Yvon F (2007) Inference and evaluation of the multinomial mixture model for text clustering. Inf Process Manag 43(5):1260\u20131280","journal-title":"Inf Process Manag"},{"issue":"2","key":"1008_CR47","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1214\/aos\/1176344136","volume":"6","author":"G Schwarz","year":"1978","unstructured":"Schwarz G et al (1978) Estimating the dimension of a model. Ann Stat 6(2):461\u2013464","journal-title":"Ann Stat"},{"key":"1008_CR48","unstructured":"Silvestre C, Cardoso MG, Figueiredo MA (2014) Identifying the number of clusters in discrete mixture models. arXiv preprint arXiv:1409.7419"},{"issue":"14","key":"1008_CR49","doi-asserted-by":"publisher","first-page":"8418","DOI":"10.1073\/pnas.0932692100","volume":"100","author":"T Sorlie","year":"2003","unstructured":"Sorlie T, Tibshirani R, Parker J, Hastie T, Marron J, Nobel A, Deng S, Johnsen H, Pesich R, Geisler S et al (2003) Repeated observation of breast tumor subtypes in independent gene expression data sets. Proc Nat Acad Sci USA 100(14):8418\u20138423","journal-title":"Proc Nat Acad Sci USA"},{"issue":"6","key":"1008_CR50","doi-asserted-by":"publisher","first-page":"3077","DOI":"10.1002\/ece3.3807","volume":"8","author":"AP St-Pierre","year":"2018","unstructured":"St-Pierre AP, Shikon V, Schneider DC (2018) Count data in biology-data transformation or model reformation? Ecol Evol 8(6):3077\u20133085","journal-title":"Ecol Evol"},{"issue":"2","key":"1008_CR51","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1162\/089976699300016728","volume":"11","author":"ME Tipping","year":"1999","unstructured":"Tipping ME, Bishop CM (1999a) Mixtures of probabilistic principal component analyzers. Neural Comput 11(2):443\u2013482","journal-title":"Neural Comput"},{"issue":"3","key":"1008_CR52","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1111\/1467-9868.00196","volume":"61","author":"ME Tipping","year":"1999","unstructured":"Tipping ME, Bishop CM (1999b) Probabilistic principal component analysis. J R Stat Soc: Ser B (Stat Methodol) 61(3):611\u2013622","journal-title":"J R Stat Soc: Ser B (Stat Methodol)"},{"key":"1008_CR53","unstructured":"Wallach HM (2008) Structured topic models for language. PhD thesis, University of Cambridge"},{"key":"1008_CR54","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1007\/978-3-642-10745-0_8","volume-title":"Classification as a tool for research","author":"K Watanabe","year":"2010","unstructured":"Watanabe K, Akaho S, Omachi S, Okada M (2010) Simultaneous clustering and dimensionality reduction using variational bayesian mixture model. Classification as a tool for research. Springer, New York, pp 81\u201389"},{"key":"1008_CR55","unstructured":"Xie P, Xing EP (2013) Integrating document clustering and topic modeling. In: Proceedings of the 30th conference on uncertainty in artificial intelligence"},{"key":"1008_CR56","doi-asserted-by":"crossref","unstructured":"Xu W, Liu X, Gong Y (2003) Document clustering based on non-negative matrix factorization. In: Proceedings of the 26th annual international ACM SIGIR conference on research and development in informaion retrieval. ACM, pp 267\u2013273","DOI":"10.1145\/860435.860485"},{"key":"1008_CR57","first-page":"417","volume-title":"European conference on principles of data mining and knowledge discovery","author":"S Yu","year":"2005","unstructured":"Yu S, Yu K, Tresp V, Kriegel H-P (2005) A probabilistic clustering-projection model for discrete data. European conference on principles of data mining and knowledge discovery. Springer, New York, pp 417\u2013428"},{"issue":"1","key":"1008_CR58","doi-asserted-by":"publisher","first-page":"e85150","DOI":"10.1371\/journal.pone.0085150","volume":"9","author":"I Zwiener","year":"2014","unstructured":"Zwiener I, Frisch B, Binder H (2014) Transforming rna-seq data to improve the performance of prognostic gene signatures. PLoS ONE 9(1):e85150","journal-title":"PLoS ONE"}],"container-title":["Computational Statistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00180-020-01008-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00180-020-01008-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00180-020-01008-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T15:26:12Z","timestamp":1723217172000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00180-020-01008-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7,8]]},"references-count":58,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["1008"],"URL":"https:\/\/doi.org\/10.1007\/s00180-020-01008-9","relation":{},"ISSN":["0943-4062","1613-9658"],"issn-type":[{"type":"print","value":"0943-4062"},{"type":"electronic","value":"1613-9658"}],"subject":[],"published":{"date-parts":[[2020,7,8]]},"assertion":[{"value":"2 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 June 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 July 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}