{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T16:12:51Z","timestamp":1772122371452,"version":"3.50.1"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2010,8,29]],"date-time":"2010-08-29T00:00:00Z","timestamp":1283040000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2011,7]]},"DOI":"10.1007\/s10618-010-0198-2","type":"journal-article","created":{"date-parts":[[2010,8,28]],"date-time":"2010-08-28T05:28:42Z","timestamp":1282973322000},"page":"1-62","source":"Crossref","is-referenced-by-count":24,"title":["Mixed-membership naive Bayes models"],"prefix":"10.1007","volume":"23","author":[{"given":"Hanhuai","family":"Shan","sequence":"first","affiliation":[]},{"given":"Arindam","family":"Banerjee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,8,29]]},"reference":[{"key":"198_CR1","first-page":"1823","volume":"9","author":"E Airoldi","year":"2008","unstructured":"Airoldi E, Blei D, Fienberg S, Xing E (2008) Mixed membership stochastic blockmodels. J Mach Learn Res 9: 1823\u20131856","journal-title":"J Mach Learn Res"},{"key":"198_CR2","doi-asserted-by":"crossref","unstructured":"Banerjee A (2007) An analysis of logistic models: exponential family connections and online performance. In: Proceedings of the 7th SIAM international conference on data mining (SDM)","DOI":"10.1137\/1.9781611972771.19"},{"key":"198_CR3","doi-asserted-by":"crossref","unstructured":"Banerjee A, Dhillon I, Ghosh J, Merugu S (2004) An information theoretic analysis of maximum likelihood mixture estimation for exponential families. In: Proceedings of the 21st international conference on machine learning (ICML)","DOI":"10.1145\/1015330.1015431"},{"key":"198_CR4","first-page":"1345","volume":"6","author":"A Banerjee","year":"2005","unstructured":"Banerjee A, Dhillon I, Ghosh J, Sra S (2005a) Clustering on the unit hypersphere using von (M)ises-(F)isher distributions. J Mach Learn Res 6: 1345\u20131382","journal-title":"J Mach Learn Res"},{"key":"198_CR5","doi-asserted-by":"crossref","unstructured":"Banerjee A, Krumpelman C, Basu S, Mooney R, Ghosh J (2005b) Model based overlapping clustering. In: Proceedings of the 11th international conference on knowledge discovery and data mining (KDD), pp 532\u2013537","DOI":"10.1145\/1081870.1081932"},{"key":"198_CR6","first-page":"1705","volume":"6","author":"A Banerjee","year":"2005","unstructured":"Banerjee A, Merugu S, Dhillon I, Ghosh J (2005c) Clustering with Bregman divergences. J Mach Learn Res 6: 1705\u20131749","journal-title":"J Mach Learn Res"},{"key":"198_CR7","volume-title":"Information and exponential families in statistical theory","author":"O Barndorff-Nielsen","year":"1978","unstructured":"Barndorff-Nielsen O (1978) Information and exponential families in statistical theory. Wiley, Chichester"},{"key":"198_CR8","doi-asserted-by":"crossref","unstructured":"Blei D, Jordan M (2003) Modeling annotated data. In: ACM SIGIR conference on research and development in information retrieval, pp 127\u2013134","DOI":"10.1145\/860435.860460"},{"issue":"1","key":"198_CR9","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1214\/06-BA104","volume":"1","author":"D Blei","year":"2006","unstructured":"Blei D, Jordan M (2006) Variational inference for Dirichlet process mixtures. Bayesian Anal 1(1): 121\u2013144","journal-title":"Bayesian Anal"},{"key":"198_CR10","unstructured":"Blei D, Lafferty J (2005) Correlated topic models. In: Proceedings of the 18th annual conference on neural information processing systems (NIPS)"},{"key":"198_CR11","doi-asserted-by":"crossref","unstructured":"Blei D, Lafferty J (2006) Dynamic topic models. In: Proceedings of the 23rd international conference on machine learning (ICML)","DOI":"10.1145\/1143844.1143859"},{"key":"198_CR12","unstructured":"Blei D, McAuliffe J (2007) Supervised topic models. In: Proceedings of the 20th annual conference on neural information processing systems (NIPS)"},{"key":"198_CR13","doi-asserted-by":"crossref","first-page":"993","DOI":"10.1162\/jmlr.2003.3.4-5.993","volume":"3","author":"D Blei","year":"2003","unstructured":"Blei D, Ng A, Jordan M (2003) Latent Dirichlet\u00a0allocation. J Mach Learn Res 3: 993\u20131022","journal-title":"J Mach Learn Res"},{"key":"198_CR14","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1023\/A:1009715923555","volume":"2","author":"C Burges","year":"1998","unstructured":"Burges C (1998) A tutorial on support vector machines for pattern recognition. Data Min Knowl Discov 2: 121\u2013167","journal-title":"Data Min Knowl Discov"},{"key":"198_CR15","unstructured":"Chang C, Lin C (2001) LIBSVM: a library for support vector machines. Software available at http:\/\/www.csie.ntu.edu.tw\/cjlin\/libsvm"},{"key":"198_CR16","volume-title":"Theory of probability","author":"B Finetti de","year":"1990","unstructured":"de Finetti B (1990) Theory of probability. Wiley, Chichester"},{"issue":"6","key":"198_CR17","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester S, Dumais S, Landauer T, Furnas G, Harshman R (1990) Indexing by latent semantic analysis. J Am Soc Inf Sci 41(6): 391\u2013407","journal-title":"J Am Soc Inf Sci"},{"key":"198_CR18","volume-title":"Optimal statistical decisions","author":"M DeGroot","year":"1970","unstructured":"DeGroot M (1970) Optimal statistical decisions. McGraw-Hill, New York"},{"key":"198_CR19","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster AP, Laird NM, Rubin DB (1977) Maximum likelihood from incomplete data via the EM algorithm. J R Stat Soc B 39: 1\u201338","journal-title":"J R Stat Soc B"},{"key":"198_CR20","doi-asserted-by":"crossref","unstructured":"Dhillon I, Mallela S, Modha D (2003) Information-theoretic co-clustering. In: Proceedings of the 9th ACM international conference on knowledge discovery and data mining (KDD), pp 89\u201398","DOI":"10.1145\/956750.956764"},{"key":"198_CR21","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1023\/A:1007413511361","volume":"29","author":"P Domingos","year":"1997","unstructured":"Domingos P, Pazzani M (1997) On the optimality of the simple Bayesian classifier under zero-one loss. Mach Learn 29: 103\u2013130","journal-title":"Mach Learn"},{"key":"198_CR22","doi-asserted-by":"crossref","unstructured":"Erosheva E, Fienberg S, Lafferty J (2004) Mixed-membership models of scientific publications. In: Proceedings of the national academy of science, pp 5220\u20135227","DOI":"10.1073\/pnas.0307760101"},{"key":"198_CR23","unstructured":"Fei-Fei L, Perona P (2005) A (B)ayesian hierarchical model for learning natural scene categories. In: Proceedings of the 15th IEEE international conference of computer vision and pattern recognition (CVPR), pp 524\u2013531"},{"key":"198_CR24","doi-asserted-by":"crossref","first-page":"3286","DOI":"10.1093\/bioinformatics\/bti515","volume":"21","author":"P Flaherty","year":"2005","unstructured":"Flaherty P, Giaever G, Jordan M, Arkin A (2005) A latent variable model for chemogenomic profiling. Bioinformatics 21: 3286\u20133293","journal-title":"Bioinformatics"},{"key":"198_CR25","doi-asserted-by":"crossref","unstructured":"Fu Q, Banerjee A (2008) Multiplicative mixture models for overlapping clustering. In: Proceedings of the 8th IEEE international conference on data mining (ICDM), pp 791\u2013796","DOI":"10.1109\/ICDM.2008.103"},{"key":"198_CR26","doi-asserted-by":"crossref","first-page":"721","DOI":"10.1109\/TPAMI.1984.4767596","volume":"6","author":"S Geman","year":"1984","unstructured":"Geman S, Geman D (1984) Stochastic relaxation, Gibbs distributions, and the Bayesian restoration of images. IEEE Trans Pattern Anal Mach Intell 6: 721\u2013741","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"198_CR27","unstructured":"Ghahramani Z (1995) Factorial learning and the EM algorithm. In: Proceedings of the 8th annual conference on neural information processing systems (NIPS)"},{"key":"198_CR28","doi-asserted-by":"crossref","first-page":"5228","DOI":"10.1073\/pnas.0307752101","volume":"101","author":"T Griffiths","year":"2004","unstructured":"Griffiths T, Steyvers M (2004) Finding scientific topics. Proc Natl Acad Sci USA 101: 5228\u20135235","journal-title":"Proc Natl Acad Sci USA"},{"key":"198_CR29","doi-asserted-by":"crossref","unstructured":"Heller K, Williamson S, Ghahramani Z (2008) Statistical models for partial membership. In: Proceedings of the 25th international conference on machine learning (ICML), pp 392\u2013399","DOI":"10.1145\/1390156.1390206"},{"key":"198_CR30","doi-asserted-by":"crossref","unstructured":"Hoffman T (1999) Probabilistic latent semantic indexing. In: Proceedings of the 15th conference in uncertainty in artificial intelligence (UAI)","DOI":"10.1145\/312624.312649"},{"key":"198_CR31","volume-title":"Algorithms for clustering data","author":"T Jaakkola","year":"2000","unstructured":"Jaakkola T (2000) Algorithms for clustering data. MIT Press, Cambridge"},{"key":"198_CR32","unstructured":"Koutsourelakis P, Eliassi-Rad T (2008) Finding mixed-memberships in social networks. In: Proceedings of the 23rd national conference on artificial intelligence (AAAI)"},{"key":"198_CR33","unstructured":"Lacoste-Julien S, Sha F, Jordan M (2008) DiscLDA: discriminative learning for dimensionality reduction and classification. In: Proceedings of the 21st annual conference on neural information processing systems (NIPS)"},{"key":"198_CR34","doi-asserted-by":"crossref","unstructured":"Lang K (1995) News weeder: Learning to filter netnews. In: Proceedings of the 12th international conference on machine learning (ICML)","DOI":"10.1016\/B978-1-55860-377-6.50048-7"},{"key":"198_CR35","volume-title":"The EM algorithm and extensions","author":"G McLachlan","year":"1996","unstructured":"McLachlan G, Krishnan T (1996) The EM algorithm and extensions. Wiley-Interscience, New York"},{"key":"198_CR36","unstructured":"Mimno D, McCallum A (2008) Topic models conditioned on arbitrary features with Dirichlet-multinomial regression. In: Proceedings of the 24th conference in uncertainty in artificial intelligence (UAI)"},{"key":"198_CR37","unstructured":"Minka T (2003a) A comparison of numerical optimizers for logistic regression. Tech. rep., Carnegie Mellon University"},{"key":"198_CR38","unstructured":"Minka T (2003b) Estimating a Dirichlet distribution. Tech. rep., Massachusetts Institute of Technology"},{"key":"198_CR39","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1023\/B:MACH.0000035475.85309.1b","volume":"57","author":"T Mitchell","year":"2004","unstructured":"Mitchell T, Hutchinson R, Niculescu R, Pereira F, Wang X, Just M, Newman S (2004) Learning to decode cognitive states from brain images. Mach Learn 57: 145\u2013175","journal-title":"Mach Learn"},{"key":"198_CR40","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1007\/978-94-011-5014-9_12","volume-title":"Learning in graphical models","author":"R Neal","year":"1998","unstructured":"Neal R, Hinton G (1998) A view of the EM algorithm that justifies incremental, sparse, and other variants. In: Jordan M (eds) Learning in graphical models. MIT Press, Cambridge, pp 355\u2013368"},{"key":"198_CR41","unstructured":"Newman D, Asuncion A, Smyth P, Welling M (2007) Distributed inference for latent Dirichlet\u00a0allocation. In: Proceedings of the 20th annual conference on neural information processing systems (NIPS)"},{"key":"198_CR42","unstructured":"Ng A, Jordan M (2001) On discrminative vs generative classifiers: a comparison of logistic regression and naive Bayes. In: Proceedings of the 14th annual conference on neural information processing systems (NIPS)"},{"issue":"2\/3","key":"198_CR43","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1023\/A:1007692713085","volume":"39","author":"K Nigam","year":"2000","unstructured":"Nigam K, McCallum A, Thrun S, Mitchell T (2000) Text classification from labeled and unlabeled documents using EM. Mach Learn 39(2\/3): 103\u2013134","journal-title":"Mach Learn"},{"key":"198_CR44","doi-asserted-by":"crossref","DOI":"10.4135\/9781412984805","volume-title":"Logistic Regression: A Primer","author":"F Pampel","year":"2000","unstructured":"Pampel F (2000) Logistic Regression: A Primer. Sage, Thousand Oaks"},{"key":"198_CR45","doi-asserted-by":"crossref","unstructured":"Porteous I, Newman D, Ihler A, Asuncion A, Smyth P, Welling M (2008) Fast collapsed Gibbs sampling for latent Dirichlet\u00a0allocation. In: Proceeding of the 14th ACM international conference on knowledge discovery and data mining (KDD), pp 569\u2013577","DOI":"10.1145\/1401890.1401960"},{"issue":"2","key":"198_CR46","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1137\/1026034","volume":"26","author":"R Redner","year":"1984","unstructured":"Redner R, Walker H (1984) Mixture densities, maximum likelihood and the EM algorithm. SIAM Rev 26(2): 195\u2013239","journal-title":"SIAM Rev"},{"key":"198_CR47","doi-asserted-by":"crossref","unstructured":"Saund E (1994) Unsupervised learning of mixtures of multiple causes in binary data. In: Proceedings of the 7th annual conference on neural information processing systems (NIPS)","DOI":"10.1162\/neco.1995.7.1.51"},{"key":"198_CR48","unstructured":"Segal E, Battle A, Koller D (2003) Decomposing gene expression into cellular processes. In: Proceedings of 8th pacific symposium on biocomputing (PSB)"},{"key":"198_CR49","unstructured":"Shahami M, Hearst M, Saund E (1997) Applying the multiple cause model to text categorization. In: Proceedings of the 14th international conference on machine learning (ICML), pp 435\u2013443"},{"key":"198_CR50","doi-asserted-by":"crossref","unstructured":"Shan H, Banerjee A (2008) Bayesian co-clustering. In: Proceedings of the 8th IEEE international conference on data mining (ICDM), pp 530\u2013539","DOI":"10.1109\/ICDM.2008.91"},{"key":"198_CR51","unstructured":"Wainwright M, Jordan M (2003) Graphical models, exponential families, and variational inference. Tech. Rep. TR 649, Department of Statistics, University of California at Berkeley"},{"key":"198_CR52","unstructured":"Wang C, Blei D, Fei-Fei L (2009) Simultaneous image classification and annotation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"198_CR53","doi-asserted-by":"crossref","unstructured":"Wang H, Huang M, Zhu X (2008) A generative probabilistic model for multi-label classification. In: Proceedings of the 8th IEEE international conference on data mining (ICDM)","DOI":"10.1109\/ICDM.2008.86"},{"issue":"22","key":"198_CR54","doi-asserted-by":"crossref","first-page":"2987","DOI":"10.1093\/bioinformatics\/btm484","volume":"23","author":"M Yousef","year":"2007","unstructured":"Yousef M, Jung S, Kossenkov A, Showe L, Showe M (2007) Naive Bayes for microRNA target predictions machine learning for microRNA targets. Bioinformatics 23(22): 2987\u20132992","journal-title":"Bioinformatics"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-010-0198-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-010-0198-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-010-0198-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T05:41:55Z","timestamp":1740462115000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-010-0198-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,8,29]]},"references-count":54,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2011,7]]}},"alternative-id":["198"],"URL":"https:\/\/doi.org\/10.1007\/s10618-010-0198-2","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,8,29]]}}}