{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T06:04:51Z","timestamp":1769580291314,"version":"3.49.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2017,7,5]],"date-time":"2017-07-05T00:00:00Z","timestamp":1499212800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"NCI","award":["1R01CA213466-01"],"award-info":[{"award-number":["1R01CA213466-01"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Data Sci Anal"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1007\/s41060-017-0062-1","type":"journal-article","created":{"date-parts":[[2017,7,5]],"date-time":"2017-07-05T10:43:19Z","timestamp":1499251399000},"page":"83-97","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Using data to build a better EM: EM* for big data"],"prefix":"10.1007","volume":"4","author":[{"given":"Hasan","family":"Kurban","sequence":"first","affiliation":[]},{"given":"Mark","family":"Jenne","sequence":"additional","affiliation":[]},{"given":"Mehmet M.","family":"Dalkilic","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,7,5]]},"reference":[{"key":"62_CR1","doi-asserted-by":"crossref","unstructured":"Kurban, H., Jenne, M., Dalkilic, M.M.: EM*: an EM algorithm for Big Data. In: 2016 IEEE International Conference on Data Science and Advanced Analytics (DSAA), pp.\u00a0312\u2013320 (2016)","DOI":"10.1109\/DSAA.2016.40"},{"issue":"1","key":"62_CR2","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10115-007-0114-2","volume":"14","author":"X Wu","year":"2007","unstructured":"Wu, X., Kumar, V., et al.: Top 10 algorithms in data mining. Knowl. Inf. Syst. 14(1), 1\u201337 (2007)","journal-title":"Knowl. Inf. Syst."},{"issue":"9","key":"62_CR3","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1109\/MC.2014.264","volume":"47","author":"M Jenne","year":"2014","unstructured":"Jenne, M., Boberg, O., Kurban, H., Dalkilic, M.M.: Studying the milky way galaxy using ParaHeap-k. IEEE Comput. Soc. 47(9), 26\u201333 (2014)","journal-title":"IEEE Comput. Soc."},{"key":"62_CR4","unstructured":"Arthur, D., Vassilvitskii, S.: \u201ck-means++,\u201d The advantages of careful seeding. In: Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, pp. 1027\u20131035 (2007)"},{"issue":"7","key":"62_CR5","doi-asserted-by":"crossref","first-page":"622","DOI":"10.14778\/2180912.2180915","volume":"5","author":"B Bahmani","year":"2012","unstructured":"Bahmani, B., Moseley, B., Vattani, A., Kumar, R., Vassilvitskii, S.: Scalable k-means++. Proc. VLDB Endow. 5(7), 622\u2013633 (2012)","journal-title":"Proc. VLDB Endow."},{"issue":"1","key":"62_CR6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster, A.P., Laird, N.M., Rubin, D.B.: Maximum likelihood estimation from incomplete data via the em algorithm. J. R. Stat. Soc. 39(1), 1\u201338 (1977)","journal-title":"J. R. Stat. Soc."},{"issue":"1","key":"62_CR7","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1214\/aos\/1176346060","volume":"11","author":"CFJ Wu","year":"1983","unstructured":"Wu, C.F.J.: On the convergence properties of the EM algorithm. Ann. Stat. 11(1), 95\u2013103 (1983)","journal-title":"Ann. Stat."},{"issue":"1","key":"62_CR8","doi-asserted-by":"crossref","first-page":"334","DOI":"10.1162\/neco.1994.6.2.334","volume":"6","author":"AL Yuille","year":"1994","unstructured":"Yuille, A.L., Stolorz, P., Utans, J.: Mixtures of distributions and the EMalgorithm. Neural Comput. 6(1), 334\u2013340 (1994)","journal-title":"Neural Comput."},{"issue":"1","key":"62_CR9","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1162\/neco.1996.8.1.129","volume":"8","author":"L Xu","year":"1996","unstructured":"Xu, L., Jordan, M.: On convergence properties of the EM algorithm for Gaussian mixtures. Neural Comput. 8(1), 129\u2013151 (1996)","journal-title":"Neural Comput."},{"issue":"2","key":"62_CR10","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1162\/089976699300016674","volume":"11","author":"S Roweis","year":"1999","unstructured":"Roweis, S., Ghahramani, Z.: A unifying review of linear Gaussian models. Neural Comput. 11(2), 305\u2013345 (1999)","journal-title":"Neural Comput."},{"issue":"2","key":"62_CR11","doi-asserted-by":"crossref","first-page":"387","DOI":"10.1007\/s00180-016-0657-3","volume":"32","author":"AK Ghosh","year":"2017","unstructured":"Ghosh, A.K., Chakraborty, A.: Use of EM algorithm for data reduction under sparsity assumption. Comput. Stat. 32(2), 387\u2013407 (2017)","journal-title":"Comput. Stat."},{"key":"62_CR12","volume-title":"The EM Algorithm and Extensions","author":"GJ McLachlan","year":"2007","unstructured":"McLachlan, G.J., Krishnan, T.: The EM Algorithm and Extensions. Wiley, New York (2007)"},{"issue":"1","key":"62_CR13","doi-asserted-by":"crossref","first-page":"44","DOI":"10.1080\/13102818.2014.949045","volume":"28","author":"YG Jung","year":"2014","unstructured":"Jung, Y.G., Kang, M.S.: Clustering performance comparison using K-means and expectation maximization algorithms. Biotechnol. Biotechnol. Equip. 28(1), 44\u201348 (2014)","journal-title":"Biotechnol. Biotechnol. Equip."},{"key":"62_CR14","doi-asserted-by":"crossref","first-page":"897","DOI":"10.1038\/nbt1406","volume":"26","author":"CB Do","year":"2008","unstructured":"Do, C.B., Batzoglou, S.: What is the expectation maximization algorithm. Nat. Biotechnol. 26, 897\u2013899 (2008)","journal-title":"Nat. Biotechnol."},{"key":"62_CR15","doi-asserted-by":"crossref","DOI":"10.1201\/9780203913055","volume-title":"The EM Algorithm and Related Statistical Models","author":"M Watanabe","year":"2003","unstructured":"Watanabe, M., Yamaguchi, K.: The EM Algorithm and Related Statistical Models. CRC Press, Boca Raton (2003)"},{"issue":"3","key":"62_CR16","doi-asserted-by":"crossref","first-page":"511","DOI":"10.1111\/1467-9868.00082","volume":"59","author":"T Hastie","year":"1997","unstructured":"Hastie, T., Tibshiraniet, R., Friedman, J.: The EM algorithm an old folk-song sung to a fast new tune. J. R. Stat. Soc. Ser. B 59(3), 511\u2013567 (1997)","journal-title":"J. R. Stat. Soc. Ser. B"},{"issue":"1","key":"62_CR17","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1109\/42.906424","volume":"20","author":"Y Zhang","year":"2001","unstructured":"Zhang, Y., Brady, M., Smith, S.: Segmentation of brain MR images through a hidden Markov random field model and the expectation-maximization algorithm. IEEE Trans. Med. Imaging 20(1), 45\u201357 (2001)","journal-title":"IEEE Trans. Med. Imaging"},{"issue":"5","key":"62_CR18","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1109\/3468.618260","volume":"7","author":"R Langari","year":"1993","unstructured":"Langari, R., Wang, L., Yen, J.: Radial basis function networks, regression weights, and the expectation\u2013maximization algorithm. IEEE Trans. Syst. Man Cybern. Part A 7(5), 613\u2013623 (1993)","journal-title":"IEEE Trans. Syst. Man Cybern. Part A"},{"issue":"2","key":"62_CR19","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1162\/neco.1997.9.2.271","volume":"9","author":"P Dayan","year":"1997","unstructured":"Dayan, P., Hinton, G.E.: Using expectation\u2013maximization for reinforcement learning. Neural Comput. 9(2), 271\u2013278 (1997)","journal-title":"Neural Comput."},{"issue":"6","key":"62_CR20","doi-asserted-by":"crossref","first-page":"2919","DOI":"10.1016\/j.csda.2006.11.035","volume":"51","author":"M Tang","year":"2007","unstructured":"Tang, M., et al.: On improved EM algorithm and confidence interval construction for incomplete $${\\rm r}\\times {\\rm c}$$ r \u00d7 c tables. Comput. Stat. Data Anal. 51(6), 2919\u20132933 (2007)","journal-title":"Comput. Stat. Data Anal."},{"key":"62_CR21","first-page":"1014","volume":"83","author":"MJ Lindstrom","year":"1988","unstructured":"Lindstrom, M.J., Bates, D.M.: Newton-Raphson and the EM algorithm for linear mixed-effects models for repeated-measures data. J. Am. Stat. Assoc. 83, 1014\u20131022 (1988)","journal-title":"J. Am. Stat. Assoc."},{"key":"62_CR22","volume-title":"An Introduction to Numerical Methods and Analysis","author":"JF Epperson","year":"2013","unstructured":"Epperson, J.F.: An Introduction to Numerical Methods and Analysis. Wiley, New York (2013)"},{"key":"62_CR23","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1080\/01621459.1993.10594313","volume":"88","author":"M Jamshidian","year":"1993","unstructured":"Jamshidian, M., Jennrich, R.I.: Conjugate gradient acceleration of the EM algorithm. J. Am. Stat. Assoc. 88, 221\u2013228 (1993)","journal-title":"J. Am. Stat. Assoc."},{"key":"62_CR24","doi-asserted-by":"crossref","first-page":"569","DOI":"10.1111\/1467-9868.00083","volume":"59","author":"M Jamshidian","year":"1997","unstructured":"Jamshidian, M., Jennrich, R.I.: Acceleration of the EM algorithm by using quasi-Newton methods. J. R. Stat. Soc. 59, 569\u2013587 (1997)","journal-title":"J. R. Stat. Soc."},{"key":"62_CR25","doi-asserted-by":"crossref","unstructured":"Neal, R. M., Hinton, G. E.: A view of the EM algorithm that justifies incremental, sparse, and other variants. In: Jordan, M.I. (ed.) Learning in Graphical Models, pp. 355\u2013368. MIT Press, Cambridge (1999)","DOI":"10.1007\/978-94-011-5014-9_12"},{"issue":"1","key":"62_CR26","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1111\/1467-9868.00176","volume":"61","author":"JG Booth","year":"1999","unstructured":"Booth, J.G., Hobert, J.P.: Maximizing generalized linear mixed model likelihoods with an automated Monte Carlo EM algorithm. J. R. Stat. Soc. Ser. B (Methodol.) 61(1), 265\u2013285 (1999)","journal-title":"J. R. Stat. Soc. Ser. B (Methodol.)"},{"issue":"2","key":"62_CR27","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1093\/biomet\/80.2.267","volume":"80","author":"XL Meng","year":"1993","unstructured":"Meng, X.L., Rubin, D.B.: Maximum likelihood estimation via the ECM algorithm: a general framework. Biometrika 80(2), 267\u2013278 (1993)","journal-title":"Biometrika"},{"issue":"4","key":"62_CR28","doi-asserted-by":"crossref","first-page":"633","DOI":"10.1093\/biomet\/81.4.633","volume":"81","author":"C Liu","year":"1994","unstructured":"Liu, C., et al.: The ECME algorithm: a simple extension of EM and ECM with faster monotone convergence. Biometrika 81(4), 633\u2013648 (1994)","journal-title":"Biometrika"},{"key":"62_CR29","unstructured":"Celeux, G., Chauveau, D., Diebolt, J.: On stochastic versions of the EM algorithm. Doctoral dissertation, INRIA (1995)"},{"issue":"411","key":"62_CR30","doi-asserted-by":"crossref","first-page":"699","DOI":"10.1080\/01621459.1990.10474930","volume":"85","author":"GC Wei","year":"1990","unstructured":"Wei, G.C., et al.: A Monte Carlo implementation of the EM algorithm and the poor mans data augmentation algorithms. J. Am. Stat. Assoc. 85(411), 699\u2013704 (1990)","journal-title":"J. Am. Stat. Assoc."},{"issue":"4","key":"62_CR31","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1093\/biomet\/85.4.755","volume":"85","author":"C Liu","year":"1998","unstructured":"Liu, C., et al.: Parameter expansion to accelerate EM: the PX-EM algorithm. Biometrika 85(4), 755\u2013770 (1998)","journal-title":"Biometrika"},{"issue":"448","key":"62_CR32","doi-asserted-by":"crossref","first-page":"1264","DOI":"10.1080\/01621459.1999.10473879","volume":"94","author":"JS Liu","year":"1999","unstructured":"Liu, J.S., Wu, Y.N.: Parameter expansion for data augmentation. J. Am. Stat. Assoc. 94(448), 1264\u20131274 (1999)","journal-title":"J. Am. Stat. Assoc."},{"issue":"398","key":"62_CR33","doi-asserted-by":"crossref","first-page":"528","DOI":"10.1080\/01621459.1987.10478458","volume":"82","author":"MA Tanner","year":"1987","unstructured":"Tanner, M.A., Wong, W.H.: The calculation of posterior distributions by data augmentation. J. Am. Stat. Assoc. 82(398), 528\u2013550 (1987)","journal-title":"J. Am. Stat. Assoc."},{"issue":"1","key":"62_CR34","first-page":"1","volume":"5","author":"K Lange","year":"1995","unstructured":"Lange, K.: A quasi-Newton acceleration of the EM algorithm. Stat. Sin. 5(1), 1\u201318 (1995)","journal-title":"Stat. Sin."},{"key":"62_CR35","doi-asserted-by":"crossref","first-page":"226","DOI":"10.1111\/j.2517-6161.1982.tb01203.x","volume":"44","author":"TA Louis","year":"1982","unstructured":"Louis, T.A.: Finding the observed information matrix when using the EM algorithm. J. R. Stat. Assoc. Ser. B 44, 226\u2013233 (1982)","journal-title":"J. R. Stat. Assoc. Ser. B"},{"issue":"6","key":"62_CR36","doi-asserted-by":"crossref","first-page":"457","DOI":"10.1093\/biomet\/65.3.457","volume":"51","author":"B Efron","year":"1978","unstructured":"Efron, B., Hinkley, D.V.: Assessing the accuracy of the maximum likelihood estimator: observed versus expected Fisher information. Biometrika 51(6), 457\u2013482 (1978)","journal-title":"Biometrika"},{"key":"62_CR37","unstructured":"Bradley, P., Fayyad, U., Reina, C.: Scaling EM clustering to large databases. Technical report, Microsoft Research (1997)"},{"key":"62_CR38","unstructured":"Bradley, P., Fayyad, U., Reina, C.: Scaling clustering algorithms to large databases. In: ACM KDD Conference (1998)"},{"issue":"1","key":"62_CR39","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1145\/360402.360419","volume":"2","author":"F Fanstrom","year":"2000","unstructured":"Fanstrom, F., Lewis, J., Elkan, C.: Scalability for clustering algorithms revisited. SIGKDD Explor. 2(1), 51\u201357 (2000)","journal-title":"SIGKDD Explor."},{"key":"62_CR40","doi-asserted-by":"crossref","unstructured":"Ordonez, C., Omiecinski, E.: FREM: fast and robust EM clustering for large data sets. In: ACM CIKM Conference, pp. 590\u2013599 (2002)","DOI":"10.1145\/584792.584889"},{"issue":"5","key":"62_CR41","first-page":"1","volume":"23","author":"OL Mangasarian","year":"1990","unstructured":"Mangasarian, O.L., Wolberg, W.H.: Cancer diagnosis via linear programming. SIAM News 23(5), 1\u201318 (1990)","journal-title":"SIAM News"},{"key":"62_CR42","unstructured":"Bache, K., Lichman, M.: UCI Machine Learning Repository. http:\/\/archive.ics.uci.edu\/ml (2013)"},{"key":"62_CR43","unstructured":"Jenne, M., Boberg, O., Kurban, H., Dalkilic, M. M.: Computational astronomy. http:\/\/www.computationalastronomy.com (2014)"},{"issue":"36","key":"62_CR44","doi-asserted-by":"crossref","first-page":"895","DOI":"10.1051\/0004-6361:20042352","volume":"4","author":"L Girardi","year":"2005","unstructured":"Girardi, L., Groenewegen, M.A.T., Hatziminaoglou, E., Costa, L.: Star counts in the galaxy. Simulating from very deep to very shallow photometric surveys with the trilegal code. Astronomy Astrophys. 4(36), 895\u2013915 (2005)","journal-title":"Astronomy Astrophys."}],"container-title":["International Journal of Data Science and Analytics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s41060-017-0062-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-017-0062-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-017-0062-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T07:25:14Z","timestamp":1719300314000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s41060-017-0062-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7,5]]},"references-count":44,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2017,9]]}},"alternative-id":["62"],"URL":"https:\/\/doi.org\/10.1007\/s41060-017-0062-1","relation":{},"ISSN":["2364-415X","2364-4168"],"issn-type":[{"value":"2364-415X","type":"print"},{"value":"2364-4168","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,7,5]]}}}