{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T14:08:33Z","timestamp":1777644513587,"version":"3.51.4"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2003,4,1]],"date-time":"2003-04-01T00:00:00Z","timestamp":1049155200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2003,4,1]],"date-time":"2003-04-01T00:00:00Z","timestamp":1049155200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Data Mining and Knowledge Discovery"],"published-print":{"date-parts":[[2003,4]]},"DOI":"10.1023\/a:1022497517599","type":"journal-article","created":{"date-parts":[[2003,3,28]],"date-time":"2003-03-28T17:56:51Z","timestamp":1048874211000},"page":"215-232","source":"Crossref","is-referenced-by-count":23,"title":["Sampling and Subsampling for Cluster Analysis in Data Mining: With Applications to Sky Survey Data"],"prefix":"10.1007","volume":"7","author":[{"given":"David M.","family":"Rocke","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Dai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"5114783_CR1","doi-asserted-by":"crossref","first-page":"803","DOI":"10.2307\/2532201","volume":"49","author":"J.D. Banfield","year":"1993","unstructured":"Banfield, J.D. and Raftery, A.E. 1993. Model-based Gaussian and non-Gaussian clustering. Biometrics, 49:803\u2013821.","journal-title":"Biometrics"},{"key":"5114783_CR2","doi-asserted-by":"crossref","first-page":"126","DOI":"10.1287\/ijoc.6.2.126","volume":"6","author":"R. Battiti","year":"1994","unstructured":"Battiti, R. and Tecchiolli, G. 1994. The reactive tabu search. ORSA Journal on Computing, 6:126\u2013140.","journal-title":"ORSA Journal on Computing"},{"key":"5114783_CR3","first-page":"91","volume-title":"Proc. 15th Int. Conf. on Machine Learning","author":"P.S. Bradley","year":"1998","unstructured":"Bradley, P.S. and Fayyad, U.M. 1998. Refining initial points for k-means clustering. In Proc. 15th Int. Conf. on Machine Learning, (J. Shavlik (Ed)). San Francisco: Morgan Kaufman, pp. 91\u201399."},{"key":"5114783_CR4","first-page":"153","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"P. Cheeseman","year":"1996","unstructured":"Cheeseman, P. and Stutz, J. 1996. Bayesian classification (AutoClass): Theory and results. In Advances in Knowledge Discovery and Data Mining, (U.M. Fayyad, G. Piatetsky-Shapiro, P. Smyth, and R. Uthurusamy (Eds.)). Cambridge, MA: The MIT Press, pp. 153\u2013180."},{"key":"5114783_CR5","unstructured":"Dasgupta, A. and Raftery, A. 1995. Detecting features in spatial point processes with clutter via model-based clustering. Technical Report No. 195, Department of Statistics, University of Washington."},{"key":"5114783_CR6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"A.P. Dempster","year":"1977","unstructured":"Dempster, A.P., Laird, N.M., and Robin, D.B. 1977. Maximum likelihood from incomplete data via the EM algorithm (with discussion). Journal of the Royal Statistical Society B, 39:1\u201338.","journal-title":"Journal of the Royal Statistical Society B"},{"key":"5114783_CR7","doi-asserted-by":"crossref","DOI":"10.1007\/978-94-009-5897-5","volume-title":"Finite Mixture Distributions. Monographs on Applied Probability and Statistics","author":"B.S. Everitt","year":"1981","unstructured":"Everitt, B.S. and Hand, D.J. 1981. Finite Mixture Distributions. Monographs on Applied Probability and Statistics. New York, NY: Chapman and Hall Ltd."},{"key":"5114783_CR8","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1009715820935","volume":"1","author":"U.M. Fayyad","year":"1997","unstructured":"Fayyad, U.M. 1997.\u201cEditorial\u201d, Data Mining and Knowledge Discovery, 1:5\u201310.","journal-title":"Data Mining and Knowledge Discovery"},{"key":"5114783_CR9","unstructured":"Fayyad, U.M. 1991. On the induction of decision trees for multiple concept learning. Ph.D. Thesis, EECS Department. The University of Michigan, Ann Arbor."},{"key":"5114783_CR10","first-page":"471","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"U.M. Fayyad","year":"1996","unstructured":"Fayyad, U.M., Djorgovski, S.G., and Weir, N. 1996. Automating the analysis and cataloging of sky surveys. In Advances in Knowledge Discovery and Data Mining, (U.M. Fayyad, G. Piatetsky-Shapiro, P. Smyth, and R. Uthurusamy (Eds.)). Cambridge, MA: The MIT Press, pp. 471\u2013493."},{"key":"5114783_CR11","first-page":"193","volume-title":"Proc 4th Int. Conf. on Knowledge Discovery and Data Mining KDD-98","author":"U.M. Fayyad","year":"1998","unstructured":"Fayyad, U.M., Reina, C., and Bradley, P.S. 1998. Initialization of iterative refinement clustering algorithms. In Proc 4th Int. Conf. on Knowledge Discovery and Data Mining KDD-98, (R. Agrawal, P. Stolorz, and G. Piatetsky-Shapiro (Eds.)). Menlo Park, CA: AAAI Press, pp. 193\u2013198."},{"key":"5114783_CR12","unstructured":"Fraley, C. and Raftery, A.E. 1998. How many clusters? Which clustering method? Answers via model-based cluster analysis. Technical Report No. 329, Department of Statistics, University of Washington, Box 354322, Seattle, QA 98195-4322."},{"key":"5114783_CR13","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1080\/00401706.1981.10486244","volume":"23","author":"D.M. Hawkins","year":"1981","unstructured":"Hawkins, D.M. 1981. A new test for multivariate normality and homoscedasticity. Technometrics, 23:105\u2013110.","journal-title":"Technometrics"},{"key":"5114783_CR14","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1017\/CBO9780511897375.007","volume-title":"Topics in Applied Multivariate Analysis","author":"D.M. Hawkins","year":"1982","unstructured":"Hawkins, D.M., Muller, M.W., and ten Krooden, J.A. 1982. Cluster analysis. In Topics in Applied Multivariate Analysis, (D.M. Hawkins (Ed.)). Cambridge: Cambridge University Press, pp. 303\u2013356."},{"key":"5114783_CR15","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316801","volume-title":"Finding Groups in Data: An Introduction to Cluster Analysis","author":"L. Kaufman","year":"1990","unstructured":"Kaufman, L. and Rousseeuw, P.J. 1990. Finding Groups in Data: An Introduction to Cluster Analysis. New York: John Wiley & Sons."},{"key":"5114783_CR16","volume-title":"The Advanced Theory of Statistics III","author":"M.G. Kendall","year":"1963","unstructured":"Kendall, M.G. and Stuart, A. 1963. The Advanced Theory of Statistics III. Griffin, London."},{"key":"5114783_CR17","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1126\/science.220.4598.671","volume":"220","author":"S. Kirkpaterick","year":"1983","unstructured":"Kirkpaterick, S., Gelatt, J., and Vecchi, M.P. 1983. Optimization by simulated annealing. Science, 220:671\u2013680.","journal-title":"Science"},{"key":"5114783_CR18","doi-asserted-by":"crossref","first-page":"318","DOI":"10.2307\/2347790","volume":"36","author":"G.J. McLachlan","year":"1987","unstructured":"McLachlan, G.J. 1987.On bootstrapping the likelihood ratio test statistic for the number of components in a normal mixture. Applied Statistics, 36:318\u2013324.","journal-title":"Applied Statistics"},{"key":"5114783_CR19","volume-title":"Mixture Models: Inference and Applications to Clustering","author":"G.J. McLachlan","year":"1988","unstructured":"McLachlan, G.J. and Basford, K. 1988. Mixture Models: Inference and Applications to Clustering. New York: Marcel Dekker."},{"key":"5114783_CR20","volume-title":"The EM Algorithm and Extensions","author":"G.J. McLachlan","year":"1997","unstructured":"McLachlan, G.J. and Krishnan, T. 1997. The EM Algorithm and Extensions. New York: John Wiley & Sons."},{"key":"5114783_CR21","unstructured":"McLachlan, G.J. and Peel, D. 1998. MIXFIT: An algorithm for the automatic fitting and testing of normal mixture models. Center for Statistics, Department of Mathematics, University of Queensland, St. Lucia, Queensland 4072, Australia."},{"key":"5114783_CR22","unstructured":"Odewahn, S.C., Djorgovski, S.G., Brunner, R.J., and Gal, R. 1998. Data from the digitized palomar sky survey. Department of Astronomy, California Institute of Technology, Pasadena, CA 91125."},{"key":"5114783_CR23","doi-asserted-by":"crossref","first-page":"1327","DOI":"10.1214\/aos\/1032526972","volume":"24","author":"D.M. Rocke","year":"1996","unstructured":"Rocke, D.M. 1996. Robustness properties of S-estimators of multivariate location and shape in high dimension. The Annals of Statistics, 24:1327\u20131345.","journal-title":"The Annals of Statistics"},{"key":"5114783_CR24","unstructured":"Rocke, D.M. 1998. Constructive Statistics: Estimators, Algorithms, and Asymptotics. Center for Image Processing and Integrated Computing, University of California, Davis, CA 95616."},{"key":"5114783_CR25","doi-asserted-by":"crossref","first-page":"1047","DOI":"10.1080\/01621459.1996.10476975","volume":"91","author":"D.M. Rocke","year":"1996","unstructured":"Rocke, D.M. and Woodruff, D.L. 1996. Identification of outliers in multivariate data. Journal of the American Statistical Association, 91:1047\u20131061.","journal-title":"Journal of the American Statistical Association"},{"key":"5114783_CR26","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1080\/00401706.1999.10485670","volume":"41","author":"P.J. Rousseeuw","year":"1999","unstructured":"Rousseeuw, P.J. and Van Driessen, K. 1999. A fast algorithm for the minimum covariance determinant estimator. Techometrics, 41:212\u2013223.","journal-title":"Techometrics"},{"key":"5114783_CR27","volume-title":"Proceedings of the Second International Conference on Knowledge Discovery and Data Mining","author":"P. Smyth","year":"1996","unstructured":"Smyth, P. 1996. Clustering using Monte Carlo cross-validation. In Proceedings of the Second International Conference on Knowledge Discovery and Data Mining. Menlo Park, CA: AAAI Press."},{"key":"5114783_CR28","unstructured":"Titterington, D.M., Smith, A.F.M., and Makov, U.E. 1985. Statistical Analysis of Finite Mixture Distributions."},{"key":"5114783_CR29","doi-asserted-by":"crossref","first-page":"2401","DOI":"10.1086\/117459","volume":"109","author":"N. Weir","year":"1995","unstructured":"Weir, N., Fayyad, U.M., and Djorgovski, S.G. 1995. Automated star\/galaxy classification for digitized POSS-II. The Astronomical Journal, 109:2401\u20132414.","journal-title":"The Astronomical Journal"},{"key":"5114783_CR30","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1007\/978-1-4612-1968-2_8","volume-title":"Statistical Challenges in Modern Astronomy II","author":"R.L. White","year":"1997","unstructured":"White, R.L. 1997. Object classification in astronomical images. In Statistical Challenges in Modern Astronomy II, G.J. Babu and E.D. Feigelson(Ed.)). New York: Springer-Verlag, pp. 135\u2013148."},{"key":"5114783_CR31","unstructured":"Wolf, J. 1971. A Monte Carlo study of the sampling distribution of the likelihood ratio for mixtures of multinormal distributions. Technical Report STB 72-2, San Diego: U.S. Naval Personnel and Training Research Laboratory."},{"key":"5114783_CR32","doi-asserted-by":"crossref","first-page":"888","DOI":"10.1080\/01621459.1994.10476821","volume":"89","author":"D.L. Woodruff","year":"1994","unstructured":"Woodruff, D.L. and Rocke, D.M. 1994. Computable robust estimation of multivariate location and shape in high dimension using compound estimators. Journal of the American Statistical Association, 89:888\u2013896.","journal-title":"Journal of the American Statistical Association"},{"key":"5114783_CR33","first-page":"95","volume":"11","author":"C.F.J. Wu","year":"1983","unstructured":"Wu, C.F.J. 1983. On convergence properties of the EM algorithm for Gaussian mixtures. Annals of Statistics, 11:95\u2013103.","journal-title":"Annals of Statistics"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1022497517599.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1022497517599\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1022497517599.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T03:02:27Z","timestamp":1752462147000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1022497517599"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003,4]]},"references-count":33,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2003,4]]}},"alternative-id":["5114783"],"URL":"https:\/\/doi.org\/10.1023\/a:1022497517599","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2003,4]]}}}