{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T06:10:01Z","timestamp":1748585401990,"version":"3.41.0"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319232003"},{"type":"electronic","value":"9783319232010"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-23201-0_20","type":"book-chapter","created":{"date-parts":[[2015,8,27]],"date-time":"2015-08-27T11:57:35Z","timestamp":1440676655000},"page":"175-185","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["CLUS: Parallel Subspace Clustering Algorithm on Spark"],"prefix":"10.1007","author":[{"given":"Bo","family":"Zhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandru","family":"Mara","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alberto","family":"Mozo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,8,28]]},"reference":[{"key":"20_CR1","first-page":"768","volume":"21","author":"EW Forgy","year":"1965","unstructured":"Forgy, E.W.: Cluster analysis of multivariate data: efficiency versus interpretability of classifications. Biometrics 21, 768\u2013769 (1965)","journal-title":"Biometrics"},{"key":"20_CR2","unstructured":"Ester, M., Kriegel, H.P., Sander, J., Xu, X.: A density-based algorithm for discovering clusters in large spatial databases with noise, pp. 226\u2013231. AAAI Press (1996)"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Kaufman, L., Rousseeuw, P.J.: Finding Groups in Data. John Wiley & Sons (1990)","DOI":"10.1002\/9780470316801"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"M\u00fcller, E., G\u00fcnnemann, S., Assent, I., Seidl, T.: Evaluating clustering in subspace projections of high dimensional data. In: Proc. VLDB, vol. 2(1) (2009)","DOI":"10.14778\/1687627.1687770"},{"issue":"11","key":"20_CR5","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1080\/14786440109462720","volume":"2","author":"K Pearson","year":"1901","unstructured":"Pearson, K.: On Lines and Planes of Closest Fit to Systems of Points in Space. Philosophical Magazine 2(11), 559\u2013572 (1901)","journal-title":"Philosophical Magazine"},{"issue":"8","key":"20_CR6","doi-asserted-by":"publisher","first-page":"1226","DOI":"10.1109\/TPAMI.2005.159","volume":"27","author":"H Peng","year":"2005","unstructured":"Peng, H., Long, F., Ding, C.: Feature selection based on mutual information criteria of max-dependency, max-relevance, and min-redundancy. Pattern Analysis and Machine Intelligence 27(8), 1226\u20131238 (2005)","journal-title":"Pattern Analysis and Machine Intelligence"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Zimek, A., Assent, I., Vreeken, J.: Frequent pattern mining algorithms for data clustering. In: Frequent Pattering Mining, chapter 16, pp. 403\u2013423. Springer International Publishing (2014)","DOI":"10.1007\/978-3-319-07821-2_16"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Kailing, K., Kriegel, H.P., Kr\u00f6ger, P.: Density-connected subspace clustering for high-dimensional data. In: Proc. SIAM, pp. 246\u2013257 (2004)","DOI":"10.1137\/1.9781611972740.23"},{"issue":"1","key":"20_CR9","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: MapReduce: simplified data In Proc. on large clusters. Communications of the ACM 51(1), 107\u2013113 (2008)","journal-title":"Communications of the ACM"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Shvachko, K., et al.: The hadoop distributed file system. In: 2010 IEEE 26th Symposium on Mass Storage Systems and Technologies (MSST). IEEE (2010)","DOI":"10.1109\/MSST.2010.5496972"},{"key":"20_CR11","unstructured":"Zaharia, M., et al.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: Proc. USENIX (2012)"},{"issue":"1","key":"20_CR12","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1145\/1007730.1007731","volume":"6","author":"L Parsons","year":"2004","unstructured":"Parsons, L., Haque, E., Liu, H.: Subspace clustering for high dimensional data: a review. ACM SIGKDD Explorations Newsletter 6(1), 90\u2013105 (2004)","journal-title":"ACM SIGKDD Explorations Newsletter"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Sim, K., Gopalkrishnan, V., Zimek, A., Cong, G.: A survey on enhanced subspace clustering. Data Mining and Knowledge Discovery 26(2) (2013)","DOI":"10.1007\/s10618-012-0258-x"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Agrawal, R., Gehrke, J., Gunopulos, D., Raghavan, P.: Automatic subspace clustering of high dimensional data for data mining applications. In: Proc. ACM SIGMOD, pp. 94\u2013105 (1998)","DOI":"10.1145\/276305.276314"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Cheng, C., Fu, A., Zhang, Y.: Entropy-based subspace clustering for mining numerical data. In: Proc. SIGKDD, pp. 84\u201393 (1999)","DOI":"10.1145\/312129.312199"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Aggarwal, C.C., Wolf, J.L., Yu, P.S., Procopiuc, C., Park, J.S.: Fast algorithms for projected clustering. In: Proc. ACM SIGMOD, pp. 61\u201372 (1999)","DOI":"10.1145\/304181.304188"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Aggarwal, C.C., Yu, P.S.: Finding generalized projected clusters in high dimensional spaces. In: Proc. ACM SIGMOD, pp. 70\u201381 (2000)","DOI":"10.1145\/335191.335383"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"Sequeira, K., Zaki, M.: SCHISM: a new approach for interesting subspace mining. In: Proc. ICDM, pp. 186\u2013193 (2004)","DOI":"10.1109\/ICDM.2004.10099"},{"issue":"5\u20136","key":"20_CR19","first-page":"427","volume":"2","author":"G Liu","year":"2010","unstructured":"Liu, G., Sim, K., Li, J., Wong, L.: Efficient mining of distance-based subspace clusters. Statistical Analysis and Data Mining 2(5\u20136), 427\u2013444 (2010)","journal-title":"Statistical Analysis and Data Mining"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Assent, I., Krieger, R., M\u00fcller, E., Seidl, T.: INSCY: indexing subspace clusters with in-process-removal of redundancy. In: Proc. ICDM, pp. 719\u2013724 (2008)","DOI":"10.1109\/ICDM.2008.46"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Moise, G., Sander, J.: Finding non-redundant, statistically significant regions in high dimensional data: a novel approach to projected and subspace clustering. In: Proc. SIGKDD, pp. 533\u2013541 (2008)","DOI":"10.1145\/1401890.1401956"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Gunnemann, S., Farber, I., Boden, B., Seidl, T.: Subspace clustering meets dense subgraph mining: a synthesis of two paradigms. In: Proc. ICDM (2010)","DOI":"10.1109\/ICDM.2010.95"},{"key":"20_CR23","unstructured":"Goil, S., Nagesh, H., Choudhary, A.: MAFIA: efficient and scalable subspace clustering for very large data sets. In: Proc. SIGKDD (1999)"},{"key":"20_CR24","unstructured":"Spark. https:\/\/spark.apache.org\/"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Domenoconi, C., Papadopoulos, D., Gunopulos, D., Ma, S.: Subspace clustering of high dimensional data. In: Proc. SIAM (2004)","DOI":"10.1137\/1.9781611972740.58"},{"key":"20_CR26","unstructured":"Nazerzadeh, H., Ghodsi, M., Sadjadian, S.: Parallel subspace clustering. In: Proc. the 10th Annual Conference of Computer Society of Iran (2005)"},{"key":"20_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1007\/978-3-540-69497-7_41","volume-title":"Scientific and Statistical Database Management","author":"E Achtert","year":"2008","unstructured":"Achtert, E., Kriegel, H.-P., Zimek, A.: ELKI: a software system for evaluation of subspace clustering algorithms. In: Lud\u00e4scher, B., Mamoulis, N. (eds.) SSDBM 2008. LNCS, vol. 5069, pp. 580\u2013585. Springer, Heidelberg (2008)"}],"container-title":["Communications in Computer and Information Science","New Trends in Databases and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-23201-0_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T05:45:48Z","timestamp":1748583948000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-23201-0_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319232003","9783319232010"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-23201-0_20","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"28 August 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}