{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T10:24:57Z","timestamp":1770978297694,"version":"3.50.1"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2015,8,12]],"date-time":"2015-08-12T00:00:00Z","timestamp":1439337600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Journal of Big Data"],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1186\/s40537-015-0027-y","type":"journal-article","created":{"date-parts":[[2015,8,11]],"date-time":"2015-08-11T00:12:00Z","timestamp":1439251920000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":30,"title":["A novel algorithm for fast and scalable subspace clustering of high-dimensional data"],"prefix":"10.1186","volume":"2","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0317-5303","authenticated-orcid":false,"given":"Amardeep","family":"Kaur","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Amitava","family":"Datta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,8,12]]},"reference":[{"issue":"D1","key":"27_CR1","doi-asserted-by":"crossref","first-page":"991","DOI":"10.1093\/nar\/gks1193","volume":"41","author":"T Barrett","year":"2013","unstructured":"Barrett T, Wilhite SE, Ledoux P, Evangelista C, Kim IF, Tomashevsky M, Marshall KA, Phillippy KH, Shermpan PM, Holko M, Yefanov A, Lee H, Zhang N, Robertson CL, Serova N, Davis S, Soboleva A (2013) Ncbi geo: archive for functional genomics data sets-update. Nucleic Acids Res 41(D1): 991\u2013995.","journal-title":"Nucleic Acids Res"},{"issue":"8","key":"27_CR2","doi-asserted-by":"crossref","first-page":"1482","DOI":"10.1109\/JPROC.2009.2021005","volume":"97","author":"PE Dewdney","year":"2009","unstructured":"Dewdney PE, Hall PJ, Schilizzi RT, Lazio TJLW (2009) The square kilometre array. Proc IEEE 97(8): 1482\u20131496.","journal-title":"Proc IEEE"},{"issue":"2","key":"27_CR3","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1093\/nsr\/nwt032","volume":"1","author":"J Fan","year":"2014","unstructured":"Fan J, Han F, Liu H (2014) Challenges of big data analysis. National Science Review 1(2): 293\u2013314.","journal-title":"National Science Review"},{"key":"27_CR4","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/978-3-662-08968-2_16","volume-title":"New directions in statistical physics","author":"M Steinbach","year":"2004","unstructured":"Steinbach M, Ert\u00f6z L, Kumar V (2004) The challenges of clustering high dimensional data In: New directions in statistical physics, 273\u2013309.. Springer, Berlin Heidelberg."},{"key":"27_CR5","unstructured":"Aggarwal CC, Reddy CK (2013) Data clustering: algorithms and applications. Data Mining Knowledge and Discovery Series 1st. CRC Press."},{"issue":"3","key":"27_CR6","doi-asserted-by":"crossref","first-page":"645","DOI":"10.1109\/TNN.2005.845141","volume":"16","author":"R Xu","year":"2005","unstructured":"Xu R, Wunsch D II (2005) Survey of clustering algorithms. Neural Netw IEEE Trans on 16(3): 645\u2013678.","journal-title":"Neural Netw IEEE Trans on"},{"key":"27_CR7","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to information retrieval Vol. 1","author":"CD Manning","year":"2008","unstructured":"Manning CD, Raghavan P, Sch\u00fctze H (2008) Hierarchical clustering In: Introduction to information retrieval Vol. 1.. Cambridge university press, New York, USA."},{"key":"27_CR8","first-page":"103","volume-title":"Proc. of the ACM SIGMOD international conference on management of data, vol. 1","author":"T Zhang","year":"1996","unstructured":"Zhang T, Ramakrishnan R, Livny M (1996) BIRCH: an efficient data clustering method for very large databases In: Proc. of the ACM SIGMOD international conference on management of data, vol. 1, 103\u2013114.. ACM Press, USA."},{"issue":"34","key":"27_CR9","first-page":"226","volume":"96","author":"M Ester","year":"1996","unstructured":"Ester M, Kriegel H, Sander J, Xu X (1996) A density-based algorithm for discovering clusters in large spatial databases with noise. Int Conf Knowl Discov Data Min 96(34): 226\u2013231.","journal-title":"Int Conf Knowl Discov Data Min"},{"key":"27_CR10","doi-asserted-by":"crossref","DOI":"10.1515\/9781400874668","volume-title":"Adaptive control processes: a guided tour","author":"RE Bellman","year":"1961","unstructured":"Bellman RE (1961) Adaptive control processes: a guided tour. Princeton University Press, New Jersey."},{"key":"27_CR11","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1007\/3-540-49257-7_15","volume-title":"Database Theory \u2013ICDT\u201999. Lecture Notes in Computer Science","author":"K Beyer","year":"1999","unstructured":"Beyer K, Goldstein J (1999) When is nearest neighbor meaningful? Proc 7th Int Conf Database Theory In: Database Theory \u2013ICDT\u201999. Lecture Notes in Computer Science, 217\u2013235.. Springer, Berlin Heidelberg."},{"issue":"1","key":"27_CR12","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1145\/1007730.1007731","volume":"6","author":"L Parsons","year":"2004","unstructured":"Parsons L, Haque E, Liu H (2004) Subspace clustering for high dimensional data: a review. ACM SIGKDD Explor Newsl 6(1): 90\u2013105.","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"27_CR13","first-page":"225","volume-title":"Computational genomics: Theory and application","author":"MM Babu","year":"2004","unstructured":"Babu MM (2004) Introduction to microarray data analysis. In: Grant RP (ed)Computational genomics: Theory and application, 225\u2013249.. Horizon Press, UK."},{"issue":"25","key":"27_CR14","doi-asserted-by":"crossref","first-page":"14863","DOI":"10.1073\/pnas.95.25.14863","volume":"95","author":"MB Eisen","year":"1998","unstructured":"Eisen MB, Spellman PT, Brown PO, Botstein D (1998) Cluster analysis and display of genome-wide expression patterns. Proc Natl Acad Sci 95(25): 14863\u201314868.","journal-title":"Proc Natl Acad Sci"},{"issue":"11","key":"27_CR15","doi-asserted-by":"crossref","first-page":"1370","DOI":"10.1109\/TKDE.2004.68","volume":"16","author":"D Jiang","year":"2004","unstructured":"Jiang D, Tang C, Zhang A (2004) Cluster analysis for gene expression data: a survey. IEEE Trans Knowl Data Eng 16(11): 1370\u20131386.","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"27_CR16","first-page":"93","volume":"8","author":"Y Cheng","year":"2000","unstructured":"Cheng Y, Church GM (2000) Biclustering of expression data. Proc Int Conf Intell Syst Mol Biol 8: 93\u2013103.","journal-title":"Proc Int Conf Intell Syst Mol Biol"},{"issue":"4","key":"27_CR17","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1109\/TCBB.2005.55","volume":"2","author":"S Yoon","year":"2005","unstructured":"Yoon S, Nardini C, Benini L, De Micheli G (2005) Discovering coherent biclusters from gene expression data using zero-suppressed binary decision diagrams. IEEE\/ACM Trans Comput Biol Bioinforma 2(4): 339\u2013353.","journal-title":"IEEE\/ACM Trans Comput Biol Bioinforma"},{"issue":"24","key":"27_CR18","doi-asserted-by":"crossref","first-page":"3267","DOI":"10.1093\/bioinformatics\/btp588","volume":"25","author":"C Huttenhower","year":"2009","unstructured":"Huttenhower C, Mutungu KT, Indik N, Yang W, Schroeder M, Forman JJ, Troyanskaya OG, Coller HA (2009) Detailing regulatory networks through large scale data integration. Bioinformatics 25(24): 3267\u20133274.","journal-title":"Bioinformatics"},{"key":"27_CR19","first-page":"14","volume-title":"Data Mining and Bioinformatics. Lecture Notes in Computer Science, vol. 4316","author":"J Jun","year":"2006","unstructured":"Jun J, Chung S, McLeod D (2006) Subspace clustering of microarray data based on domain transformation In: Data Mining and Bioinformatics. Lecture Notes in Computer Science, vol. 4316, 14\u201328.. Springer, Heidelberg."},{"issue":"3","key":"27_CR20","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1093\/bib\/bbs032","volume":"14","author":"K Eren","year":"2013","unstructured":"Eren K, Deveci M, Kktun O, atalyrek mV (2013) A comparative analysis of biclustering algorithms for gene expression data. Brief Bioinforma 14(3): 279\u2013292.","journal-title":"Brief Bioinforma"},{"issue":"2","key":"27_CR21","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1109\/TPAMI.2003.1177153","volume":"25","author":"R Basri","year":"2003","unstructured":"Basri R, Jacobs DW (2003) Lambertian reflectance and linear subspaces. IEEE Trans Pattern Anal Mach Intell 25(2): 218\u2013233.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"11","key":"27_CR22","doi-asserted-by":"crossref","first-page":"2765","DOI":"10.1109\/TPAMI.2013.57","volume":"35","author":"E Elhamifar","year":"2013","unstructured":"Elhamifar E, Vidal R (2013) Sparse subspace clustering: algorithm, theory, and applications. IEEE Trans Pattern Anal Mach Intell 35(11): 2765\u20132781.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"27_CR23","doi-asserted-by":"crossref","first-page":"52","DOI":"10.1109\/MSP.2010.939739","volume":"28","author":"R Vidal","year":"2011","unstructured":"Vidal R (2011) Subspace clustering. IEEE Signal Proc Mag 28(2): 52\u201368.","journal-title":"IEEE Signal Proc Mag"},{"key":"27_CR24","doi-asserted-by":"crossref","unstructured":"Ho J, Yang MH, Lim J, Lee KC, Kriegman D (2003) Clustering appearances of objects under varying illumination conditions In: Computer vision and pattern recognition, 2003. Proceedings. 2003 IEEE computer society conference on, vol. 1, 1\u201311.. IEEE.","DOI":"10.1109\/CVPR.2003.1211332"},{"key":"27_CR25","doi-asserted-by":"crossref","unstructured":"Tierney S, Gao J, Guo Y (2014) Subspace clustering for sequential data In: Computer vision and pattern recognition (CVPR), 2014 IEEE conference On, 1019\u20131026.. IEEE.","DOI":"10.1109\/CVPR.2014.134"},{"issue":"1","key":"27_CR26","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1007\/s11263-007-0099-z","volume":"79","author":"R Vidal","year":"2008","unstructured":"Vidal R, Tron R, Hartley R (2008) Multiframe motion segmentation with missing data using PowerFactorization and GPCA. Int J Comput Vis 79(1): 85\u2013105.","journal-title":"Int J Comput Vis"},{"key":"27_CR27","first-page":"243","volume-title":"Data mining and knowledge discovery","author":"S G\u00fcnnemann","year":"2012","unstructured":"G\u00fcnnemann S, Boden B, Seidl T (2012) Finding density-based subspace clusters in graphs with feature vectors In: Data mining and knowledge discovery, 243\u2013269.. Springer, US."},{"issue":"3","key":"27_CR28","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1007\/s11222-007-9027-x","volume":"17","author":"W Jang","year":"2007","unstructured":"Jang W, Hendry M (2007) Cluster analysis of massive datasets in astronomy. Stat Comput 17(3): 253\u2013262.","journal-title":"Stat Comput"},{"key":"27_CR29","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1145\/1008992.1009031","volume-title":"Proceedings of the 27th annual international ACM SIGIR conference on research and development in information retrieval","author":"T Li","year":"2004","unstructured":"Li T, Ma S, Ogihara M (2004) Document clustering via adaptive subspace iteration In: Proceedings of the 27th annual international ACM SIGIR conference on research and development in information retrieval, 218\u2013225.. ACM, USA."},{"key":"27_CR30","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1145\/304182.304188","volume-title":"Proc. of the ACM SIGMOD international conference on management of data","author":"CC Aggarwal","year":"1999","unstructured":"Aggarwal CC, Wolf JL, Yu PS, Procopiuc C, Park JS (1999) Fast algorithms for projected clustering In: Proc. of the ACM SIGMOD international conference on management of data, 61\u201372.. ACM, USA."},{"issue":"4","key":"27_CR31","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1016\/j.infsof.2003.07.003","volume":"46","author":"KG Woo","year":"2004","unstructured":"Woo KG, Lee JH, Kim MH, Lee YJ (2004) FINDIT: a fast and intelligent subspace clustering algorithm using dimension voting. Inf Softw Technol 46(4): 255\u2013271.","journal-title":"Inf Softw Technol"},{"key":"27_CR32","first-page":"307","volume":"12","author":"R Agrawal","year":"1996","unstructured":"Agrawal R, Mannila H, Srikant R, Toivonen H, Verkamo AI (1996) Fast discovery of association rules. Adv Knowl Discov Data Min 12: 307\u2013328.","journal-title":"Adv Knowl Discov Data Min"},{"key":"27_CR33","doi-asserted-by":"crossref","unstructured":"Agrawal R, Gehrke J, Gunopulos D (1998) Automatic subspace clustering of high dimensional data for data mining applications In: Proc. of the ACM SIGMOD international conference on management of data, 94\u2013105.","DOI":"10.1145\/276304.276314"},{"key":"27_CR34","doi-asserted-by":"crossref","unstructured":"Kailing K, Kriegel HP, Kroger P (2004) Density-connected subspace clustering for high-dimensional data In: SIAM international conference on data mining, 246\u2013256.","DOI":"10.1137\/1.9781611972740.23"},{"key":"27_CR35","doi-asserted-by":"crossref","first-page":"250","DOI":"10.1109\/ICDM.2005.5","volume-title":"IEEE international conference on data mining","author":"H-PH Kriegel","year":"2005","unstructured":"Kriegel H-PH, Kroger P, Renz M, Wurst S (2005) A generic framework for efficient subspace clustering of high-dimensional data In: IEEE international conference on data mining, 250\u2013257.. IEEE, Washington, DC, USA."},{"issue":"2","key":"27_CR36","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1007\/s10618-012-0258-x","volume":"26","author":"K Sim","year":"2012","unstructured":"Sim K, Gopalkrishnan V, Zimek A, Cong G (2012) A survey on enhanced subspace clustering. Data Min Knowl Disc 26(2): 332\u2013397.","journal-title":"Data Min Knowl Disc"},{"key":"27_CR37","doi-asserted-by":"crossref","unstructured":"Kaur A, Datta A (2014) Subscale: fast and scalable subspace clustering for high dimensional data In: Data mining workshop (ICDMW), 2014 IEEE international conference on, 621\u2013628.. IEEE.","DOI":"10.1109\/ICDMW.2014.100"},{"key":"27_CR38","volume-title":"Principle component analysis","author":"IT Joliffe","year":"2002","unstructured":"Joliffe IT (2002) Principle component analysis. 2nd edn. Springer, New York."},{"issue":"1","key":"27_CR39","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1497577.1497578","volume":"3","author":"HP Kriegel","year":"2009","unstructured":"Kriegel HP, Kr\u00f6ger P, Zimek A, Oger PKR (2009) Clustering high-dimensional data: a survey on subspace clustering, pattern-based clustering, and correlation clustering. ACM Trans Knowl Discov Data 3(1): 1\u201358.","journal-title":"ACM Trans Knowl Discov Data"},{"key":"27_CR40","first-page":"84","volume-title":"ACM SIGKDD international conference on knowledge discovery and data mining","author":"CH Cheng","year":"1999","unstructured":"Cheng CH, Fu AW, Zhang Y (1999) Entropy-based subspace clustering for mining numerical data In: ACM SIGKDD international conference on knowledge discovery and data mining, 84\u201393.. ACM, NY, USA."},{"key":"27_CR41","doi-asserted-by":"crossref","unstructured":"Assent I, Emmanuel M, Seidl T (2008) Inscy: Indexing subspace clusters with in-process-removal of redundancy In: Eighth IEEE international conference on data mining, 719\u2013724.. IEEE.","DOI":"10.1109\/ICDM.2008.46"},{"key":"27_CR42","doi-asserted-by":"crossref","unstructured":"Nagesh H, Goil S, Choudhary A (2001) Adaptive grids for clustering massive data sets. Proc 1st SIAM Int Conf Data Min: pp. 1\u201317.","DOI":"10.1137\/1.9781611972719.7"},{"issue":"2","key":"27_CR43","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1215\/S0012-7094-41-00826-8","volume":"8","author":"P Erd\u00f6s","year":"1941","unstructured":"Erd\u00f6s P, Lehner J (1941) The distribution of the number of summands in the partitions of a positive integer. Duke Mathematical Journal 8(2): 335\u2013345.","journal-title":"Duke Mathematical Journal"},{"key":"27_CR44","first-page":"1270","volume-title":"International conference on very large data bases","author":"E M\u00fcller","year":"2009","unstructured":"M\u00fcller E, G\u00fcnnemann S, Assent I, Seidl T, Emmanuel M, Stephan G (2009) Evaluating clustering in subspace projections of high dimensional data In: International conference on very large data bases, 1270\u20131281.. VLDB Endowment, Lyon, France."},{"key":"27_CR45","doi-asserted-by":"crossref","unstructured":"M\u00fcller E, G\u00fcnnemann S, Assent I, Seidl T, F\u00e4rber I (2009) Evaluating Clustering in Subspace Projections of High Dimensional Data. http:\/\/dme.rwth-aachen.de\/en\/OpenSubspace\/evaluation . Accessed 08 Aug 2015.","DOI":"10.14778\/1687627.1687770"},{"key":"27_CR46","unstructured":"Bache K, Lichman M (2006) UCI machine learning repository. http:\/\/archive.ics.uci.edu\/ml . Accessed 08 Aug 2015."},{"issue":"11","key":"27_CR47","doi-asserted-by":"crossref","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger A, Lenz P, Stiller C, Urtasun R (2013) Vision meets robotics: The KITTI dataset. Int J Rob Res 32(11): 1231\u20131237.","journal-title":"Int J Rob Res"},{"key":"27_CR48","unstructured":"Bileschi SM (2006) Streetscenes: Towards Scene Understanding in Still Images. PhD thesis, Massachusettes Inst Tech."},{"issue":"6","key":"27_CR49","doi-asserted-by":"crossref","first-page":"2939","DOI":"10.1016\/j.eswa.2013.10.025","volume":"41","author":"S Jahirabadkar","year":"2014","unstructured":"Jahirabadkar S, Kulkarni P (2014) Algorithm to determine \u03b5-distance parameter in density based clustering. Expert Syst Appl 41(6): 2939\u20132946.","journal-title":"Expert Syst Appl"},{"key":"27_CR50","doi-asserted-by":"crossref","unstructured":"Assent I, Krieger R, M\u00fcller E, Seidl T (2007) Dusc: Dimensionality unbiased subspace clustering In: Seventh IEEE international conference on data mining (ICDM 2007), 409\u2013414.. IEEE.","DOI":"10.1109\/ICDM.2007.49"},{"key":"27_CR51","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Weiss R, Brucher M (2011) Scikit-learn: machine learning in Python. J Mach Learn Res 12: 2825\u20132830.","journal-title":"J Mach Learn Res"},{"issue":"1","key":"27_CR52","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1145\/1656274.1656278","volume":"11","author":"M Hall","year":"2009","unstructured":"Hall M, Frank E, Holmes G, Pfahringer B, Reutemann P, Witten IH (2009) The WEKA data mining software: an update. ACM SIGKDD explorations newsletter 11(1): 10\u201318.","journal-title":"ACM SIGKDD explorations newsletter"},{"key":"27_CR53","first-page":"331","volume-title":"ICCV workshop on large-scale video search and mining (LSVSM\u201913)","author":"Jianqing Zhu","year":"2013","unstructured":"Zhu Jianqing, Liao Shengcai, Lei Zhen, Yi Dong, Li StanZ (2013) Pedestrian attribute classification in surveillance: database and evaluation In: ICCV workshop on large-scale video search and mining (LSVSM\u201913), 331\u2013338.. IEEE, Sydney."},{"key":"27_CR54","unstructured":"GitHub repository for SUBSCALE algorithm. https:\/\/github.com\/amkaur\/subscale.git . Accessed 08 Aug 2015."},{"key":"27_CR55","unstructured":"GitHub repository for scalable SUBSCALE algorithm. https:\/\/github.com\/amkaur\/subscaleplus.git . Accessed 08 Aug 2015."}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-015-0027-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s40537-015-0027-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-015-0027-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T13:15:52Z","timestamp":1718025352000},"score":1,"resource":{"primary":{"URL":"http:\/\/www.journalofbigdata.com\/content\/2\/1\/17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,8,12]]},"references-count":55,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["27"],"URL":"https:\/\/doi.org\/10.1186\/s40537-015-0027-y","relation":{},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,8,12]]},"article-number":"17"}}