{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T11:03:53Z","timestamp":1772795033772,"version":"3.50.1"},"reference-count":59,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2014,12,27]],"date-time":"2014-12-27T00:00:00Z","timestamp":1419638400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2015,10]]},"DOI":"10.1007\/s10115-014-0801-8","type":"journal-article","created":{"date-parts":[[2014,12,26]],"date-time":"2014-12-26T03:28:17Z","timestamp":1419564497000},"page":"1-34","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":34,"title":["Greedy column subset selection for large-scale data sets"],"prefix":"10.1007","volume":"45","author":[{"given":"Ahmed K.","family":"Farahat","sequence":"first","affiliation":[]},{"given":"Ahmed","family":"Elgohary","sequence":"additional","affiliation":[]},{"given":"Ali","family":"Ghodsi","sequence":"additional","affiliation":[]},{"given":"Mohamed S.","family":"Kamel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,12,27]]},"reference":[{"issue":"4","key":"801_CR1","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1016\/S0022-0000(03)00025-4","volume":"66","author":"D Achlioptas","year":"2003","unstructured":"Achlioptas D (2003) Database-friendly random projections: Johnson-Lindenstrauss with binary coins. J Comput Syst Sci 66(4):671\u2013687","journal-title":"J Comput Syst Sci"},{"issue":"2","key":"801_CR2","doi-asserted-by":"crossref","first-page":"226","DOI":"10.1145\/290200.287637","volume":"24","author":"C Bischof","year":"1998","unstructured":"Bischof C, Quintana-Ort\u00ed G (1998) Computing rank-revealing QR factorizations of dense matrices. ACM Trans Math Softw 24(2):226\u2013253","journal-title":"ACM Trans Math Softw"},{"key":"801_CR3","doi-asserted-by":"crossref","unstructured":"Boutsidis C, Drineas P, Magdon-Ismail M (2011) Near optimal column-based matrix reconstruction. In: Proceedings of the 52nd annual IEEE symposium on foundations of computer science (FOCS\u201911), pp 305\u2013314","DOI":"10.1109\/FOCS.2011.21"},{"key":"801_CR4","doi-asserted-by":"crossref","unstructured":"Boutsidis C, Mahoney MW, Drineas P (2008a) An improved approximation algorithm for the column subset selection problem, CoRR abs\/0812.4293","DOI":"10.1137\/1.9781611973068.105"},{"key":"801_CR5","doi-asserted-by":"crossref","unstructured":"Boutsidis C, Mahoney MW Drineas P (2008b) Unsupervised feature selection for principal components analysis. In Li Y, Liu B, Sarawagi S (eds) Proceedings of the 14th ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201908). ACM, New York, pp 61\u201369","DOI":"10.1145\/1401890.1401903"},{"key":"801_CR6","doi-asserted-by":"crossref","unstructured":"Boutsidis C, Mahoney MW, Drineas P (2009) An improved approximation algorithm for the column subset selection problem. In: Proceedings of the 20th annual ACM-SIAM symposium on discrete algorithms (SODA\u201909), pp 968\u2013977","DOI":"10.1137\/1.9781611973068.105"},{"key":"801_CR7","doi-asserted-by":"crossref","unstructured":"Boutsidis C, Sun J, Anerousis N (2008) Clustered subset selection and its applications on it service metrics. In: Proceedings of the 17th ACM conference on information and knowledge management (CIKM\u201908), pp 599\u2013608","DOI":"10.1145\/1458082.1458162"},{"key":"801_CR8","doi-asserted-by":"crossref","unstructured":"Cai D, Zhang C, He X (2010) Unsupervised feature selection for multi-cluster data. In: Proceedings of the 16th ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201910). ACM, New York, NY, pp 333\u2013342","DOI":"10.1145\/1835804.1835848"},{"key":"801_CR9","doi-asserted-by":"crossref","unstructured":"\u00c7ivril A, Magdon-Ismail M (2008) Deterministic sparse column based matrix reconstruction via greedy approximation of SVD. In: Proceedings of the 19th international symposium on algorithms and computation (ISAAC\u201908). Springer, New York, pp 414\u2013423","DOI":"10.1007\/978-3-540-92182-0_38"},{"key":"801_CR10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.tcs.2011.11.019","volume":"421","author":"A \u00c7ivril","year":"2012","unstructured":"\u00c7ivril A, Magdon-Ismail M (2012) Column subset selection via sparse approximation of SVD. Theoret Comput Sci 421:1\u201314","journal-title":"Theoret Comput Sci"},{"key":"801_CR11","first-page":"67","volume":"88","author":"T Chan","year":"1987","unstructured":"Chan T (1987) Rank revealing QR factorizations. Linear Algebra Appl 88:67\u201382","journal-title":"Linear Algebra Appl"},{"issue":"3","key":"801_CR12","doi-asserted-by":"crossref","first-page":"568","DOI":"10.1109\/TPAMI.2010.88","volume":"33","author":"W-Y Chen","year":"2011","unstructured":"Chen W-Y, Song Y, Bai H, Lin C-J, Chang E (2011) Parallel spectral clustering in distributed systems. IEEE Trans Pattern Anal Mach Intell 33(3):568\u2013586","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"801_CR13","unstructured":"Cui Y, Dy J (2008) Orthogonal principal feature selection, the sparse optimization and variable selection workshop at the international conference on machine learning (ICML)"},{"issue":"1","key":"801_CR14","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1002\/rsa.10073","volume":"22","author":"S Dasgupta","year":"2003","unstructured":"Dasgupta S, Gupta A (2003) An elementary proof of a theorem of Johnson and Lindenstrauss. Random Struct Algorithms 22(1):60\u201365","journal-title":"Random Struct Algorithms"},{"issue":"1","key":"801_CR15","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean J, Ghemawat S (2008) MapReduce: simplified data processing on large clusters. Commun ACM 51(1):107\u2013113","journal-title":"Commun ACM"},{"issue":"6","key":"801_CR16","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester S, Dumais S, Furnas G, Landauer T, Harshman R (1990) Indexing by latent semantic analysis. J Am Soc Inform Sci Technol 41(6):391\u2013407","journal-title":"J Am Soc Inform Sci Technol"},{"key":"801_CR17","doi-asserted-by":"crossref","unstructured":"Deshpande A, Rademacher L (2010) Efficient volume sampling for row\/column subset selection. In: Proceedings of the 51st annual IEEE symposium on foundations of computer science (FOCS\u201910), pp 329\u2013338","DOI":"10.1109\/FOCS.2010.38"},{"issue":"1","key":"801_CR18","doi-asserted-by":"crossref","first-page":"225","DOI":"10.4086\/toc.2006.v002a012","volume":"2","author":"A Deshpande","year":"2006","unstructured":"Deshpande A, Rademacher L, Vempala S, Wang G (2006a) Matrix approximation and projective clustering via volume sampling. Theory Comput 2(1):225\u2013247","journal-title":"Theory Comput"},{"key":"801_CR19","doi-asserted-by":"crossref","unstructured":"Deshpande A, Rademacher L, Vempala S, Wang G (2006b) Matrix approximation and projective clustering via volume sampling. In: Proceedings of the 17th annual ACM-SIAM symposium on discrete algorithms (SODA\u201906). ACM, New York, NY, pp 1117\u20131126","DOI":"10.1145\/1109557.1109681"},{"issue":"1\u20133","key":"801_CR20","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1023\/B:MACH.0000033113.59016.96","volume":"56","author":"P Drineas","year":"2004","unstructured":"Drineas P, Frieze A, Kannan R, Vempala S, Vinay V (2004) Clustering large graphs via the singular value decomposition. Mach Learn 56(1\u20133):9\u201333","journal-title":"Mach Learn"},{"issue":"1","key":"801_CR21","doi-asserted-by":"crossref","first-page":"158","DOI":"10.1137\/S0097539704442696","volume":"36","author":"P Drineas","year":"2007","unstructured":"Drineas P, Kannan R, Mahoney M (2007) Fast Monte Carlo algorithms for matrices II: computing a low-rank approximation to a matrix. SIAM J Comput 36(1):158\u2013183","journal-title":"SIAM J Comput"},{"key":"801_CR22","doi-asserted-by":"crossref","unstructured":"Drineas P, Mahoney M, Muthukrishnan S (2006) Subspace sampling and relative-error matrix approximation: column-based methods. Approximation, randomization, and combinatorial optimization. Algorithms and techniques. Springer, Berlin, pp 316\u2013326","DOI":"10.1007\/11830924_30"},{"key":"801_CR23","unstructured":"Elgohary A, Farahat AK, Kamel MS, Karray F (2013) Embed and conquer: scalable embeddings for kernel k-means on mapreduce, CoRR abs\/1311.2334"},{"key":"801_CR24","doi-asserted-by":"crossref","unstructured":"Elsayed T, Lin J, Oard DW (2008) Pairwise document similarity in large collections with MapReduce. In: Proceedings of the 46th annual meeting of the association for computational linguistics on human language technologies: short Papers (HLT\u201908), pp 265\u2013268","DOI":"10.3115\/1557690.1557767"},{"key":"801_CR25","doi-asserted-by":"crossref","unstructured":"Ene A, Im S, Moseley B (2011) Fast clustering using MapReduce. In: Proceedings of the 17th ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201911), pp 681\u2013689","DOI":"10.1145\/2020408.2020515"},{"key":"801_CR26","doi-asserted-by":"crossref","unstructured":"Farahat A, Elgohary A, Ghodsi A, Kamel M (2013) Distributed column subset selection on MapReduce. In: Proceedings of the 13th IEEE international conference on data mining (ICDM\u201913), pp 171\u2013180","DOI":"10.1109\/ICDM.2013.155"},{"key":"801_CR27","doi-asserted-by":"crossref","unstructured":"Farahat AK, Ghodsi A, Kamel MS (2011) An efficient greedy method for unsupervised feature selection. In: Proceedings of the 11th IEEE international conference on data mining (ICDM\u201911), pp 161\u2013170","DOI":"10.1109\/ICDM.2011.22"},{"issue":"2","key":"801_CR28","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1007\/s10115-012-0538-1","volume":"35","author":"AK Farahat","year":"2013","unstructured":"Farahat AK, Ghodsi A, Kamel MS (2013) Efficient greedy feature selection for unsupervised learning. Knowl Inf Syst 35(2):285\u2013310","journal-title":"Knowl Inf Syst"},{"key":"801_CR29","doi-asserted-by":"crossref","unstructured":"Frieze A, Kannan R, Vempala S (1998) Fast Monte-Carlo algorithms for finding low-rank approximations. In: Proceedings of the 39th annual IEEE symposium on foundations of computer science (FOCS\u201998), pp 370\u2013378","DOI":"10.1109\/SFCS.1998.743487"},{"key":"801_CR30","volume-title":"Matrix computations","author":"G Golub","year":"1996","unstructured":"Golub G, Van Loan C (1996) Matrix computations, 3rd edn. Johns Hopkins University Press, Baltimore","edition":"3"},{"issue":"4","key":"801_CR31","doi-asserted-by":"crossref","first-page":"848","DOI":"10.1137\/0917055","volume":"17","author":"M Gu","year":"1996","unstructured":"Gu M, Eisenstat SC (1996) Efficient algorithms for computing a strong rank-revealing QR factorization. SIAM J Sci Comput 17(4):848\u2013869","journal-title":"SIAM J Sci Comput"},{"key":"801_CR32","doi-asserted-by":"crossref","unstructured":"Guruswami V, Sinop AK (2012) Optimal column-based low-rank matrix reconstruction. In: Proceedings of the 21st annual ACM-SIAM symposium on discrete algorithms (SODA\u201912), pp 1207\u20131214","DOI":"10.1137\/1.9781611973099.95"},{"issue":"5","key":"801_CR33","doi-asserted-by":"crossref","first-page":"2580","DOI":"10.1137\/100804139","volume":"33","author":"N Halko","year":"2011","unstructured":"Halko N, Martinsson P-G, Shkolnisky Y, Tygert M (2011) An algorithm for the principal component analysis of large data sets. SIAM J Sci Comput 33(5):2580\u20132594","journal-title":"SIAM J Sci Comput"},{"key":"801_CR34","volume-title":"Laplacian score for feature selection, advances in neural information processing systems 18 (NIPS\u201905)","author":"X He","year":"2005","unstructured":"He X, Cai D, Niyogi P (2005) Laplacian score for feature selection, advances in neural information processing systems 18 (NIPS\u201905). MIT Press, Cambridge, MA"},{"issue":"3","key":"801_CR35","doi-asserted-by":"crossref","first-page":"328","DOI":"10.1109\/TPAMI.2005.55","volume":"27","author":"X He","year":"2005","unstructured":"He X, Yan S, Hu Y, Niyogi P, Zhang H (2005) Face recognition using Laplacianfaces. IEEE Trans Pattern Anal Mach Intell 27(3):328\u2013340","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"801_CR36","volume-title":"Engineering statistics","author":"RV Hogg","year":"1987","unstructured":"Hogg RV, Ledolter J (1987) Engineering statistics, vol 358. MacMillan, New York"},{"key":"801_CR37","volume-title":"Algorithms for clustering data","author":"AK Jain","year":"1988","unstructured":"Jain AK, Dubes RC (1988) Algorithms for clustering data. Prentice-Hall Inc, Upper Saddle River, NJ"},{"key":"801_CR38","volume-title":"Principal component analysis","author":"I Jolliffe","year":"2002","unstructured":"Jolliffe I (2002) Principal component analysis, 2nd edn. Springer, New York","edition":"2"},{"key":"801_CR39","unstructured":"Kang U, Tsourakakis C, Appel A, Faloutsos C, Leskovec J (2008) Hadi: fast diameter estimation and mining in massive graphs with hadoop, CMU-ML-08-117"},{"key":"801_CR40","doi-asserted-by":"crossref","unstructured":"Karloff H, Suri S, Vassilvitskii S (2010) A model of computation for MapReduce. In: Proceedings of the 21st annual ACM-SIAM symposium on discrete algorithms (SODA\u201910), pp 938\u2013948","DOI":"10.1137\/1.9781611973075.76"},{"key":"801_CR41","doi-asserted-by":"crossref","unstructured":"Karypis G (2003) CLUTO\u2014a clustering toolkit, rechnical report #02-017. University of Minnesota, Department of Computer Science","DOI":"10.21236\/ADA439508"},{"key":"801_CR42","unstructured":"Kaufman L, Rousseeuw P (1987) Clustering by means of medoids. Department of Mathematics and Informatics,Technical report, Technische Hogeschool, Delft (Netherlands)"},{"issue":"5","key":"801_CR43","doi-asserted-by":"crossref","first-page":"684","DOI":"10.1109\/TPAMI.2005.92","volume":"27","author":"K Lee","year":"2005","unstructured":"Lee K, Ho J, Kriegman D (2005) Acquiring linear subspaces for face recognition under variable lighting. IEEE Trans Pattern Anal Mach Intell 27(5):684\u2013698","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"801_CR44","unstructured":"Lewis D (1999) Reuters-21578 text categorization test collection distribution 1.0"},{"key":"801_CR45","first-page":"361","volume":"5","author":"DD Lewis","year":"2004","unstructured":"Lewis DD, Yang Y, Rose TG, Li F (2004) Rcv1: a new benchmark collection for text categorization research. J Mach Learn Res 5:361\u2013397","journal-title":"J Mach Learn Res"},{"key":"801_CR46","doi-asserted-by":"crossref","unstructured":"Li P, Hastie TJ, Church KW (2006) Very sparse random projections. In: Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201906), pp 287\u2013296","DOI":"10.1145\/1150402.1150436"},{"key":"801_CR47","doi-asserted-by":"crossref","unstructured":"Lu Y, Cohen I, Zhou X, Tian Q (2007) Feature selection using principal feature analysis. In: Proceedings of the 15th international conference on multimedia. ACM, New York, NY, pp 301\u2013304","DOI":"10.1145\/1291233.1291297"},{"key":"801_CR48","volume-title":"Handbook of matrices","author":"H L\u00fctkepohl","year":"1996","unstructured":"L\u00fctkepohl H (1996) Handbook of matrices. Wiley, New Jersey"},{"key":"801_CR49","doi-asserted-by":"crossref","unstructured":"Masaeli M, Yan Y, Cui Y, Fung, G, Dy J (2010) Convex principal feature selection. In: Proceedings of SIAM international conference on data mining (SDM), pp 619\u2013628","DOI":"10.1137\/1.9781611972801.54"},{"key":"801_CR50","unstructured":"Meng X, Mahoney M (2013) Robust regression on mapreduce. In: Proceedings of the 30th international conference on machine learning (ICML-13), pp 888\u2013896"},{"issue":"3","key":"801_CR51","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1109\/34.990133","volume":"24","author":"P Mitra","year":"2002","unstructured":"Mitra P, Murthy C, Pal S (2002) Unsupervised feature selection using feature similarity. IEEE Trans Pattern Anal Mach Intell 24(3):301\u2013312","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1","key":"801_CR52","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1016\/S0024-3795(00)00120-8","volume":"316","author":"C Pan","year":"2000","unstructured":"Pan C (2000) On the existence and computation of rank-revealing LU factorizations. Linear Algebra Appl 316(1):199\u2013222","journal-title":"Linear Algebra Appl"},{"issue":"12","key":"801_CR53","doi-asserted-by":"crossref","first-page":"1615","DOI":"10.1109\/TPAMI.2003.1251154","volume":"25","author":"T Sim","year":"2003","unstructured":"Sim T, Baker S, Bsat M (2003) The CMU pose, illumination, and expression database. IEEE Trans Pattern Anal Mach Intell 25(12):1615\u20131618","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"801_CR54","doi-asserted-by":"crossref","unstructured":"Singh S, Kubica J, Larsen S, Sorokina D (2009) Parallel large scale feature selection for logistic regression. Proceedings of the SIAM international conference on data mining, pp 1171\u20131182","DOI":"10.1137\/1.9781611972795.100"},{"issue":"11","key":"801_CR55","doi-asserted-by":"crossref","first-page":"1958","DOI":"10.1109\/TPAMI.2008.128","volume":"30","author":"A Torralba","year":"2008","unstructured":"Torralba A, Fergus R, Freeman W (2008) 80 Million tiny images: a large data set for nonparametric object and scene recognition. IEEE Trans Pattern Anal Mach Intell 30(11):1958\u20131970","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"801_CR56","volume-title":"Hadoop: the definitive guide","author":"T White","year":"2009","unstructured":"White T (2009) Hadoop: the definitive guide, 1st edn. O\u2019Reilly Media Inc, Sebastopol","edition":"1"},{"key":"801_CR57","first-page":"1855","volume":"6","author":"L Wolf","year":"2005","unstructured":"Wolf L, Shashua A (2005) Feature selection for unsupervised and supervised inference: the emergence of sparsity in a weight-based approach. J Mach Learn Res 6:1855\u20131887","journal-title":"J Mach Learn Res"},{"key":"801_CR58","doi-asserted-by":"crossref","unstructured":"Xiang J, Guo C, Aboulnaga A (2013) Scalable maximum clique computation using mapreduce. IEEE 29th international conference on data engineering (ICDE), 2013 , pp 74\u201385","DOI":"10.1109\/ICDE.2013.6544815"},{"key":"801_CR59","doi-asserted-by":"crossref","unstructured":"Zhao Z, Liu H (2007) Spectral feature selection for supervised and unsupervised learning. In: Proceedings of the 24th international conference on machine learning (ICML\u201907). ACM, New York, NY, pp 1151\u20131157","DOI":"10.1145\/1273496.1273641"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-014-0801-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-014-0801-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-014-0801-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,19]],"date-time":"2019-08-19T00:52:15Z","timestamp":1566175935000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-014-0801-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12,27]]},"references-count":59,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,10]]}},"alternative-id":["801"],"URL":"https:\/\/doi.org\/10.1007\/s10115-014-0801-8","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,12,27]]}}}