{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T04:55:06Z","timestamp":1773809706722,"version":"3.50.1"},"reference-count":138,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3084057","type":"journal-article","created":{"date-parts":[[2021,5,26]],"date-time":"2021-05-26T22:06:05Z","timestamp":1622066765000},"page":"80015-80027","source":"Crossref","is-referenced-by-count":59,"title":["Scalable Clustering Algorithms for Big Data: A Review"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7810-7006","authenticated-orcid":false,"given":"Mahmoud A.","family":"Mahdi","sequence":"first","affiliation":[{"name":"Faculty of Computers and Information, Zagazig University, Zagazig, Egypt"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8065-8977","authenticated-orcid":false,"given":"Khalid M.","family":"Hosny","sequence":"additional","affiliation":[{"name":"Faculty of Computers and Information, Zagazig University, Zagazig, Egypt"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7630-1983","authenticated-orcid":false,"given":"Ibrahim","family":"Elhenawy","sequence":"additional","affiliation":[{"name":"Faculty of Computers and Information, Zagazig University, Zagazig, Egypt"}]}],"member":"263","reference":[{"key":"ref39","first-page":"186","article-title":"STING: A statistical information grid approach to spatial data mining","volume":"97","author":"wang","year":"1997","journal-title":"Proc 
VLDB"},{"key":"ref38","author":"jain","year":"1988","journal-title":"Algorithms for clustering data"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICTAI.2014.48"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24741-8_7"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/775107.775149"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/312129.312201"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-77018-3_37"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2010.5596684"},{"key":"ref35","first-page":"125","article-title":"Support vector clustering","volume":"2","author":"ben-hur","year":"2002","journal-title":"J Mach Learn Res"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.909556"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/584887.584888"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2002.1033770"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/BF02948829"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2014.04.008"},{"key":"ref22","first-page":"281","article-title":"Some methods for classification and analysis of multivariate observations","volume":"1","author":"macqueen","year":"1967","journal-title":"Proc 5th Berkeley Symp Math Statist Probab"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2014.03.013"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.04.014"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2008.01.039"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2014.11.007"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316801.ch2"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33362-0_24"},{"key":"ref25","first-page":"405","article-title":"Clustering by means of 
medoids","author":"kaufman","year":"1987","journal-title":"Proc Stat Data Anal Based L1 Norm Conf"},{"key":"ref50","first-page":"298","article-title":"Random projections for k-means clustering","author":"boutsidis","year":"2010","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.62"},{"key":"ref59","first-page":"1","article-title":"Big text data clustering using class labels and semantic feature based on Hadoop of cloud computing","volume":"8","author":"kim","year":"2014","journal-title":"Int J Softw Eng Appl"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1002\/dac.2844"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020516"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-10665-1_71"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2008.142"},{"key":"ref54","first-page":"563","article-title":"An integrated approach for CURE clustering using map-reduce technique","author":"maitrey","year":"2013","journal-title":"Proc Elsevier"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2848470"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2015.2490080"},{"key":"ref40","first-page":"428","article-title":"Wavecluster: A multi-resolution clustering approach for very large spatial databases","volume":"98","author":"sheikholeslami","year":"1998","journal-title":"Proc 24th Int Conf Very Large DataBases"},{"key":"ref4","author":"kaisler","year":"2014","journal-title":"Big data and analytics challenges and issues"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1142\/6103"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-05693-7_1"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.109"},{"key":"ref8","article-title":"Data clustering","author":"aggarwal","year":"2014","journal-title":"Algorithms and 
Application"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/1401890.1401973"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-014-0808-1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/350391.350432"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/235968.233324"},{"key":"ref45","article-title":"Data stream clustering: A review","author":"zubaro\u011flu","year":"2020","journal-title":"arXiv 2007 10781"},{"key":"ref48","first-page":"186","article-title":"Random projection for high dimensional data clustering: A cluster ensemble approach","author":"fern","year":"2003","journal-title":"Proc 20th Int Conf Mach Learn (ICML)"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/276305.276312"},{"key":"ref42","first-page":"506","article-title":"Optimal grid-clustering: Towards breaking the curse of dimensionality in high-dimensional clustering","author":"hinneburg","year":"1999","journal-title":"Proc Intl Conf on Very Large Databases"},{"key":"ref41","first-page":"58","article-title":"An efficient approach to clustering in large multimedia databases with noise","author":"hinneburg","year":"1998","journal-title":"Proc 1st Intl Conf on Knowledge Discovery and Data Mining (KDD)"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09918-2"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1201\/9781315373515-6"},{"key":"ref127","first-page":"21","article-title":"Clustering large data sets with mixed numeric and categorical values","author":"huang","year":"1997","journal-title":"Proc Pacific-Asia Conf on Knowledge Discovery and Data Mining (PAKDD)"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2015.01.031"},{"key":"ref125","author":"han","year":"2011","journal-title":"Data Mining Concepts and 
Techniques"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00887-0_18"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/BF01908075"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-018-9627-1"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1137\/S0036144598334138"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1002\/sam.11380"},{"key":"ref128","first-page":"34","article-title":"A fast clustering algorithm to cluster very large categorical data sets in data mining","volume":"3","author":"huang","year":"1997","journal-title":"DMKD"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1186\/s41044-016-0011-3"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2005.33"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/SFCS.2000.892124"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2007.190725"},{"key":"ref74","first-page":"16","article-title":"Click: A clustering algorithm with applications to gene expression analysis","volume":"8","author":"sharan","year":"2000","journal-title":"Proc Int Conf Intell Syst Mol Biol"},{"key":"ref75","first-page":"849","article-title":"On spectral clustering: Analysis and an algorithm","author":"ng","year":"2002","journal-title":"Proc Adv Neural Inf Process 
Syst"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1007\/s00357-003-0015-3"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1016\/B978-012088469-8.50075-9"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2002.994785"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.2009.5345924"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1016\/B978-012722442-8\/50016-1"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-010-0342-8"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-007-0070-x"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2006.05.006"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1145\/2133803.2184450"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2009.11.001"},{"key":"ref60","article-title":"A parallelizing interface for K-means type clustering algorithms and neural network batch training","author":"arjunan","year":"2003","journal-title":"Malaysian-Japan Seminar on Artificial Intelligence Applications in Industry"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2014.6968782"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2015.19"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2005.845141"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-28349-8_2"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2007.05.024"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbn058"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2014.2330519"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-014-1416-y"},{"key":"ref2","volume":"1","author":"friedman","year":"2001","journal-title":"The Elements of Statistical 
Learning"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/s40745-015-0040-1"},{"key":"ref1","volume":"344","author":"kaufman","year":"2009","journal-title":"Finding Groups in Data An Introduction to Cluster Analysis"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1007\/11430919_49"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CCCM.2009.5267735"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1145\/342009.335383"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/ICTAI.2009.60"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1145\/347090.347145"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04747-3_23"},{"key":"ref106","first-page":"452","article-title":"Mafia: Efficient and scalable subspace clustering for very large data sets","volume":"443","author":"goil","year":"1999","journal-title":"Proc 5th ACM SIGKDD Int Conf Knowl Discovery Data Mining"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btm064"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1145\/312129.312199"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-71701-0_5"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1145\/276304.276314"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/1281192.1281210"},{"key":"ref103","author":"milenova","year":"1997","journal-title":"Clustering large databases with numeric and nominal values using orthogonal projections"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0052867"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1145\/1552303.1552307"},{"key":"ref112","first-page":"83","article-title":"Clustering over data streams based on grid density and index tree","volume":"6","author":"ren","year":"2011","journal-title":"J Converg Inf 
Technol"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/WGEC.2008.32"},{"key":"ref98","first-page":"206","article-title":"Dengris-stream: A density-grid based clustering algorithm for evolving data streams over sliding window","author":"amini","year":"2012","journal-title":"Proc Int Conf Data Mining Comput Eng"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972825.85"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/ICIECS.2009.5363379"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/FSKD.2009.553"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-07821-2_10"},{"key":"ref11","first-page":"131","article-title":"Issues, challenges, and solutions: Big data mining","author":"jaseena","year":"2014","journal-title":"Proc NeTCoM CSIT GRAPH-HOC SPTM"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-09156-3_49"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/WCCCT.2014.16"},{"key":"ref14","first-page":"43","article-title":"Parallel clustering algorithms: Survey","volume":"34","author":"kim","year":"2009","journal-title":"Parallel Algorithms Spring"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1002\/9780470382776"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-009-0107-0_13"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/2.781637"},{"key":"ref82","first-page":"324","article-title":"A distribution-based clustering algorithm for mining in large spatial databases","author":"xu","year":"1998","journal-title":"Proc 14th Int Conf Data Eng"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/21.299710"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/S0306-4379(00)00022-3"},{"key":"ref81","first-page":"226","article-title":"A density-based algorithm for discovering clusters in large spatial databases with noise","volume":"96","author":"ester","year":"1996","journal-title":"Proc 
KDD"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24741-8_9"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/304181.304187"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.96.6.2907"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.5339\/qfarf.2012.AESNP4"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1023\/A:1009745219419"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1016\/0098-3004(84)90020-7"},{"key":"ref113","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"Maximum likelihood from incomplete data via the EM algorithm","volume":"39","author":"dempster","year":"1977","journal-title":"J Roy Statist Soc B Statist Methodol"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/72.159055"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1007\/s00357-014-9161-z"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(89)90046-5"},{"key":"ref120","first-page":"554","article-title":"The infinite Gaussian mixture model","author":"rasmussen","year":"2000","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972764.29"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1145\/584887.584889"},{"key":"ref122","first-page":"1537","article-title":"Maximum margin clustering","author":"xu","year":"2005","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972795.55"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1007\/s007780050005"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/34.1000236"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2004.10087"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972740.23"}],"container-title":["IEEE 
Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09440980.pdf?arnumber=9440980","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,31]],"date-time":"2024-08-31T16:29:21Z","timestamp":1725121761000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9440980\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":138,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3084057","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}