{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T16:09:36Z","timestamp":1774368576453,"version":"3.50.1"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11227-021-03958-3","type":"journal-article","created":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T10:04:25Z","timestamp":1625479465000},"page":"2616-2650","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["DHkmeans-\u2113diversity: distributed hierarchical K-means for satisfaction of the \u2113-diversity privacy model using Apache Spark"],"prefix":"10.1007","volume":"78","author":[{"given":"Farough","family":"Ashkouti","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0792-0523","authenticated-orcid":false,"given":"Keyhan","family":"Khamforoosh","sequence":"additional","affiliation":[]},{"given":"Amir","family":"Sheikhahmadi","sequence":"additional","affiliation":[]},{"given":"Hana","family":"Khamfroush","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,5]]},"reference":[{"issue":"6","key":"3958_CR1","first-page":"32","volume":"2","author":"AP Singh","year":"2013","unstructured":"Singh AP, Parihar MD (2013) A review of privacy preserving data publishing technique. Int J Emerg Res Manag Technol 2(6):32\u201338","journal-title":"Int J Emerg Res Manag Technol"},{"key":"3958_CR2","unstructured":"Sweeney L (2000) Simple demographics often identify people uniquely, Carnegie Mellon Univ. Data Priv. Work. Pap. 3. Pittsburgh 671: 1\u201334"},{"key":"3958_CR3","doi-asserted-by":"publisher","first-page":"51071","DOI":"10.1109\/ACCESS.2020.2980235","volume":"8","author":"A Zigomitros","year":"2020","unstructured":"Zigomitros A, Casino F, Solanas A, Patsakis C (2020) A Survey on privacy properties for data publishing of relational data. IEEE Access 8:51071\u201351099","journal-title":"IEEE Access"},{"issue":"1","key":"3958_CR4","doi-asserted-by":"publisher","first-page":"1376","DOI":"10.1038\/srep01376","volume":"3","author":"Y-A de Montjoye","year":"2013","unstructured":"de Montjoye Y-A, Hidalgo CA, Verleysen M, Blondel VD (2013) Unique in the Crowd: the privacy bounds of human mobility. Sci Rep 3(1):1376","journal-title":"Sci Rep"},{"issue":"1","key":"3958_CR5","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1186\/s40537-016-0059-y","volume":"3","author":"P Jain","year":"2016","unstructured":"Jain P, Gyanchandani M, Khare N (2016) Big data privacy: a technological perspective and review. J Big Data 3(1):25","journal-title":"J Big Data"},{"key":"3958_CR6","doi-asserted-by":"publisher","first-page":"2751","DOI":"10.1109\/ACCESS.2016.2577036","volume":"4","author":"S Yu","year":"2016","unstructured":"Yu S (2016) Big privacy: challenges and opportunities of privacy study in the age of big data. IEEE Access 4:2751\u20132763","journal-title":"IEEE Access"},{"key":"3958_CR7","doi-asserted-by":"publisher","first-page":"1821","DOI":"10.1109\/ACCESS.2016.2558446","volume":"4","author":"A Mehmood","year":"2016","unstructured":"Mehmood A, Natgunanathan I, Xiang Y, Hua G, Guo S (2016) Protection of big data privacy. IEEE Access 4:1821\u20131834","journal-title":"IEEE Access"},{"issue":"2","key":"3958_CR8","first-page":"161","volume":"6","author":"C Clifton","year":"2013","unstructured":"Clifton C, Tassa T (2013) On syntactic anonymity and differential privacy. Trans Data Priv 6(2):161\u2013183","journal-title":"Trans Data Priv"},{"key":"3958_CR9","first-page":"1151","volume":"2","author":"L Xu","year":"2014","unstructured":"Xu L, Jiang C, Wang J, Yuan J, Ren Y (2014) Information security in big data: privacy and data mining. IEEE Access 2:1151\u20131178","journal-title":"IEEE Access"},{"issue":"05","key":"3958_CR10","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1142\/S0218488502001648","volume":"10","author":"L Sweeney","year":"2002","unstructured":"Sweeney L (2002) k-anonymity: a model for protecting privacy. Int J Uncertain Fuzziness Knowl Based Syst 10(05):557\u2013570","journal-title":"Int J Uncertain Fuzziness Knowl Based Syst"},{"issue":"1","key":"3958_CR11","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1145\/1217299.1217302","volume":"1","author":"A Machanavajjhala","year":"2007","unstructured":"Machanavajjhala A, Kifer D, Gehrke J, Venkitasubramaniam M (2007) l-diversity: privacy beyond k-anonymity. ACM Trans Knowl Discov from Data 1(1):3-es","journal-title":"ACM Trans Knowl Discov from Data"},{"key":"3958_CR12","unstructured":"Ninghui L, Tiancheng L, Venkatasubramanian S (2007) t-Closeness: privacy beyond k-anonymity and \u2113-diversity. In: Proceedings - International Conference on Data Engineering: pp 106\u2013115"},{"key":"3958_CR13","doi-asserted-by":"crossref","unstructured":"Xiao X, Tao Y (2007) M-invariance: towards privacy preserving re-publication of dynamic datasets.\u201dIn: Proceedings of the 2007 ACM SIGMOD International Conference on Management of Data, pp 689\u2013700","DOI":"10.1145\/1247480.1247556"},{"key":"3958_CR14","doi-asserted-by":"crossref","unstructured":"Nergiz ME, Atzori M, Clifton C (2007) Hiding the presence of individuals from shared databases. In: Proceedings of the 2007 ACM SIGMOD International Conference on Management of Data, pp 665\u2013676","DOI":"10.1145\/1247480.1247554"},{"key":"3958_CR15","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.cose.2017.09.002","volume":"72","author":"SA Abdelhameed","year":"2018","unstructured":"Abdelhameed SA, Moussa SM, Khalifa ME (2018) Privacy-preserving tabular data publishing: a comprehensive evaluation from web to cloud. Comput Secur 72:74\u201395","journal-title":"Comput Secur"},{"issue":"1","key":"3958_CR16","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1504\/IJBDI.2016.073904","volume":"3","author":"N Victor","year":"2016","unstructured":"Victor N, Lopez D, Abawajy JH (2016) Privacy models for big data: a survey. Int J Big Data Intell 3(1):61\u201375","journal-title":"Int J Big Data Intell"},{"issue":"4","key":"3958_CR17","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1145\/1749603.1749605","volume":"42","author":"B Fung","year":"2010","unstructured":"Fung B, Wang K, Chen R, Yu PS (2010) Privacy-preserving data publishing: a survey of recent developments. ACM Comput Surv 42(4):14","journal-title":"ACM Comput Surv"},{"key":"3958_CR18","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1016\/j.ins.2015.01.025","volume":"305","author":"M Ali","year":"2015","unstructured":"Ali M, Khan SU, Vasilakos AV (2015) Security in cloud computing: opportunities and challenges. Inf Sci (Ny) 305:357\u2013383","journal-title":"Inf Sci (Ny)"},{"key":"3958_CR19","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1007\/978-3-658-24549-8_7","volume-title":"SQL & NoSQL Databases","author":"A Meier","year":"2019","unstructured":"Meier A, Kaufmann M (2019) Nosql databases. In: Meier A, Kaufmann M (eds) SQL & NoSQL Databases. Springer, Berlin, pp 201\u2013218"},{"key":"3958_CR20","unstructured":"Apache software foundation, Apache Spark home page. https:\/\/spark.apache.org\/"},{"issue":"11","key":"3958_CR21","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"M Zaharia","year":"2016","unstructured":"Zaharia M et al (2016) Apache spark. Commun ACM 59(11):56\u201365","journal-title":"Commun ACM"},{"issue":"3","key":"3958_CR22","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1007\/s41060-016-0027-9","volume":"1","author":"S Salloum","year":"2016","unstructured":"Salloum S, Dautov R, Chen X, Peng PX, Huang JZ (2016) Big data analytics on Apache Spark. Int J Data Sci Anal 1(3):145\u2013164","journal-title":"Int J Data Sci Anal"},{"key":"3958_CR23","volume-title":"Learning spark","author":"H Karau","year":"2015","unstructured":"Karau H, Konwinski A, Wendell P, Zaharia M (2015) Learning spark. O\u2019Reilly Media"},{"key":"3958_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4842-0964-6","volume-title":"Big data analytics with spark","author":"M Guller","year":"2015","unstructured":"Guller M (2015) Big data analytics with spark. Apress, Berkeley"},{"key":"3958_CR25","doi-asserted-by":"crossref","unstructured":"Canbay Y, Sa\u01e7iro\u01e7lu S (2017) Big data anonymization with spark. In 2nd International Conference on Computer Science and Engineering, UBMK 2017, pp 833\u2013838","DOI":"10.1109\/UBMK.2017.8093543"},{"key":"3958_CR26","doi-asserted-by":"crossref","unstructured":"Na S, Xumin L, Yong G (2010) Research on k-means clustering algorithm: an improved k-means clustering algorithm. In: 2010 Third International Symposium on intelligent information technology and security informatics, pp 63\u201367","DOI":"10.1109\/IITSI.2010.74"},{"issue":"1","key":"3958_CR27","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1016\/j.eswa.2012.07.021","volume":"40","author":"ME Celebi","year":"2013","unstructured":"Celebi ME, Kingravi HA, Vela PA (2013) A comparative study of efficient initialization methods for the k-means clustering algorithm. Expert Syst Appl 40(1):200\u2013210","journal-title":"Expert Syst Appl"},{"issue":"551","key":"3958_CR28","doi-asserted-by":"publisher","first-page":"124185","DOI":"10.1016\/j.physa.2020.124185","volume":"1","author":"R Rashidi","year":"2020","unstructured":"Rashidi R, Khamforoosh K, Sheikhahmadi A (2020) An analytic approach to separate users by introducing new combinations of initial centers of clustering. Phys A Stat Mech Appl 1(551):124185","journal-title":"Phys A Stat Mech Appl"},{"issue":"2","key":"3958_CR29","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1109\/TIT.1982.1056489","volume":"28","author":"S Lloyd","year":"1982","unstructured":"Lloyd S (1982) Least squares quantization in PCM. IEEE Trans Inf theory 28(2):129\u2013137","journal-title":"IEEE Trans Inf theory"},{"key":"3958_CR30","first-page":"25","volume":"2006","author":"K LeFevre","year":"2006","unstructured":"LeFevre K, DeWitt DJ, Ramakrishnan R (2006) Mondrian multidimensional K-anonymity. Proc Int Conf Data Eng 2006:25","journal-title":"Proc Int Conf Data Eng"},{"issue":"2","key":"3958_CR31","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1145\/1233321.1233324","volume":"8","author":"J Xu","year":"2006","unstructured":"Xu J, Wang W, Pei J, Wang X, Shi B, Fu AW-C (2006) Utility-based anonymization for privacy preservation with less information loss. Acm Sigkdd Explor Newsl 8(2):21\u201330","journal-title":"Acm Sigkdd Explor Newsl"},{"issue":"9","key":"3958_CR32","doi-asserted-by":"publisher","first-page":"1181","DOI":"10.1109\/TKDE.2008.52","volume":"20","author":"J Li","year":"2008","unstructured":"Li J, Wong RC-W, Fu AW-C, Pei J (2008) Anonymization by local recoding in data with attribute hierarchical taxonomies. IEEE Trans Knowl Data Eng 20(9):1181\u20131194","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"3","key":"3958_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1798596.1798602","volume":"6","author":"G Aggarwal","year":"2010","unstructured":"Aggarwal G et al (2010) Achieving anonymity via clustering. ACM Trans Algorithms 6(3):1\u201319","journal-title":"ACM Trans Algorithms"},{"key":"3958_CR34","doi-asserted-by":"crossref","unstructured":"Zheng W, Ma Y, Wang Z, Jia C, Li P (2019) Effective L-diversity anonymization algorithm based on improved clustering. In: International Symposium on Cyberspace Safety and Security, pp 318\u2013329","DOI":"10.1007\/978-3-030-37352-8_29"},{"key":"3958_CR35","doi-asserted-by":"crossref","unstructured":"LeFevre K, DJDJ DeWitt, R Ramakrishnan, (2005) Incognito: efficient full-domain K-anonymity SIGMOD \u201905 Proc. 2005 ACM SIGMOD Int Conf Manag Data, pp 49\u201360","DOI":"10.1145\/1066157.1066164"},{"key":"3958_CR36","doi-asserted-by":"publisher","first-page":"27156","DOI":"10.1109\/ACCESS.2018.2828398","volume":"6","author":"S Yaseen","year":"2018","unstructured":"Yaseen S et al (2018) Improved generalization for secure data publishing. IEEE Access 6:27156\u201327165","journal-title":"IEEE Access"},{"key":"3958_CR37","doi-asserted-by":"publisher","first-page":"122878","DOI":"10.1109\/ACCESS.2019.2936301","volume":"7","author":"O Temuujin","year":"2019","unstructured":"Temuujin O, Ahn J, Im D (2019) Efficient L-diversity algorithm for preserving privacy of dynamically published datasets. IEEE Access 7:122878\u2013122888","journal-title":"IEEE Access"},{"issue":"3","key":"3958_CR38","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1109\/TKDE.2010.236","volume":"24","author":"T Li","year":"2012","unstructured":"Li T, Li N, Zhang J, Molloy I (2012) Slicing: a new approach for privacy preserving data publishing. IEEE Trans Knowl Data Eng 24(3):561\u2013574","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"2","key":"3958_CR39","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1016\/j.bdr.2015.01.006","volume":"2","author":"X Jin","year":"2015","unstructured":"Jin X, Wah BW, Cheng X, Wang Y (2015) Significance and challenges of big data research. Big Data Res 2(2):59\u201364","journal-title":"Big Data Res"},{"key":"3958_CR40","doi-asserted-by":"crossref","unstructured":"Zhang X, Leckie C, Dou W, Chen J, Kotagiri R, Salcic Z (2016) Scalable local-recoding anonymization using locality sensitive hashing for big data privacy preservation. In: Proceedings of the 25th ACM International on Conference on Information and Knowledge Management - CIKM \u201916, pp 1793\u20131802","DOI":"10.1145\/2983323.2983841"},{"issue":"2","key":"3958_CR41","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1109\/TPDS.2013.48","volume":"25","author":"X Zhang","year":"2014","unstructured":"Zhang X, Yang LT, Liu C, Chen J (2014) A scalable two-phase top-down specialization approach for data anonymization using mapreduce on cloud. IEEE Trans Parallel Distrib Syst 25(2):363\u2013373","journal-title":"IEEE Trans Parallel Distrib Syst"},{"issue":"8","key":"3958_CR42","doi-asserted-by":"publisher","first-page":"2293","DOI":"10.1109\/TC.2014.2360516","volume":"64","author":"X Zhang","year":"2015","unstructured":"Zhang X et al (2015) Proximity-aware local-recoding anonymization with MapReduce for scalable big data privacy preservation in cloud. IEEE Trans Comput 64(8):2293\u20132307","journal-title":"IEEE Trans Comput"},{"issue":"5","key":"3958_CR43","doi-asserted-by":"publisher","first-page":"1008","DOI":"10.1016\/j.jcss.2014.02.007","volume":"80","author":"X Zhang","year":"2014","unstructured":"Zhang X, Liu C, Nepal S, Yang C, Dou W, Chen J (2014) A hybrid approach for scalable sub-tree anonymization over big data using MapReduce on cloud. J Comput Syst Sci 80(5):1008\u20131020","journal-title":"J Comput Syst Sci"},{"key":"3958_CR44","doi-asserted-by":"crossref","unstructured":"Zakerzadeh H, Aggarwal CC, Barker K (2015) Privacy-preserving big data publishing, Proc. 27th Int Conf Sci Stat Database Manag. - SSDBM \u201915, pp 1\u201311","DOI":"10.1145\/2791347.2791380"},{"key":"3958_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.ins.2020.07.066","volume":"546","author":"F Ashkouti","year":"2021","unstructured":"Ashkouti F, Sheikhahmadi A (2021) DI-Mondrian: distributed improved mondrian for satisfaction of the L-diversity privacy model using apache spark. Inf Sci (Ny) 546:1\u201324","journal-title":"Inf Sci (Ny)"},{"issue":"1","key":"3958_CR46","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1186\/s40537-017-0104-5","volume":"4","author":"M Al-Zobbi","year":"2017","unstructured":"Al-Zobbi M, Shahrestani S, Ruan C (2017) Improving MapReduce privacy by implementing multi-dimensional sensitivity-based anonymization. J Big Data 4(1):45","journal-title":"J Big Data"},{"issue":"1","key":"3958_CR47","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1186\/s40537-019-0193-4","volume":"6","author":"P Jain","year":"2019","unstructured":"Jain P, Gyanchandani M, Khare N (2019) Enhanced secured Map Reduce layer for big data privacy and security. J Big Data 6(1):30","journal-title":"J Big Data"},{"key":"3958_CR48","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1016\/j.future.2016.10.022","volume":"74","author":"JJV Nayahi","year":"2017","unstructured":"Nayahi JJV, Kavitha V (2017) Privacy and utility preserving data clustering for data anonymization and distribution on Hadoop. Futur Gener Comput Syst 74:393\u2013408","journal-title":"Futur Gener Comput Syst"},{"issue":"5","key":"3958_CR49","doi-asserted-by":"publisher","first-page":"589","DOI":"10.3390\/electronics10050589","volume":"10","author":"SU Bazai","year":"2021","unstructured":"Bazai SU, Jang-Jaccard J, Alavizadeh H (2021) Scalable, high-performance, and generalized subtree data anonymization approach for Apache Spark. Electronics 10(5):589","journal-title":"Electronics"},{"key":"3958_CR50","unstructured":"IPUMS USA, University of Minnesota. https:\/\/usa.ipums.org\/usa\/"},{"key":"3958_CR51","volume-title":"Data mining: concepts and techniques","author":"J Han","year":"2011","unstructured":"Han J, Pei J, Kamber M (2011) Data mining: concepts and techniques. Elsevier, Amsterdam"},{"issue":"5","key":"3958_CR52","first-page":"270","volume":"2","author":"D Sinwar","year":"2014","unstructured":"Sinwar D, Kaushik R (2014) Study of Euclidean and Manhattan distance metrics using simple k-means clustering. Int J Res Appl Sci Eng Technol 2(5):270\u2013274","journal-title":"Int J Res Appl Sci Eng Technol"},{"issue":"1","key":"3958_CR53","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1137\/120875909","volume":"56","author":"L Liberti","year":"2014","unstructured":"Liberti L, Lavor C, Maculan N, Mucherino A (2014) Euclidean distance geometry and applications. SIAM Rev 56(1):3\u201369","journal-title":"SIAM Rev"},{"key":"3958_CR54","unstructured":"University of california at Irvine, UCI machine learning repository. http:\/\/archive.ics.uci.edu\/ml\/datasets.php"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-03958-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-021-03958-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-03958-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,4]],"date-time":"2023-02-04T21:14:09Z","timestamp":1675545249000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-021-03958-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,5]]},"references-count":54,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["3958"],"URL":"https:\/\/doi.org\/10.1007\/s11227-021-03958-3","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,5]]},"assertion":[{"value":"17 June 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 July 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}