{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,14]],"date-time":"2025-10-14T20:21:08Z","timestamp":1760473268494,"version":"3.40.4"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031877650","type":"print"},{"value":"9783031877667","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-87766-7_21","type":"book-chapter","created":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T04:09:39Z","timestamp":1744603779000},"page":"234-244","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Combining Similarity-Based Correlation and\u00a0Hierarchical Ascending Clustering for\u00a0Small Files Problem in\u00a0HDFS"],"prefix":"10.1007","author":[{"given":"Han\u00e8ne","family":"Chettaoui","sequence":"first","affiliation":[]},{"given":"Farah","family":"Hkiri","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,15]]},"reference":[{"key":"21_CR1","unstructured":"Microsoft Azure. https:\/\/azure.microsoft.com\/. Accessed 6 Dec 2024"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Aggarwal, R., Verma, J., Siwach, M.: Small files\u2019 problem in Hadoop: a systematic literature review. J. King Saud Univ. Comput. Inf. Sci. 34(10, Part A), 8658\u20138674 (2022)","DOI":"10.1016\/j.jksuci.2021.09.007"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Ahad, M.A., Biswas, R.: Dynamic merging based small file storage (DM-SFS) architecture for efficiently storing small size files in Hadoop. In: Proceedings of the International Conference on Computational Intelligence and Data Science, pp. 1626\u20131635 (2018)","DOI":"10.1016\/j.procs.2018.05.128"},{"issue":"6","key":"21_CR4","doi-asserted-by":"publisher","first-page":"3381","DOI":"10.1007\/s10586-023-03992-1","volume":"26","author":"N Alange","year":"2023","unstructured":"Alange, N., Vidya Sagar, P.: Small files access efficiency in Hadoop distributed file system a case study performed on British library text files. Cluster Comput. 26(6), 3381\u20133388 (2023)","journal-title":"Cluster Comput."},{"issue":"1","key":"21_CR5","doi-asserted-by":"publisher","first-page":"913","DOI":"10.32604\/csse.2023.036400","volume":"46","author":"A Ali","year":"2023","unstructured":"Ali, A., Mirza, N.M., Ishak, M.K.: Enhanced best fit algorithm for merging small files. Comput. Syst. Sci. Eng. 46(1), 913\u2013928 (2023)","journal-title":"Comput. Syst. Sci. Eng."},{"issue":"11","key":"21_CR6","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1007\/s10462-024-10938-5","volume":"57","author":"A Badshah","year":"2024","unstructured":"Badshah, A., Daud, A., Alharbey, R., Banjar, A., Bukhari, A., Alshemaimri, B.: Big data applications: overview, challenges and future. Artif. Intell. Rev. 57(11), 290 (2024)","journal-title":"Artif. Intell. Rev."},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., Zhu, W., Yan, K.: Massive small file storage scheme based on association rule mining. In: Proceedings of the 3rd International Conference on Big Data Engineering, pp. 34\u201340 (2021)","DOI":"10.1145\/3468920.3468925"},{"issue":"6","key":"21_CR8","doi-asserted-by":"publisher","first-page":"1847","DOI":"10.1016\/j.jnca.2012.07.009","volume":"35","author":"B Dong","year":"2012","unstructured":"Dong, B., Zheng, Q., Tian, F., Chao, K.-M., Ma, R., Anane, R.: An optimized approach for storing and accessing small files on cloud storage. J. Netw. Comput. Appl. 35(6), 1847\u20131862 (2012)","journal-title":"J. Netw. Comput. Appl."},{"key":"21_CR9","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/j.dcan.2023.01.014","volume":"10","author":"S Guan","year":"2024","unstructured":"Guan, S., Zhang, C., Wang, Y., Liu, W.: Hadoop-based secure storage solution for big data in cloud computing environment. Digit. Commun. Netw. 10, 227\u2013236 (2024)","journal-title":"Digit. Commun. Netw."},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Gudivada, V.N., Rao, D.L., Gudivada, A.R.: Chapter 11 - Information retrieval: concepts, models, and systems. In: Computational Analysis and Understanding of Natural Languages: Principles, Methods and Applications. Handbook of Statistics, vol.\u00a038, pp. 331\u2013401. Elsevier (2018)","DOI":"10.1016\/bs.host.2018.07.009"},{"key":"21_CR11","doi-asserted-by":"publisher","first-page":"3696","DOI":"10.1007\/s11227-015-1462-4","volume":"72","author":"H He","year":"2016","unstructured":"He, H., Zhonghui, D., Zhang, W., Chen, A.: Optimization strategy of Hadoop small file storage for big data in healthcare. J. Supercomput. 72, 3696\u20133707 (2016)","journal-title":"J. Supercomput."},{"issue":"22","key":"21_CR12","first-page":"323","volume":"118","author":"DS Jayalakshmi","year":"2018","unstructured":"Jayalakshmi, D.S., Srinivasan, R.: A greedy file merging algorithm for handling small files in HDFS. Int. J. Pure Appl. Math. 118(22), 323\u2013330 (2018)","journal-title":"Int. J. Pure Appl. Math."},{"issue":"4","key":"21_CR13","doi-asserted-by":"publisher","first-page":"634","DOI":"10.20965\/jaciii.2019.p0634","volume":"23","author":"J Liu","year":"2019","unstructured":"Liu, J.: Storage-optimization method for massive small files of agricultural resources based on Hadoop. J. Adv. Comput. Intell. Intell. Inform. 23(4), 634\u2013640 (2019)","journal-title":"J. Adv. Comput. Intell. Intell. Inform."},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Liu, J., Jin, S., Wang, D., Li, H.: An archive-based method for efficiently handling small file problems in HDFS. Concurr. Comput. Pract. Exp. 1\u201315 (2024)","DOI":"10.1002\/cpe.8260"},{"issue":"5","key":"21_CR15","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1080\/08839514.2020.1723868","volume":"34","author":"K Park","year":"2020","unstructured":"Park, K., Hong, J.S., Kim, W.: A methodology combining cosine similarity with classifier for text classification. Appl. Artif. Intell. 34(5), 396\u2013411 (2020)","journal-title":"Appl. Artif. Intell."},{"key":"21_CR16","unstructured":"Qian, Y., et al.: Combining buffered I\/O and direct I\/O in distributed file systems. In: 22nd USENIX Conference on File and Storage Technologies, FAST 2024, Santa Clara, CA, USA, 27\u201329 February 2024, pp. 17\u201333 (2024)"},{"issue":"8","key":"21_CR17","doi-asserted-by":"publisher","first-page":"8219","DOI":"10.1007\/s10462-022-10366-3","volume":"56","author":"X Ran","year":"2023","unstructured":"Ran, X., Xi, Y., Yonggang, L., Wang, X., Zhenyu, L.: Comprehensive survey on hierarchical clustering algorithms and the recent developments. Artif. Intell. Rev. 56(8), 8219\u20138264 (2023)","journal-title":"Artif. Intell. Rev."},{"issue":"4S5","key":"21_CR18","first-page":"122","volume":"8","author":"R Rathidevi","year":"2019","unstructured":"Rathidevi, R., Parameswari, R.: CSFC: a new centroid based clustering method to improve the efficiency of storing and accessing small files in Hadoop. Int. J. Recent Technol. Eng. 8(4S5), 122\u2013127 (2019)","journal-title":"Int. J. Recent Technol. Eng."},{"issue":"3","key":"21_CR19","doi-asserted-by":"publisher","first-page":"1495","DOI":"10.1007\/s11277-020-07312-3","volume":"113","author":"IF Siddiqui","year":"2020","unstructured":"Siddiqui, I.F., Qureshi, N., Chowdhry, B.S., Uqaili, M.A.: Pseudo-cache-based IoT small files management framework in HDFS cluster. Wirel. Pers. Commun. 113(3), 1495\u20131522 (2020)","journal-title":"Wirel. Pers. Commun."},{"key":"21_CR20","unstructured":"Wai, K.S.S., Myint, J., Yee, T.T.: Merging small files based on agglomerative hierarchical clustering on HDFS for cloud storage. In: Proceedings of the 16th International Conference on Computer Applications (ICCA), pp. 34\u201339 (2018)"},{"issue":"5","key":"21_CR21","doi-asserted-by":"publisher","first-page":"2814","DOI":"10.1109\/TNSE.2022.3195350","volume":"10","author":"S Wang","year":"2023","unstructured":"Wang, S., Lan, H., Peng, Y., Peng, Z.: Consolidating industrial small files using robust graph clustering. IEEE Trans. Netw. Sci. Eng. 10(5), 2814\u20132831 (2023)","journal-title":"IEEE Trans. Netw. Sci. Eng."},{"issue":"3","key":"21_CR22","doi-asserted-by":"publisher","first-page":"645","DOI":"10.1109\/TNN.2005.845141","volume":"16","author":"X Rui","year":"2005","unstructured":"Rui, X., Wunsch, D.: Survey of clustering algorithms. IEEE Trans. Neural Netw. 16(3), 645\u2013678 (2005)","journal-title":"IEEE Trans. Neural Netw."},{"key":"21_CR23","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/j.jpdc.2021.05.011","volume":"156","author":"Y Zhai","year":"2021","unstructured":"Zhai, Y., et al.: Hadoop Perfect File: a fast and memory-efficient metadata access archive file to face small files problem in HDFS. J. Parallel Distrib. Comput. 156, 119\u2013130 (2021)","journal-title":"J. Parallel Distrib. Comput."},{"key":"21_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2020.101810","volume":"109","author":"Z Zhu","year":"2020","unstructured":"Zhu, Z., Tan, L., Li, Y., Ji, C.: PHDFS: optimizing I\/O performance of HDFS in deep learning cloud computing platform. J. Syst. Architect. 109, 101810 (2020)","journal-title":"J. Syst. Architect."}],"container-title":["Lecture Notes on Data Engineering and Communications Technologies","Advanced Information Networking and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-87766-7_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T04:09:48Z","timestamp":1744603788000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-87766-7_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031877650","9783031877667"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-87766-7_21","relation":{},"ISSN":["2367-4512","2367-4520"],"issn-type":[{"value":"2367-4512","type":"print"},{"value":"2367-4520","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AINA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Information Networking and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Barcelona","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"39","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aina0","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/voyager.ce.fit.ac.jp\/conf\/aina\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}