{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,15]],"date-time":"2025-08-15T01:33:54Z","timestamp":1755221634444,"version":"3.43.0"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032006264","type":"print"},{"value":"9783032006271","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-032-00627-1_12","type":"book-chapter","created":{"date-parts":[[2025,8,9]],"date-time":"2025-08-09T04:22:43Z","timestamp":1754713363000},"page":"231-251","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Clustering Malware at\u00a0Scale: A First Full-Benchmark Study"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8982-0141","authenticated-orcid":false,"given":"Martin","family":"Mocko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6328-1335","authenticated-orcid":false,"given":"Jakub","family":"\u0160evcech","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3873-9308","authenticated-orcid":false,"given":"Daniela","family":"Chud\u00e1","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,8,10]]},"reference":[{"issue":"17","key":"12_CR1","doi-asserted-by":"publisher","first-page":"8482","DOI":"10.3390\/app12178482","volume":"12","author":"FA Aboaoja","year":"2022","unstructured":"Aboaoja, F.A., Zainal, A., Ghaleb, F.A., Al-Rimy, B., Eisa, T., Elnour, A.: Malware detection issues, challenges, and future directions: a survey. Appl. Sci. 12(17), 8482 (2022). https:\/\/doi.org\/10.3390\/app12178482","journal-title":"Appl. Sci."},{"key":"12_CR2","doi-asserted-by":"publisher","unstructured":"Ali, M., Hagen, J., Oliver, J.: Scalable malware clustering using multi-stage tree parallelization. In: 2020 IEEE International Conference on Intelligence and Security Informatics (ISI), pp.\u00a01\u20136. IEEE (2020). https:\/\/doi.org\/10.1109\/ISI49825.2020.9280546","DOI":"10.1109\/ISI49825.2020.9280546"},{"key":"12_CR3","doi-asserted-by":"publisher","unstructured":"Anderson, H.S., Roth, P.: Ember: an open dataset for training static PE malware machine learning models. arXiv preprint arXiv:1804.04637 (2018). https:\/\/doi.org\/10.48550\/arXiv.1804.04637","DOI":"10.48550\/arXiv.1804.04637"},{"issue":"2","key":"12_CR4","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1145\/304181.304187","volume":"28","author":"M Ankerst","year":"1999","unstructured":"Ankerst, M., Breunig, M.M., Kriegel, H.P., Sander, J.: Optics: ordering points to identify the clustering structure. ACM SIGMOD Rec. 28(2), 49\u201360 (1999). https:\/\/doi.org\/10.1145\/304181.304187","journal-title":"ACM SIGMOD Rec."},{"key":"12_CR5","doi-asserted-by":"publisher","unstructured":"Aresu, M., Ariu, D., Ahmadi, M., Maiorca, D., Giacinto, G.: Clustering android malware families by http traffic. In: 2015 10th International Conference on Malicious and Unwanted Software (MALWARE), pp. 128\u2013135. IEEE (2015). https:\/\/doi.org\/10.1109\/MALWARE.2015.7413693","DOI":"10.1109\/MALWARE.2015.7413693"},{"key":"12_CR6","unstructured":"Ester, M., Kriegel, H.P., Sander, J., Xu, X., et\u00a0al.: A density-based algorithm for discovering clusters in large spatial databases with noise. In: KDD, vol.\u00a096, pp. 226\u2013231 (1996)"},{"key":"12_CR7","doi-asserted-by":"publisher","first-page":"2313","DOI":"10.1109\/ACCESS.2019.2962198","volume":"8","author":"Y Fang","year":"2019","unstructured":"Fang, Y., Zhang, W., Li, B., Jing, F., Zhang, L.: Semi-supervised malware clustering based on the weight of bytecode and API. IEEE Access 8, 2313\u20132326 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2962198","journal-title":"IEEE Access"},{"key":"12_CR8","doi-asserted-by":"publisher","unstructured":"Faridi, H., Srinivasagopalan, S., Verma, R.: Performance evaluation of features and clustering algorithms for malware. In: 2018 IEEE International Conference on Data Mining Workshops (ICDMW), pp. 13\u201322. IEEE (2018). https:\/\/doi.org\/10.1109\/ICDMW.2018.00010","DOI":"10.1109\/ICDMW.2018.00010"},{"key":"12_CR9","doi-asserted-by":"publisher","unstructured":"Harang, R., Rudd, E.M.: Sorel-20m: a large scale benchmark dataset for malicious PE detection. arXiv preprint arXiv:2012.07634 (2020). https:\/\/doi.org\/10.48550\/arXiv.2012.07634","DOI":"10.48550\/arXiv.2012.07634"},{"key":"12_CR10","doi-asserted-by":"publisher","unstructured":"Hu, X., Shin, K.G.: Duet: integration of dynamic and static analyses for malware clustering with cluster ensembles. In: Proceedings of the 29th Annual Computer Security Applications Conference, pp. 79\u201388 (2013). https:\/\/doi.org\/10.1145\/2523649.2523677","DOI":"10.1145\/2523649.2523677"},{"key":"12_CR11","unstructured":"Hu, X., Shin, K.G., Bhatkar, S., Griffin, K.: $$\\{$$MutantX-S$$\\}$$: scalable malware clustering based on static features. In: 2013 USENIX Annual Technical Conference (USENIX ATC 2013), pp. 187\u2013198 (2013)"},{"key":"12_CR12","doi-asserted-by":"publisher","unstructured":"Jure\u010dkov\u00e1, O., Jure\u010dek, M., Stamp, M.: Online clustering of known and emerging malware families. arXiv preprint arXiv:2405.03298 (2024). https:\/\/doi.org\/10.48550\/arXiv.2405.03298","DOI":"10.48550\/arXiv.2405.03298"},{"issue":"4","key":"12_CR13","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1007\/s11416-024-00513-5","volume":"20","author":"O Jure\u010dkov\u00e1","year":"2024","unstructured":"Jure\u010dkov\u00e1, O., Jure\u010dek, M., Stamp, M., Di Troia, F., L\u00f3rencz, R.: Classification and online clustering of zero-day malware. J. Comput. Virol. Hacking Tech. 20(4), 579\u2013592 (2024). https:\/\/doi.org\/10.1007\/s11416-024-00513-5","journal-title":"J. Comput. Virol. Hacking Tech."},{"key":"12_CR14","doi-asserted-by":"publisher","unstructured":"MacAskill, N., Wilkins, Z., Zincir-Heywood, N.: Scaling multi-objective optimization for clustering malware. In: 2021 IEEE Symposium Series on Computational Intelligence (SSCI), pp.\u00a01\u20138. IEEE (2021). https:\/\/doi.org\/10.1109\/SSCI50451.2021.9659925","DOI":"10.1109\/SSCI50451.2021.9659925"},{"key":"12_CR15","unstructured":"MacQueen, J.: Some methods for classification and analysis of multivariate observations. In: Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability, Volume 1: Statistics, vol.\u00a05, pp. 281\u2013298. University of California Press (1967)"},{"key":"12_CR16","doi-asserted-by":"publisher","unstructured":"Maimon, O., Rokach, L.: Data mining and knowledge discovery handbook, vol.\u00a02. Springer (2005). https:\/\/doi.org\/10.1007\/b107408","DOI":"10.1007\/b107408"},{"key":"12_CR17","doi-asserted-by":"publisher","unstructured":"McInnes, L., Healy, J., Melville, J.: Umap: uniform manifold approximation and projection for dimension reduction. arXiv preprint arXiv:1802.03426 (2018). https:\/\/doi.org\/10.48550\/arXiv.1802.03426","DOI":"10.48550\/arXiv.1802.03426"},{"issue":"5","key":"12_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3329786","volume":"52","author":"O Or-Meir","year":"2019","unstructured":"Or-Meir, O., Nissim, N., Elovici, Y., Rokach, L.: Dynamic malware analysis in the modern era\u2013a state of the art survey. ACM Comput. Surv. (CSUR) 52(5), 1\u201348 (2019). https:\/\/doi.org\/10.1145\/3329786","journal-title":"ACM Comput. Surv. (CSUR)"},{"issue":"2","key":"12_CR19","doi-asserted-by":"publisher","first-page":"487","DOI":"10.1016\/j.comnet.2012.06.022","volume":"57","author":"R Perdisci","year":"2013","unstructured":"Perdisci, R., Ariu, D., Giacinto, G.: Scalable fine-grained behavioral clustering of HTTP-based malware. Comput. Netw. 57(2), 487\u2013500 (2013). https:\/\/doi.org\/10.1016\/j.comnet.2012.06.022","journal-title":"Comput. Netw."},{"issue":"3","key":"12_CR20","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1007\/s10207-020-00509-4","volume":"20","author":"G Pitolli","year":"2020","unstructured":"Pitolli, G., Laurenza, G., Aniello, L., Querzoni, L., Baldoni, R.: MalFamAware: automatic family identification and malware classification through online clustering. Int. J. Inf. Secur. 20(3), 371\u2013386 (2020). https:\/\/doi.org\/10.1007\/s10207-020-00509-4","journal-title":"Int. J. Inf. Secur."},{"key":"12_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.jisa.2021.102876","volume":"60","author":"T Rezaei","year":"2021","unstructured":"Rezaei, T., Manavi, F., Hamzeh, A.: A PE header-based method for malware detection using clustering and deep embedding techniques. J. Inf. Secur. Appl. 60, 102876 (2021). https:\/\/doi.org\/10.1016\/j.jisa.2021.102876","journal-title":"J. Inf. Secur. Appl."},{"issue":"4","key":"12_CR22","doi-asserted-by":"publisher","first-page":"639","DOI":"10.3233\/JCS-2010-0410","volume":"19","author":"K Rieck","year":"2011","unstructured":"Rieck, K., Trinius, P., Willems, C., Holz, T.: Automatic analysis of malware behavior using machine learning. J. Comput. Secur. 19(4), 639\u2013668 (2011). https:\/\/doi.org\/10.3233\/JCS-2010-0410","journal-title":"J. Comput. Secur."},{"key":"12_CR23","doi-asserted-by":"publisher","unstructured":"Wilkins, Z., Zincir-Heywood, N.: Cougar: clustering of unknown malware using genetic algorithm routines. In: Proceedings of the 2020 Genetic and Evolutionary Computation Conference, pp. 1195\u20131203 (2020). https:\/\/doi.org\/10.1145\/3377930.3390151","DOI":"10.1145\/3377930.3390151"},{"key":"12_CR24","doi-asserted-by":"publisher","unstructured":"Yang, L., Ciptadi, A., Laziuk, I., Ahmadzadeh, A., Wang, G.: Bodmas: an open dataset for learning based temporal analysis of PE malware. In: 2021 IEEE Security and Privacy Workshops (SPW), pp. 78\u201384. IEEE (2021). https:\/\/doi.org\/10.1109\/SPW53761.2021.00020","DOI":"10.1109\/SPW53761.2021.00020"},{"issue":"2","key":"12_CR25","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1145\/235968.233324","volume":"25","author":"T Zhang","year":"1996","unstructured":"Zhang, T., Ramakrishnan, R., Livny, M.: Birch: an efficient data clustering method for very large databases. ACM SIGMOD Rec. 25(2), 103\u2013114 (1996). https:\/\/doi.org\/10.1145\/235968.233324","journal-title":"ACM SIGMOD Rec."}],"container-title":["Lecture Notes in Computer Science","Availability, Reliability and Security"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-00627-1_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,9]],"date-time":"2025-08-09T04:22:46Z","timestamp":1754713366000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-00627-1_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783032006264","9783032006271"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-00627-1_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"10 August 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ARES","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Availability, Reliability and Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ghent","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belgium","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ares-12025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2025.ares-conference.eu","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}