{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T22:35:25Z","timestamp":1780698925527,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,15]],"date-time":"2023-11-15T00:00:00Z","timestamp":1700006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100014440","name":"Ministerio de Ciencia, Innovaci\u00f3n y Universidades","doi-asserted-by":"publisher","award":["TED2021-132464B-I00 - PID2022-142290OB-I00"],"award-info":[{"award-number":["TED2021-132464B-I00 - PID2022-142290OB-I00"]}],"id":[{"id":"10.13039\/100014440","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Agence Nationale de la Recherche","award":["ANR-22-PECY0007 - ANR-22- PECY-0008"],"award-info":[{"award-number":["ANR-22-PECY0007 - ANR-22- PECY-0008"]}]},{"DOI":"10.13039\/501100000781","name":"European Research Council","doi-asserted-by":"publisher","award":["771844"],"award-info":[{"award-number":["771844"]}],"id":[{"id":"10.13039\/501100000781","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,15]]},"DOI":"10.1145\/3576915.3616589","type":"proceedings-article","created":{"date-parts":[[2023,11,21]],"date-time":"2023-11-21T12:35:13Z","timestamp":1700570113000},"page":"60-74","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":36,"title":["Decoding the Secrets of Machine Learning in Malware Classification: A Deep Dive into Datasets, Feature Extraction, and Model Performance"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0988-9366","authenticated-orcid":false,"given":"Savino","family":"Dambra","sequence":"first","affiliation":[{"name":"Norton Research Group, Biot, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9035-6718","authenticated-orcid":false,"given":"Yufei","family":"Han","sequence":"additional","affiliation":[{"name":"INRIA, Rennes, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9547-3502","authenticated-orcid":false,"given":"Simone","family":"Aonzo","sequence":"additional","affiliation":[{"name":"EURECOM, Biot, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3375-6069","authenticated-orcid":false,"given":"Platon","family":"Kotzias","sequence":"additional","affiliation":[{"name":"Norton Research Group, Athens, Greece"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8452-4532","authenticated-orcid":false,"given":"Antonino","family":"Vitale","sequence":"additional","affiliation":[{"name":"EURECOM, Biot, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2962-1348","authenticated-orcid":false,"given":"Juan","family":"Caballero","sequence":"additional","affiliation":[{"name":"IMDEA Software Institute, Madrid, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5957-6213","authenticated-orcid":false,"given":"Davide","family":"Balzarotti","sequence":"additional","affiliation":[{"name":"EURECOM, Biot, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8408-3741","authenticated-orcid":false,"given":"Leyla","family":"Bilge","sequence":"additional","affiliation":[{"name":"Norton Research Group, Biot, France"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,11,21]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. DecodingMLSecretsOfWindowsMalwareClassification. https:\/\/github.c om\/eurecom-s3\/DecodingMLSecretsOfWindowsMalwareClassification."},{"key":"e_1_3_2_1_2_1","unstructured":"Accessed September 20 2023. Chocolatey the Package Manager for Windows. https:\/\/chocolatey.org\/."},{"key":"e_1_3_2_1_3_1","unstructured":"Accessed September 20 2023. Detect-It-Easy. https:\/\/github.com\/horsicq\/Detect- It-Easy."},{"key":"e_1_3_2_1_4_1","unstructured":"Accessed September 20 2023. JuanLesPIN. https:\/\/github.com\/Maff1t\/JuanLesP IN-Public."},{"key":"e_1_3_2_1_5_1","unstructured":"Accessed September 20 2023. LordNoteworthy\/al-khaser. https:\/\/github.com\/L ordNoteworthy\/al-khaser."},{"key":"e_1_3_2_1_6_1","unstructured":"Accessed September 20 2023. Proxmox Virtual Environment. https:\/\/www.prox mox.com\/en\/proxmox-ve."},{"key":"e_1_3_2_1_7_1","unstructured":"Accessed September 20 2023. Yara patterns of RetDec. https:\/\/github.com\/avast \/retdec\/tree\/master\/support\/yara_patterns."},{"key":"e_1_3_2_1_8_1","volume-title":"NDSS Symposium","author":"Aghakhani Hojjat","year":"2020","unstructured":"Hojjat Aghakhani, Fabio Gritti, Francesco Mecca, Martina Lindorfer, Stefano Ortolani, Davide Balzarotti, Giovanni Vigna, and Christopher Kruegel. 2020. When Malware is Packin'Heat; Limits of Machine Learning Classifiers Based on Static Analysis Features. In NDSS Symposium 2020."},{"key":"e_1_3_2_1_9_1","volume-title":"Ember: an open dataset for training static pe malware machine learning models. arXiv preprint arXiv:1804.04637","author":"Anderson Hyrum S","year":"2018","unstructured":"Hyrum S Anderson and Phil Roth. 2018. Ember: an open dataset for training static pe malware machine learning models. arXiv preprint arXiv:1804.04637 (2018)."},{"key":"e_1_3_2_1_10_1","volume-title":"To appear in Usenix Security","author":"Aonzo Simone","year":"2023","unstructured":"Simone Aonzo, Yufei Han, Alessandro Mantovani, and Davide Balzarotti. 2022. Humans vs. Machines in Malware Classification. In To appear in Usenix Security 2023."},{"key":"e_1_3_2_1_11_1","unstructured":"Daniel Arp Erwin Quiring Feargus Pendlebury Alexander Warnecke Fabio Pierazzi Christian Wressnegger Lorenzo Cavallaro and Konrad Rieck. [n. d.]. Dos and Dontextquoterightts of Machine Learning in Computer Security. In USENIX Security 22."},{"key":"e_1_3_2_1_12_1","volume-title":"Transcending Transcend: Revisiting Malware Classification in the Presence of Concept Drift. In IEEE Symposium on Security and Privacy (Oakland).","author":"Barbero Federico","year":"2022","unstructured":"Federico Barbero, Feargus Pendlebury, Fabio Pierazzi, and Lorenzo Cavallaro. 2022. Transcending Transcend: Revisiting Malware Classification in the Presence of Concept Drift. In IEEE Symposium on Security and Privacy (Oakland)."},{"key":"e_1_3_2_1_13_1","unstructured":"Capstone. 2022. Capstone - The ultimate disassembly framework. https:\/\/www.capstone-engine.org\/."},{"key":"e_1_3_2_1_14_1","unstructured":"Microsoft Corporation. 2022. PE Format. https:\/\/docs.microsoft.com\/en-us\/windows\/win32\/debug\/pe-format."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638293"},{"key":"e_1_3_2_1_16_1","volume-title":"Feature Extraction, and Model Performance. arXiv preprint arXiv:2307.14657","author":"Dambra Savino","year":"2023","unstructured":"Savino Dambra, Yufei Han, Simone Aonzo, Platon Kotzias, Antonino Vitale, Juan Caballero, Davide Balzarotti, and Leyla Bilge. 2023. Decoding the Secrets of Machine Learning in Malware Classification: A Deep Dive into Datasets, Feature Extraction, and Model Performance. arXiv preprint arXiv:2307.14657 (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2021.102550"},{"key":"e_1_3_2_1_18_1","volume-title":"MalDAE: Detecting and explaining malware based on correlation and fusion of static and dynamic characteristics. computers & security","author":"Han Weijie","year":"2019","unstructured":"Weijie Han, Jingfeng Xue, Yong Wang, Lu Huang, Zixiao Kong, and Limin Mao. 2019a. MalDAE: Detecting and explaining malware based on correlation and fusion of static and dynamic characteristics. computers & security, Vol. 83 (2019), 208--233."},{"key":"e_1_3_2_1_19_1","volume-title":"MalInsight: A systematic profiling based malware detection framework. Journal of Network and Computer Applications","author":"Han Weijie","year":"2019","unstructured":"Weijie Han, Jingfeng Xue, Yong Wang, Zhenyan Liu, and Zixiao Kong. 2019b. MalInsight: A systematic profiling based malware detection framework. Journal of Network and Computer Applications (2019)."},{"key":"e_1_3_2_1_20_1","volume-title":"Stokes","author":"Huang Wenyi","year":"2016","unstructured":"Wenyi Huang and Jack W. Stokes. 2016. MtNet: A Multi-Task Neural Network for Dynamic Malware Classification. In DIMVA '16."},{"key":"e_1_3_2_1_21_1","unstructured":"AV-TEST Institute. 2023. New Malware. https:\/\/www.av-test.org\/en\/statistics\/malware\/."},{"key":"e_1_3_2_1_22_1","volume-title":"Annual Computer Security Applications Conference.","author":"Jindal Chani","year":"2019","unstructured":"Chani Jindal, Christopher Salls, Hojjat Aghakhani, Keith Long, Christopher Kruegel, and Giovanni Vigna. 2019. Neurlux: Dynamic Malware Analysis without Feature Engineering. In Annual Computer Security Applications Conference."},{"key":"e_1_3_2_1_23_1","unstructured":"Roberto Jordaney Kumar Sharad Santanu Kumar Dash Zhi Wang Davide Papini Ilia Nouretdinov and Lorenzo Cavallaro. [n. d.]. Transcend: Detecting Concept Drift in Malware Classification Models. In USENIX Security 17."},{"key":"e_1_3_2_1_24_1","volume-title":"MOTIF: A Large Malware Reference Dataset with Ground Truth Family Labels. In Workshop on Artificial Intelligence for Cyber Security.","author":"Joyce Robert J","year":"2022","unstructured":"Robert J Joyce, Dev Amlani, Charles Nicholas, and Edward Raff. 2022. MOTIF: A Large Malware Reference Dataset with Ground Truth Family Labels. In Workshop on Artificial Intelligence for Cyber Security."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CICYBS.2013.6597204"},{"key":"e_1_3_2_1_26_1","unstructured":"ElMouatez Billah Karbab and Mourad Debbabi. 2019. MalDy: Portable data-driven malware detection using natural language processing and machine learning techniques on behavioral analysis reports. (2019)."},{"key":"e_1_3_2_1_27_1","unstructured":"Kaspersky. 2023. PCybercriminals attack users with 400 000 new malicious files daily. https:\/\/www.kaspersky.com\/about\/press-releases\/2022_cybercriminals-attack-users-with-400000-new-malicious-files-daily-that-is-5-more-than-in-2021."},{"key":"e_1_3_2_1_28_1","unstructured":"Alexander Kuechler Alessandro Mantovani Yufei Han Leyla Bilge and Davide Balzarotti. [n. d.]. Does Every Second Count? Time-based Evolution of Malware Behavior in Sandboxes (NDSS 21)."},{"key":"e_1_3_2_1_29_1","volume-title":"Hyung Geun Oh, and Eui Tak Kim","author":"Lee Shinho","year":"2021","unstructured":"Shinho Lee, Wookhyun Jung, Wonrak Lee, Hyung Geun Oh, and Eui Tak Kim. 2021. Android malware dataset construction methodology to minimize bias-variance tradeoff. ICT Express (2021)."},{"key":"e_1_3_2_1_30_1","unstructured":"Chia Chin Lip and Dzati Athiar Ramli. 2012. Comparative Study on Feature Score and Decision Level Fusion Schemes for Robust Multibiometric Systems."},{"key":"e_1_3_2_1_31_1","unstructured":"Weitang Liu Xiaoyun Wang John D. Owens and Yixuan Li. 2020. Energy-Based out-of-Distribution Detection (NIPS'20)."},{"key":"e_1_3_2_1_32_1","unstructured":"Nicola Loi Claudio Borile and Daniele Ucci. 2021. Towards an Automated Pipeline for Detecting and Classifying Malware through Machine Learning. https:\/\/arxiv.org\/abs\/2106.05625"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2007.48"},{"key":"e_1_3_2_1_34_1","volume-title":"Longitudinal Study of the Prevalence of Malware Evasive Techniques. arXiv preprint arXiv:2112.11289","author":"Maffia Lorenzo","year":"2021","unstructured":"Lorenzo Maffia, Dario Nisi, Platon Kotzias, Giovanni Lagorio, Simone Aonzo, and Davide Balzarotti. 2021. Longitudinal Study of the Prevalence of Malware Evasive Techniques. arXiv preprint arXiv:2112.11289 (2021)."},{"key":"e_1_3_2_1_35_1","volume-title":"Sadia Afroz, Rekha Bachwani, Riyaz Faizullabhoy, Ling Huang, Vaishaal Shankar, Tony Wu, George Yiu, Anthony D. Joseph, and J. D. Tygar.","author":"Miller Brad","year":"2016","unstructured":"Brad Miller, Alex Kantchelian, Michael Carl Tschantz, Sadia Afroz, Rekha Bachwani, Riyaz Faizullabhoy, Ling Huang, Vaishaal Shankar, Tony Wu, George Yiu, Anthony D. Joseph, and J. D. Tygar. 2016. Reviewer Integration and Performance Measurement for Malware Detection. In DIMVA."},{"key":"e_1_3_2_1_36_1","volume-title":"2017 IEEE Symposium on Security and Privacy (SP).","author":"Miramirkhani Najmeh","unstructured":"Najmeh Miramirkhani, Mahathi Priya Appini, Nick Nikiforakis, and Michalis Polychronakis. [n.,d.]. Spotless sandboxes: Evading malware analysis systems using wear-and-tear artifacts. In 2017 IEEE Symposium on Security and Privacy (SP)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2046684.2046689"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Marek Pawlicki Micha\u0142 Chora's Rafa\u0142 Kozik and Witold Ho\u0142ubowicz. 2021. Missing and Incomplete Data Handling in Cybersecurity Applications. In Intelligent Information and Database Systems.","DOI":"10.1007\/978-3-030-73280-6_33"},{"key":"e_1_3_2_1_39_1","volume-title":"USENIX Security Symposium.","author":"Pendlebury Feargus","year":"2019","unstructured":"Feargus Pendlebury, Fabio Pierazzi, Roberto Jordaney, Johannes Kinder, and Lorenzo Cavallaro. 2019. TESSERACT: Eliminating Experimental Bias in Malware Classification across Space and Time. In USENIX Security Symposium."},{"key":"e_1_3_2_1_40_1","volume-title":"Annual Computer Security Applications Conference.","author":"Perdisci Roberto","year":"2008","unstructured":"Roberto Perdisci, Andrea Lanzi, and Wenke Lee. 2008. McBoost: Boosting Scalability in Malware Collection and Analysis using Statistical Classification of Executables. In Annual Computer Security Applications Conference."},{"key":"e_1_3_2_1_41_1","unstructured":"Marco Pontello. 2021. TrID - File Identifier. http:\/\/mark0.net\/soft-trid-e.html."},{"key":"e_1_3_2_1_42_1","volume-title":"Induction of decision trees. Machine learning","author":"Quinlan J. Ross","year":"1986","unstructured":"J. Ross Quinlan. 1986. Induction of decision trees. Machine learning (1986)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3427228.3427242"},{"key":"e_1_3_2_1_44_1","volume-title":"Workshops at the AAAI Conference on Artificial Intelligence.","author":"Raff Edward","year":"2018","unstructured":"Edward Raff, Jon Barker, Jared Sylvester, Robert Brandon, Bryan Catanzaro, and Charles K Nicholas. 2018. Malware Detection by Eating a Whole EXE. In Workshops at the AAAI Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_45_1","volume-title":"Early-stage malware prediction using recurrent neural networks. computers & security","author":"Rhode Matilda","year":"2018","unstructured":"Matilda Rhode, Pete Burnap, and Kevin Jones. 2018. Early-stage malware prediction using recurrent neural networks. computers & security, Vol. 77 (2018), 578--594."},{"key":"e_1_3_2_1_46_1","unstructured":"Konrad Rieck Thorsten Holz Carsten Willems Patrick D\u00fcssel and Pavel Laskov. 2008. Learning and Classification of Malware Behavior. In Detection of Intrusions and Malware and Vulnerability Assessment."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2012.14"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2016.12.016"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2011.08.020"},{"key":"e_1_3_2_1_50_1","volume-title":"OPEM: A Static-Dynamic Approach for Machine-learning-based Malware Detection. In International joint conference CISIS.","author":"Santos Igor","year":"2012","unstructured":"Igor Santos, Jaime Devesa, Felix Brezo, Javier Nieves, and Pablo Garcia Bringas. 2012. OPEM: A Static-Dynamic Approach for Machine-learning-based Malware Detection. In International joint conference CISIS."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/MALWARE.2015.7413680"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3427228.3427261"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04342-0_7"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411508.3421373"},{"key":"e_1_3_2_1_55_1","unstructured":"Nazgol Tavabi Andres Abeliuk Negar Mokhberian Jeremy Abramson and Kristina Lerman. [n. d.]. Challenges in Forecasting Malicious Events from Incomplete Data (WWW '20)."},{"key":"e_1_3_2_1_56_1","volume-title":"A Problem of Dimensionality: A Simple Example","author":"Trunk G. V.","year":"1979","unstructured":"G. V. Trunk. 1979. A Problem of Dimensionality: A Simple Example. IEEE Transactions on Pattern Analysis and Machine Intelligence (1979)."},{"key":"e_1_3_2_1_57_1","volume-title":"IMCFN: Image-based malware classification using fine-tuned convolutional neural network architecture.","author":"Vasan Danish","year":"2020","unstructured":"Danish Vasan, Mamoun Alazab, Sobia Wassan, Hamad Naeem, Babak Safaei, and Qin Zheng. 2020. IMCFN: Image-based malware classification using fine-tuned convolutional neural network architecture. (2020)."},{"key":"e_1_3_2_1_58_1","unstructured":"vtfeedapi Accessed September 20 2023. VirusTotal API 2.0 Reference: File Feed. https:\/\/developers.virustotal.com\/v2.0\/reference\/file-feed."},{"key":"e_1_3_2_1_59_1","volume-title":"qian han, and yumeng gao","year":"2022","unstructured":"jiezhong xiao, qian han, and yumeng gao. 2022. Hybrid Classification and Clustering Algorithm on Recent Android Malware Detection (CSAI 2021). Association for Computing Machinery."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Miuyin Yong Wong Matthew Landen Manos Antonakakis Douglas M Blough Elissa M Redmiles and Mustaque Ahamad. 2021. An Inside Look into the Practice of Malware Analysis. In ACM CCS 21.","DOI":"10.1145\/3460120.3484759"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-019-00675-z"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i01.5474"}],"event":{"name":"CCS '23: ACM SIGSAC Conference on Computer and Communications Security","location":"Copenhagen Denmark","acronym":"CCS '23","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the 2023 ACM SIGSAC Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3576915.3616589","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3576915.3616589","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T01:35:42Z","timestamp":1755740142000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3576915.3616589"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,15]]},"references-count":62,"alternative-id":["10.1145\/3576915.3616589","10.1145\/3576915"],"URL":"https:\/\/doi.org\/10.1145\/3576915.3616589","relation":{},"subject":[],"published":{"date-parts":[[2023,11,15]]},"assertion":[{"value":"2023-11-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}