{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T15:15:33Z","timestamp":1778166933114,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737431","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:04:26Z","timestamp":1754255066000},"page":"5516-5526","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["EMBER2024 - A Benchmark Dataset for Holistic Evaluation of Malware Classifiers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7168-1237","authenticated-orcid":false,"given":"Robert J.","family":"Joyce","sequence":"first","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2561-3535","authenticated-orcid":false,"given":"Gideon","family":"Miller","sequence":"additional","affiliation":[{"name":"Laboratory for Physical Sciences, College Park, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5404-4320","authenticated-orcid":false,"given":"Phil","family":"Roth","sequence":"additional","affiliation":[{"name":"CrowdStrike, Austin, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4272-2565","authenticated-orcid":false,"given":"Richard","family":"Zak","sequence":"additional","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4023-3880","authenticated-orcid":false,"given":"Elliott","family":"Zaresky-Williams","sequence":"additional","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4720-6907","authenticated-orcid":false,"given":"Hyrum","family":"Anderson","sequence":"additional","affiliation":[{"name":"Cisco Systems, San Jose, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9900-1972","authenticated-orcid":false,"given":"Edward","family":"Raff","sequence":"additional","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6368-8696","authenticated-orcid":false,"given":"James","family":"Holt","sequence":"additional","affiliation":[{"name":"Laboratory for Physical Sciences, College Park, MD, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"[n. d.]. VirusShare.com - Because Sharing is Caring. https:\/\/virusshare.com\/ Last accessed on 2025-02-17."},{"key":"e_1_3_2_2_2_1","volume-title":"Ember: an open dataset for training static pe malware machine learning models. arXiv preprint arXiv:1804.04637","author":"Anderson Hyrum S","year":"2018","unstructured":"Hyrum S Anderson and Phil Roth. 2018. Ember: an open dataset for training static pe malware machine learning models. arXiv preprint arXiv:1804.04637 (2018)."},{"key":"e_1_3_2_2_3_1","volume-title":"2023 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML). IEEE, 339-364","author":"Apruzzese G.","unstructured":"G. Apruzzese, H. S. Anderson, S. Dambra, D. Freeman, F. Pierazzi, and K. Roundy. 2023. ''Real attackers don't compute gradients'': Bridging the gap between adversarial ML research and practice. In 2023 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML). IEEE, 339-364."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2020.101859"},{"key":"e_1_3_2_2_5_1","unstructured":"Ero Carrera. 2004. pefile: Python module to read and work with PE files. https:\/\/github.com\/erocarrera\/pefile"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/3091622.3091637"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/SNAMS52053.2020.9336538"},{"key":"e_1_3_2_2_8_1","volume-title":"SOREL-20M: A large scale benchmark dataset for malicious PE detection. arXiv preprint arXiv:2012.07634","author":"Harang Richard","year":"2020","unstructured":"Richard Harang and Ethan M Rudd. 2020. SOREL-20M: A large scale benchmark dataset for malicious PE detection. arXiv preprint arXiv:2012.07634 (2020)."},{"key":"e_1_3_2_2_9_1","first-page":"399","volume-title":"DIMVA 2016, San Sebasti\u00e1n, Spain, July 7-8, 2016, Proceedings 13","author":"Huang Wenyi","year":"2016","unstructured":"Wenyi Huang and Jack W Stokes. 2016. MtNet: a multi-task neural network for dynamic malware classification. In Detection of Intrusions and Malware, and Vulnerability Assessment: 13th International Conference, DIMVA 2016, San Sebasti\u00e1n, Spain, July 7-8, 2016, Proceedings 13. Springer, 399-418."},{"key":"e_1_3_2_2_10_1","first-page":"625","volume-title":"Transcend: Detecting Concept Drift in Malware Classification Models. In 26th USENIX Security Symposium (USENIX Security 17)","author":"Jordaney Roberto","year":"2017","unstructured":"Roberto Jordaney, Kumar Sharad, Santanu K. Dash, Zhi Wang, Davide Papini, Ilia Nouretdinov, and Lorenzo Cavallaro. 2017. Transcend: Detecting Concept Drift in Malware Classification Models. In 26th USENIX Security Symposium (USENIX Security 17). USENIX Association, Vancouver, BC, 625-642. https:\/\/www.usenix.org\/conference\/usenixsecurity17\/technical-sessions\/presentation\/jordaney"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2022.102921"},{"key":"e_1_3_2_2_12_1","volume-title":"ClarAVy: A Tool for Scalable and Accurate Malware Family Labeling. In Companion of the 16th ACM\/SPEC International Conference on Performance Engineering (WWW Companion '25)","author":"Joyce Robert J.","year":"2025","unstructured":"Robert J. Joyce, Derek Everett, Maya Fuchs, Edward Raff, and James Holt. 2025. ClarAVy: A Tool for Scalable and Accurate Malware Family Labeling. In Companion of the 16th ACM\/SPEC International Conference on Performance Engineering (WWW Companion '25)."},{"key":"e_1_3_2_2_13_1","volume-title":"Platforms, Exploitation, and Packers. arXiv preprint arXiv:2310.11706","author":"Joyce Robert J","year":"2023","unstructured":"Robert J Joyce, Edward Raff, Charles Nicholas, and James Holt. 2023. MalDICT: Benchmark Datasets on Malware Behaviors, Platforms, Exploitation, and Packers. arXiv preprint arXiv:2310.11706 (2023)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2808769.2808780"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1014052.1014105"},{"key":"e_1_3_2_2_16_1","volume-title":"Towards an automated pipeline for detecting and classifying malware through machine learning. arXiv preprint arXiv:2106.05625","author":"Loi Nicola","year":"2021","unstructured":"Nicola Loi, Claudio Borile, and Daniele Ucci. 2021. Towards an automated pipeline for detecting and classifying malware through machine learning. arXiv preprint arXiv:2106.05625 (2021)."},{"key":"e_1_3_2_2_17_1","unstructured":"Microsoft Security Team. 2021. Attack AI Systems in Machine Learning Evasion Competition. https:\/\/www.microsoft.com\/en-us\/security\/blog\/2021\/07\/29\/attack-ai-systems-in-machine-learning-evasion-competition\/"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2015.04.001"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CTC.2013.9"},{"key":"e_1_3_2_2_20_1","volume-title":"Small Effect Sizes in Malware Detection? Make Harder Train\/Test Splits!","author":"Patel Tirth","year":"2023","unstructured":"Tirth Patel, Fred Lu, Edward Raff, Charles Nicholas, Cynthia Matuszek, and James Holt. 2023. Small Effect Sizes in Malware Detection? Make Harder Train\/Test Splits! (2023)."},{"key":"e_1_3_2_2_21_1","volume-title":"A survey of machine learning methods and challenges for windows malware classification. arXiv preprint arXiv:2006.09271","author":"Raff Edward","year":"2020","unstructured":"Edward Raff and Charles Nicholas. 2020. A survey of machine learning methods and challenges for windows malware classification. arXiv preprint arXiv:2006.09271 (2020)."},{"key":"e_1_3_2_2_22_1","volume-title":"https:\/\/docs.google.com\/presentation\/d\/1A13tsUkgWeujTy9SD-vDFfQp9fnIqbSE_tCihNPlArQ Conference on Applied Machine Learning in Information Security","author":"Roth Phil","year":"2019","unstructured":"Phil Roth. 2019. EMBER Improvements. (2019). https:\/\/docs.google.com\/presentation\/d\/1A13tsUkgWeujTy9SD-vDFfQp9fnIqbSE_tCihNPlArQ Conference on Applied Machine Learning in Information Security."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MALWARE.2015.7413680"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/SECPRI.2001.924286"},{"key":"e_1_3_2_2_25_1","unstructured":"John Seymour. [n. d.]. label-virusshare. https:\/\/github.com\/seymour1\/label-virusshare Last accessed on 2025-02-14."},{"key":"e_1_3_2_2_26_1","first-page":"230","volume-title":"RAID 2016, Paris, France, September 19-21, 2016, Proceedings 19","author":"Sebasti\u00e1n Marcos","year":"2016","unstructured":"Marcos Sebasti\u00e1n, Richard Rivera, Platon Kotzias, and Juan Ca ballero. 2016. Avclass: A tool for massive malware labeling. In Research in Attacks, Intrusions, and Defenses: 19th International Symposium, RAID 2016, Paris, France, September 19-21, 2016, Proceedings 19. Springer, 230-253."},{"key":"e_1_3_2_2_27_1","unstructured":"Wei Song Xuezixiang Li Sadia Afroz Deepali Garg Dmitry Kuznetsov and Heng Yin. 2021. MAB-Malware: A Reinforcement Learning Framework for Attacking Static Malware Classifiers. arXiv:2003.03100 [cs.CR] https:\/\/arxiv.org\/abs\/2003.03100"},{"key":"e_1_3_2_2_28_1","unstructured":"Romain Thomas. 2017. LIEF - Library to Instrument Executable Formats. https:\/\/lief.quarkslab.com\/."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3291061"},{"key":"e_1_3_2_2_30_1","unstructured":"VirusTotal. [n. d.]. Analyse suspicious files domains IPs and URLs to detect malware and other breaches automatically share them with the security community. https:\/\/www.virustotal.com\/en\/ Last accessed on 2025-02-18."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICEA.2019.8858297"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737431","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T18:19:14Z","timestamp":1777573154000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737431"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":31,"alternative-id":["10.1145\/3711896.3737431","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737431","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}