{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:00:27Z","timestamp":1772906427165,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715212","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:10:32Z","timestamp":1750687832000},"page":"277-286","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["ClarAVy: A Tool for Scalable and Accurate Malware Family Labeling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7168-1237","authenticated-orcid":false,"given":"Robert J.","family":"Joyce","sequence":"first","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3593-5255","authenticated-orcid":false,"given":"Derek","family":"Everett","sequence":"additional","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0771-2647","authenticated-orcid":false,"given":"Maya","family":"Fuchs","sequence":"additional","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9900-1972","authenticated-orcid":false,"given":"Edward","family":"Raff","sequence":"additional","affiliation":[{"name":"Booz Allen Hamilton, McLean, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6368-8696","authenticated-orcid":false,"given":"James","family":"Holt","sequence":"additional","affiliation":[{"name":"Laboratory for 
Physical Sciences, College Park, MD, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"[n. d.]. VirusShare.com - Because Sharing is Caring. https:\/\/virusshare.com\/ Last accessed on 2024-11-17."},{"key":"e_1_3_2_2_2_1","volume-title":"NDSS 2009, 16th Annual Network and Distributed System Security Symposium. http:\/\/www.eurecom.fr\/publication\/2783","author":"Bayer Ulrich","year":"2009","unstructured":"Ulrich Bayer, Paolo Milani Comparetti, Clemens Hlauschek, Christopher Kruegel, and Engin Kirda. 2009. Scalable, behavior-based malware clustering. In NDSS 2009, 16th Annual Network and Distributed System Security Symposium. http:\/\/www.eurecom.fr\/publication\/2783"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2020.101859"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2021.102500"},{"key":"e_1_3_2_2_5_1","volume-title":"Multilevel bayesian models of categorical data annotation. Unpublished manuscript 17, 122","author":"Carpenter Bob","year":"2008","unstructured":"Bob Carpenter. 2008. Multilevel bayesian models of categorical data annotation. Unpublished manuscript 17, 122 (2008), 45--50."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/MALWARE.2015.7413685"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.2307\/2346806"},{"key":"e_1_3_2_2_8_1","unstructured":"Karsten Hahn. [n. d.]. Malware Naming Hell Part 1: Taming the mess of AV detection names. https:\/\/www.gdatasoftware.com\/blog\/2019\/08\/35146-taming-the-mess-of-av-detection-names Last accessed on 2024-11-17."},{"key":"e_1_3_2_2_9_1","volume-title":"Rudd","author":"Harang Richard","year":"2020","unstructured":"Richard Harang and Ethan M. Rudd. 2020. SOREL-20M: A Large Scale Benchmark Dataset for Malicious PE Detection. 
arXiv:2012.07634 [cs.CR]"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-40667-1_20"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2017.57"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSN58367.2023.00029"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2024.3373601"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2022.102921"},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the Conference on Applied Machine Learning in Information Security. 54--69","author":"Joyce Robert J.","year":"2021","unstructured":"Robert J. Joyce, Edward Raff, and Charles Nicholas. 2021. Rank-1 Similarity Matrix Decomposition For Modeling Changes in Antivirus Consensus Through Time. In Proceedings of the Conference on Applied Machine Learning in Information Security. 54--69."},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of the Conference on Applied Machine Learning in Information Security. 105--121","author":"Joyce Robert J.","year":"2023","unstructured":"Robert J. Joyce, Edward Raff, Charles Nicholas, and James Holt. 2023. MalDICT: Benchmark Datasets on Malware Behaviors, Platforms, Exploitation, and Packers. In Proceedings of the Conference on Applied Machine Learning in Information Security. 105--121."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.icte.2022.02.007"},{"key":"e_1_3_2_2_18_1","volume-title":"AVCLASS++: Yet Another Massive Malware Labeling Tool.","author":"Kurogome Yura","year":"2019","unstructured":"Yura Kurogome. 2019. AVCLASS++: Yet Another Massive Malware Labeling Tool. (2019). https:\/\/github.com\/killvxk\/avclassplusplus Black Hat Europe."},{"key":"e_1_3_2_2_19_1","volume-title":"Reiter","author":"Li Peng","year":"2010","unstructured":"Peng Li, Limin Liu, Debin Gao, and Michael K. Reiter. 2010. On Challenges in Evaluating Malware Clustering. 
In Recent Advances in Intrusion Detection, Somesh Jha, Robin Sommer, and Christian Kreibich (Eds.). 238--255."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2487788.2488056"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-08509-8_7"},{"key":"e_1_3_2_2_22_1","volume-title":"Towards a Methodical Evaluation of Antivirus Scans and Labels","author":"Mohaisen Aziz","unstructured":"Aziz Mohaisen, Omar Alrawi, Matt Larson, and Danny McPherson. 2014. Towards a Methodical Evaluation of Antivirus Scans and Labels. In Information Security Applications, Yongdae Kim, Heejo Lee, and Adrian Perrig (Eds.). Cham, 231--241."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2015.04.001"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/79.543975"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2016904.2016908"},{"key":"e_1_3_2_2_26_1","volume-title":"Small Effect Sizes in Malware Detection? Make Harder Train\/Test Splits! Proceedings of the Conference on Applied Machine Learning in Information Security","author":"Patel Tirth","year":"2023","unstructured":"Tirth Patel, Fred Lu, Edward Raff, Charles Nicholas, Cynthia Matuszek, and James Holt. 2023. Small Effect Sizes in Malware Detection? Make Harder Train\/Test Splits! Proceedings of the Conference on Applied Machine Learning in Information Security (2023). https:\/\/arxiv.org\/abs\/2312.15813"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.18464\/cybin.v3i1.17"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","unstructured":"Y. Qiao X. Yun and Y. Zhang. 2016. How to Automatically Identify the Homology of Different Malware. In 2016 IEEE Trustcom\/BigDataSE\/ISPA. 929--936. 
https:\/\/doi.org\/10.1109\/TrustCom.2016.0158","DOI":"10.1109\/TrustCom.2016.0158"},{"key":"e_1_3_2_2_29_1","volume-title":"https:\/\/github.com\/killvxk\/avclassplusplus","author":"Sebasti\u00e1n Marcos","year":"2023","unstructured":"Marcos Sebasti\u00e1n and Juan Caballero. 2023. AVClass. (2023). https:\/\/github.com\/killvxk\/avclassplusplus"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Marcos Sebasti\u00e1n Richard Rivera Platon Kotzias and Juan Caballero. 2016. AVclass: A Tool for Massive Malware Labeling. In Research in Attacks Intrusions and Defenses Fabian Monrose Marc Dacier Gregory Blanc and Joaquin Garcia-Alfaro (Eds.). Cham 230--253.","DOI":"10.1007\/978-3-319-45719-2_11"},{"key":"e_1_3_2_2_31_1","volume-title":"AVClass2: Massive Malware Tag Extraction from AV Labels. CoRR abs\/2006.10615","author":"Sebasti\u00e1n Silvia","year":"2020","unstructured":"Silvia Sebasti\u00e1n and Juan Caballero. 2020. AVClass2: Massive Malware Tag Extraction from AV Labels. CoRR abs\/2006.10615 (2020). arXiv:2006.10615 https:\/\/arxiv.org\/abs\/2006.10615"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2653763"},{"key":"e_1_3_2_2_33_1","volume-title":"Dynamic bayesian combination of multiple imperfect classifiers. Decision making and imperfection","author":"Simpson Edwin","year":"2013","unstructured":"Edwin Simpson, Stephen Roberts, Ioannis Psorakis, and Arfon Smith. 2013. Dynamic bayesian combination of multiple imperfect classifiers. Decision making and imperfection (2013), 1--35."},{"key":"e_1_3_2_2_34_1","volume-title":"Fast dawidskene: A fast vote aggregation scheme for sentiment classification. arXiv preprint arXiv:1803.02781","author":"Sinha Vaibhav B","year":"2018","unstructured":"Vaibhav B Sinha, Sukrut Rao, and Vineeth N Balasubramanian. 2018. Fast dawidskene: A fast vote aggregation scheme for sentiment classification. 
arXiv preprint arXiv:1803.02781 (2018)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098196"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2018.11.001"},{"key":"e_1_3_2_2_37_1","unstructured":"VirusTotal. [n. d.]. File statistics during last 7 days. https:\/\/www.virustotal.com\/en\/statistics\/ Last accessed on 2024-11-17."},{"key":"e_1_3_2_2_38_1","volume-title":"An Observational Investigation of Reverse Engineers' Processes. In 29th USENIX Security Symposium (USENIX Security 20)","author":"Votipka Daniel","year":"2020","unstructured":"Daniel Votipka, Seth Rabin, Kristopher Micinski, Jeffrey S. Foster, and Michelle L. Mazurek. 2020. An Observational Investigation of Reverse Engineers' Processes. In 29th USENIX Security Symposium (USENIX Security 20). USENIX Association, 1875--1892. https:\/\/www.usenix.org\/conference\/usenixsecurity20\/presentation\/votipka-observational"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.113022"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPW53761.2021.00020"},{"key":"e_1_3_2_2_41_1","volume-title":"Spectral methods meet EM: A provably optimal algorithm for crowdsourcing. Advances in neural information processing systems 27","author":"Zhang Yuchen","year":"2014","unstructured":"Yuchen Zhang, Xi Chen, Dengyong Zhou, and Michael I Jordan. 2014. Spectral methods meet EM: A provably optimal algorithm for crowdsourcing. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_2_2_42_1","volume-title":"Measuring and Modeling the Label Dynamics of Online Anti-Malware Engines. In 29th USENIX Security Symposium (USENIX Security 20). USENIX Association, 2361--2378","author":"Zhu Shuofei","year":"2020","unstructured":"Shuofei Zhu, Jianjun Shi, Limin Yang, Boqin Qin, Ziyi Zhang, Linhai Song, and Gang Wang. 2020. Measuring and Modeling the Label Dynamics of Online Anti-Malware Engines. 
In 29th USENIX Security Symposium (USENIX Security 20). USENIX Association, 2361--2378. https:\/\/www.usenix.org\/conference\/usenixsecurity20\/presentation\/zhu"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715212","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T18:24:23Z","timestamp":1759861463000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715212"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":42,"alternative-id":["10.1145\/3701716.3715212","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715212","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}