{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:41:31Z","timestamp":1773841291398,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,30]]},"DOI":"10.1145\/3605764.3623907","type":"proceedings-article","created":{"date-parts":[[2023,11,21]],"date-time":"2023-11-21T12:12:17Z","timestamp":1700568737000},"page":"185-196","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["AVScan2Vec: Feature Learning on Antivirus Scan Data for Production-Scale Malware Corpora"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7168-1237","authenticated-orcid":false,"given":"Robert J.","family":"Joyce","sequence":"first","affiliation":[{"name":"Booz Allen Hamilton, Catonsville, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3212-8156","authenticated-orcid":false,"given":"Tirth","family":"Patel","sequence":"additional","affiliation":[{"name":"University of Maryland, Baltimore County, Catonsville, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9494-7139","authenticated-orcid":false,"given":"Charles","family":"Nicholas","sequence":"additional","affiliation":[{"name":"University of Maryland, Baltimore County, Catonsville, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9900-1972","authenticated-orcid":false,"given":"Edward","family":"Raff","sequence":"additional","affiliation":[{"name":"Booz Allen Hamilton, Jamesville , MD, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,11,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. VirusShare.com - Because Sharing is Caring. https:\/\/virusshare.com\/ Last accessed on 2023-06-09."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2857705.2857713"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290353"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV"},{"key":"e_1_3_2_1_5_1","volume-title":"CharacterBERT: Reconciling ELMo and BERT for Word-Level Open-Vocabulary Representations From Characters. CoRR abs\/2010.10392","author":"Boukkouri Hicham El","year":"2020","unstructured":"Hicham El Boukkouri, Olivier Ferret, Thomas Lavergne, Hiroshi Noji, Pierre Zweigenbaum, and Junichi Tsujii. 2020. CharacterBERT: Reconciling ELMo and BERT for Word-Level Open-Vocabulary Representations From Characters. CoRR abs\/2010.10392 (2020). arXiv:2010.10392 https:\/\/arxiv.org\/abs\/2010.10392"},{"key":"e_1_3_2_1_6_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR abs\/1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR abs\/1810.04805 (2018). arXiv:1810.04805 http:\/\/arxiv.org\/abs\/1810.04805"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939754"},{"key":"e_1_3_2_1_8_1","unstructured":"Karsten Hahn. [n. d.]. https:\/\/www.gdatasoftware.com\/blog\/2019\/08\/35146-taming-the-mess-of-av-detection-names Last accessed on 2023-06-09."},{"key":"e_1_3_2_1_9_1","volume-title":"Rudd","author":"Harang Richard E.","year":"2020","unstructured":"Richard E. Harang and Ethan M. Rudd. 2020. SOREL-20M: A Large Scale Benchmark Dataset for Malicious PE Detection. CoRR abs\/2012.07634 (2020). arXiv:2012.07634 https:\/\/arxiv.org\/abs\/2012.07634"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","unstructured":"Matthew Henderson Rami Al-Rfou Brian Strope Yun-hsuan Sung Laszlo Lukacs Ruiqi Guo Sanjiv Kumar Balint Miklos and Ray Kurzweil. 2017. Efficient Natural Language Response Suggestion for Smart Reply. https:\/\/doi.org\/10.48550\/ARXIV. 1705.00652","DOI":"10.48550\/ARXIV"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2009.01972"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2012.10.004"},{"key":"e_1_3_2_1_13_1","volume-title":"26th USENIX Security Symposium (USENIX Security 17)","author":"Jordaney Roberto","year":"2017","unstructured":"Roberto Jordaney, Kumar Sharad, Santanu K Dash, Zhi Wang, Davide Papini, Ilia Nouretdinov, and Lorenzo Cavallaro. 2017. Transcend: Detecting concept drift in malware classification models. In 26th USENIX Security Symposium (USENIX Security 17). 625--642."},{"key":"e_1_3_2_1_14_1","volume-title":"MOTIF: A Large Malware Reference Dataset with Ground Truth Family Labels. arXiv:2111.15031 [cs.LG]","author":"Joyce Robert J.","year":"2021","unstructured":"Robert J. Joyce, Dev Amlani, Charles Nicholas, and Edward Raff. 2021. MOTIF: A Large Malware Reference Dataset with Ground Truth Family Labels. arXiv:2111.15031 [cs.LG]"},{"key":"e_1_3_2_1_15_1","unstructured":"Robert J Joyce Edward Raff and Charles Nicholas. 2021. Rank-1 Similarity Matrix Decomposition For Modeling Changes in Antivirus Consensus Through Time. (2021)."},{"key":"e_1_3_2_1_16_1","volume-title":"International conference on machine learning. PMLR, 1188--1196","author":"Le Quoc","year":"2014","unstructured":"Quoc Le and Tomas Mikolov. 2014. Distributed representations of sentences and documents. In International conference on machine learning. PMLR, 1188--1196."},{"key":"e_1_3_2_1_17_1","volume-title":"International Conference on Machine Learning. 671--679","author":"Li Ke","year":"2016","unstructured":"Ke Li and Jitendra Malik. 2016. Fast k-nearest neighbour search via Dynamic Continuous Indexing. In International Conference on Machine Learning. 671--679."},{"key":"e_1_3_2_1_18_1","volume-title":"SGDR: Stochastic Gradient Descent with Restarts. CoRR abs\/1608.03983","author":"Loshchilov Ilya","year":"2016","unstructured":"Ilya Loshchilov and Frank Hutter. 2016. SGDR: Stochastic Gradient Descent with Restarts. CoRR abs\/1608.03983 (2016). arXiv:1608.03983 http:\/\/arxiv.org\/abs\/1608.03983"},{"key":"e_1_3_2_1_19_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CTC.2013.9"},{"key":"e_1_3_2_1_21_1","volume-title":"NeurIPS 2020 Workshop: ML Retrospectives, Surveys & Meta-Analyses (ML-RSA). arXiv:2006","author":"Raff Edward","year":"2020","unstructured":"Edward Raff and Charles Nicholas. 2020. A Survey of Machine Learning Methods and Challenges for Windows Malware Classification. In NeurIPS 2020 Workshop: ML Retrospectives, Surveys & Meta-Analyses (ML-RSA). arXiv:2006.09271 http:\/\/arxiv.org\/abs\/2006.09271"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5994"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MALWARE.2015.7413680"},{"key":"e_1_3_2_1_24_1","volume-title":"Research in Attacks, Intrusions, and Defenses, Fabian Monrose, Marc Dacier, Gregory Blanc, and Joaquin Garcia-Alfaro (Eds.)","author":"Sebasti\u00e1n Marcos","unstructured":"Marcos Sebasti\u00e1n, Richard Rivera, Platon Kotzias, and Juan Caballero. 2016. AVclass: A Tool for Massive Malware Labeling. In Research in Attacks, Intrusions, and Defenses, Fabian Monrose, Marc Dacier, Gregory Blanc, and Joaquin Garcia-Alfaro (Eds.). Springer International Publishing, Cham, 230--253."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Silvia Sebasti\u00e1n and Juan Caballero. 2020. AVClass2: Massive Malware Tag Extraction from AV Labels. arXiv:2006.10615 [cs.CR]","DOI":"10.18239\/jornadas_2021.34.17"},{"key":"e_1_3_2_1_26_1","unstructured":"Shadowserver. [n. d.]. https:\/\/www.shadowserver.org\/statistics\/ Last accessed on 2022--10--26."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2015.02.149"},{"key":"e_1_3_2_1_28_1","volume-title":"Garnett (Eds.)","volume":"28","author":"Srivastava Rupesh K","year":"2015","unstructured":"Rupesh K Srivastava, Klaus Greff, and J\u00fcrgen Schmidhuber. 2015. Training Very Deep Networks. In Advances in Neural Information Processing Systems, C. Cortes, N. Lawrence, D. Lee, M. Sugiyama, and R. Garnett (Eds.), Vol. 28. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2015\/file\/215a71a12769b056c3c32e7299f1c5ed-Paper.pdf"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2018.11.001"},{"key":"e_1_3_2_1_30_1","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, ?ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc., 5998--6008. https:\/\/proceedings.neurips. cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_31_1","unstructured":"VirusTotal. [n. d.]. https:\/\/www.virustotal.com\/ Last accessed on 2023-06-09."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-017-3077-6"},{"key":"e_1_3_2_1_33_1","volume-title":"Measuring and Modeling the Label Dynamics of Online Anti-Malware Engines. In 29th USENIX Security Symposium (USENIX Security 20)","author":"Zhu Shuofei","year":"2020","unstructured":"Shuofei Zhu, Jianjun Shi, Limin Yang, Boqin Qin, Ziyi Zhang, Linhai Song, and Gang Wang. 2020. Measuring and Modeling the Label Dynamics of Online Anti-Malware Engines. In 29th USENIX Security Symposium (USENIX Security 20). USENIX Association, 2361--2378. https:\/\/www.usenix.org\/conference\/usenixsecurity20\/presentation\/zhu"}],"event":{"name":"CCS '23: ACM SIGSAC Conference on Computer and Communications Security","location":"Copenhagen Denmark","acronym":"CCS '23","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the 16th ACM Workshop on Artificial Intelligence and Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605764.3623907","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605764.3623907","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:36:35Z","timestamp":1755912995000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605764.3623907"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,26]]},"references-count":33,"alternative-id":["10.1145\/3605764.3623907","10.1145\/3605764"],"URL":"https:\/\/doi.org\/10.1145\/3605764.3623907","relation":{},"subject":[],"published":{"date-parts":[[2023,11,26]]},"assertion":[{"value":"2023-11-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}