{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T16:02:56Z","timestamp":1780588976027,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":119,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2453818"],"award-info":[{"award-number":["2453818"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2453819"],"award-info":[{"award-number":["2453819"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2453820"],"award-info":[{"award-number":["2453820"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1145\/3779208.3785264","type":"proceedings-article","created":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T15:21:58Z","timestamp":1780586518000},"page":"788-804","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Taming Data Challenges in ML-based Security Tasks Using Generative AI"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4088-8408","authenticated-orcid":false,"given":"Shravya","family":"Kanchi","sequence":"first","affiliation":[{"name":"Virginia Tech, Blacksburg, 
USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0684-4971","authenticated-orcid":false,"given":"Neal","family":"Mangaokar","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7247-0490","authenticated-orcid":false,"given":"Aravind","family":"Cheruvu","sequence":"additional","affiliation":[{"name":"Virginia Tech, Blacksburg, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2285-7490","authenticated-orcid":false,"given":"Sifat Muhammad","family":"Abdullah","sequence":"additional","affiliation":[{"name":"Virginia Tech, Blacksburg, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0539-3742","authenticated-orcid":false,"given":"Shirin","family":"Nilizadeh","sequence":"additional","affiliation":[{"name":"University of Texas, Arlington, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4907-3687","authenticated-orcid":false,"given":"Atul","family":"Prakash","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6559-9689","authenticated-orcid":false,"given":"Bimal","family":"Viswanath","sequence":"additional","affiliation":[{"name":"Virginia Tech, Blacksburg, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2015. Microsoft Malware Classification Challenge - Kaggle. https:\/\/www.kaggle.com\/c\/malware-classification."},{"key":"e_1_3_2_1_2_1","volume-title":"Coefficient of variation. Encyclopedia of research design","author":"Abdi Herv\u00e9","year":"2010","unstructured":"Herv\u00e9 Abdi. 2010. 
Coefficient of variation. Encyclopedia of research design (2010)."},{"key":"e_1_3_2_1_3_1","volume-title":"Active Learning: A Survey. In Data Classification","author":"Aggarwal Charu C","year":"2014","unstructured":"Charu C Aggarwal, Xiangnan Kong, Quanquan Gu, Jiawei Han, and S Yu Philip. 2014. Active Learning: A Survey. In Data Classification. Chapman and Hall\/CRC."},{"key":"e_1_3_2_1_4_1","volume-title":"Proc. Of ACM CODASPY.","author":"Ahmadi Mansour","year":"2016","unstructured":"Mansour Ahmadi, Dmitry Ulyanov, Stanislav Semenov, Mikhail Trofimov, and Giorgio Giacinto. 2016. Novel Feature Extraction, Selection and Fusion for Effective Malware Family Classification. In Proc. Of ACM CODASPY."},{"key":"e_1_3_2_1_5_1","volume-title":"Proc. of ACM AISec.","author":"Andresini Giuseppina","year":"2021","unstructured":"Giuseppina Andresini, Feargus Pendlebury, Fabio Pierazzi, Corrado Loglisci, Annalisa Appice, and Lorenzo Cavallaro. 2021. INSOMNIA: Towards Concept-Drift Robustness in Network Intrusion Detection. In Proc. of ACM AISec."},{"key":"e_1_3_2_1_6_1","volume-title":"A contrastive learning approach for training variational autoencoder priors. Advances in neural information processing systems 34","author":"Aneja Jyoti","year":"2021","unstructured":"Jyoti Aneja, Alex Schwing, Jan Kautz, and Arash Vahdat. 2021. A contrastive learning approach for training variational autoencoder priors. Advances in neural information processing systems 34 (2021), 480\u2013493."},{"key":"e_1_3_2_1_7_1","volume-title":"Proc. of NetAI Workshop.","author":"Bahnasy Mahmoud","year":"2020","unstructured":"Mahmoud Bahnasy, Fenglin Li, Shihan Xiao, and Xiangle Cheng. 2020. DeepBGP: A Machine Learning Approach for BGP Configuration Synthesis. In Proc. of NetAI Workshop."},{"key":"e_1_3_2_1_8_1","volume-title":"Proc. of CVPR.","author":"Yutong","unstructured":"Yutong Bai et al. 2024. Sequential modeling enables scalable learning for large vision models. In Proc. 
of CVPR."},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. of IEEE S&P.","author":"Barbero Federico","year":"2022","unstructured":"Federico Barbero, Feargus Pendlebury, Fabio Pierazzi, and Lorenzo Cavallaro. 2022. Transcending TRANSCEND: Revisiting Malware Classification in the Presence of Concept Drift. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. of USENIX Security.","author":"Barradas Diogo","year":"2018","unstructured":"Diogo Barradas, Nuno Santos, and Lu\u00eds Rodrigues. 2018. Effective Detection of Multimedia Protocol Tunneling using Machine Learning. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_11_1","volume-title":"Taking on the Curse of Dimensionality in Joint Distributions Using Neural Networks","author":"Bengio Samy","year":"2000","unstructured":"Samy Bengio and Yoshua Bengio. 2000. Taking on the Curse of Dimensionality in Joint Distributions Using Neural Networks. IEEE TNN (2000)."},{"key":"e_1_3_2_1_12_1","volume-title":"Pattern recognition and machine learning","author":"Bishop Christopher M","unstructured":"Christopher M Bishop and Nasser M Nasrabadi. 2006. Pattern recognition and machine learning. Springer."},{"key":"e_1_3_2_1_13_1","volume-title":"SMOTE for high-dimensional class-imbalanced data. BMC Bioinformatics","author":"Blagus Rok","year":"2013","unstructured":"Rok Blagus and Lara Lusa. 2013. SMOTE for high-dimensional class-imbalanced data. BMC Bioinformatics (2013)."},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. of USENIX Security.","author":"Bollinger Dino","year":"2022","unstructured":"Dino Bollinger, Karel Kubicek, Carlos Cotrini, and David Basin. 2022. Automating Cookie Consent and GDPR Violation Detection. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. of ICLR.","author":"Borisov Vadim","year":"2023","unstructured":"Vadim Borisov, Kathrin Se\u00dfler, Tobias Leemann, Martin Pawelczyk, and Gjergji Kasneci. 2023. Language Models are Realistic Tabular Data Generators. In Proc. 
of ICLR."},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. of CVPR.","author":"Chang Huiwen","year":"2022","unstructured":"Huiwen Chang, Han Zhang, Lu Jiang, Ce Liu, and William T Freeman. 2022. MaskGIT: Masked Generative Image Transformer. In Proc. of CVPR."},{"key":"e_1_3_2_1_17_1","volume-title":"SMOTE: Synthetic Minority Over-sampling Technique. JAIR","author":"Chawla Nitesh V","year":"2002","unstructured":"Nitesh V Chawla, Kevin W Bowyer, Lawrence O Hall, and W Philip Kegelmeyer. 2002. SMOTE: Synthetic Minority Over-sampling Technique. JAIR (2002)."},{"key":"e_1_3_2_1_18_1","volume-title":"Proc. of IEEE S&P.","author":"Chen Yuqi","year":"2018","unstructured":"Yuqi Chen, Christopher M Poskitt, and Jun Sun. 2018. Learning from Mutants: Using Code Mutation to Learn and Monitor Invariants of a Cyber-Physical System. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_19_1","volume-title":"Proc. of USENIX Security.","author":"Chen Yizheng","year":"2021","unstructured":"Yizheng Chen, Shiqi Wang, Weifan Jiang, Asaf Cidon, and Suman Jana. 2021. Cost-Aware Robust Tree Ensembles for Security Applications. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_20_1","volume-title":"Proc. of USENIX Security.","author":"Chen Yizheng","year":"2020","unstructured":"Yizheng Chen, Shiqi Wang, Dongdong She, and Suman Jana. 2020. On Training Robust PDF Malware Classifiers. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_21_1","unstructured":"Phillip Chlap et al. 2021. A Review of Medical Image Data Augmentation Techniques for Deep Learning Applications. JMIRO (2021)."},{"key":"e_1_3_2_1_22_1","volume-title":"Time Series FeatuRe Extraction on basis of Scalable Hypothesis tests (tsfresh - A Python package). Neurocomputing","author":"Christ Maximilian","year":"2018","unstructured":"Maximilian Christ, Nils Braun, Julius Neuffer, and Andreas W Kempa-Liehr. 2018. Time Series FeatuRe Extraction on basis of Scalable Hypothesis tests (tsfresh - A Python package). 
Neurocomputing (2018)."},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. of CoRR abs\/2210","author":"Won Hyung","year":"2022","unstructured":"Hyung Won Chung et al. 2022. Scaling Instruction-Finetuned Language Models. Proc. of CoRR abs\/2210.11416 (2022)."},{"key":"e_1_3_2_1_24_1","volume-title":"Proc. of USENIX Security.","author":"Cidon Asaf","year":"2019","unstructured":"Asaf Cidon, Lior Gavish, Itay Bleier, Nadia Korshun, Marco Schweighauser, and Alexey Tsitkin. 2019. High Precision Detection of Business Email Compromise. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_25_1","volume-title":"DeepSMOTE: Fusing Deep Learning and SMOTE for Imbalanced Data","author":"Dablain D","year":"2021","unstructured":"D Dablain, B Krawczyk, and NV Chawla. 2021. DeepSMOTE: Fusing Deep Learning and SMOTE for Imbalanced Data. IEEE TNNLS (2021)."},{"key":"e_1_3_2_1_26_1","volume-title":"Proc. of ICDM.","author":"Dai Wangzhi","year":"2019","unstructured":"Wangzhi Dai, Kenney Ng, Kristen Severson, Wei Huang, Fred Anderson, and Collin Stultz. 2019. Generative Oversampling with a Contrastive Variational Autoencoder. In Proc. of ICDM."},{"key":"e_1_3_2_1_27_1","volume-title":"Proc. of ICLR.","author":"Dathathri Sumanth","year":"2020","unstructured":"Sumanth Dathathri, Andrea Madotto, Janice Lan, Jane Hung, Eric Frank, Piero Molino, Jason Yosinski, and Rosanne Liu. 2020. Plug and Play Language Models: A Simple Approach to Controlled Text Generation. In Proc. of ICLR."},{"key":"e_1_3_2_1_28_1","volume-title":"Indexing by Latent Semantic Analysis. JASIST","author":"Deerwester Scott","year":"1990","unstructured":"Scott Deerwester, Susan T Dumais, George W Furnas, Thomas K Landauer, and Richard Harshman. 1990. Indexing by Latent Semantic Analysis. JASIST (1990)."},{"key":"e_1_3_2_1_29_1","volume-title":"Measuring skewness: a forgotten statistic? Journal of statistics education","author":"Doane David P","year":"2011","unstructured":"David P Doane and Lori E Seward. 2011. 
Measuring skewness: a forgotten statistic? Journal of statistics education (2011)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. of CCS.","author":"Dodia Priyanka","year":"2022","unstructured":"Priyanka Dodia, Mashael AlSabah, Omar Alrawi, and Tao Wang. 2022. Exposing the Rat in the Tunnel: Using Traffic Analysis for Tor-based Malware Detection. In Proc. of CCS."},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. of USENIX Security.","author":"Downing Evan","year":"2021","unstructured":"Evan Downing, Yisroel Mirsky, Kyuhong Park, and Wenke Lee. 2021. DeepReflect: Discovering Malicious Functionality through Binary Reconstruction. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_32_1","volume-title":"Multiple Comparisons Using Rank Sums. Technometrics","author":"Dunn Olive Jean","year":"1964","unstructured":"Olive Jean Dunn. 1964. Multiple Comparisons Using Rank Sums. Technometrics (1964)."},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. of CoRR abs\/1809","author":"Fajardo Val Andrei","year":"2018","unstructured":"Val Andrei Fajardo, David Findlay, Roshanak Houmanfar, Charu Jaiswal, Jiaxi Liang, and Honglei Xie. 2018. Vos: a method for variational oversampling of imbalanced data. Proc. of CoRR abs\/1809.02596 (2018)."},{"key":"e_1_3_2_1_34_1","volume-title":"Marking the 15-year Anniversary. JAIR","author":"Fern\u00e1ndez Alberto","year":"2018","unstructured":"Alberto Fern\u00e1ndez, Salvador Garcia, Francisco Herrera, and Nitesh V Chawla. 2018. SMOTE for Learning from Imbalanced Data: Progress and Challenges, Marking the 15-year Anniversary. JAIR (2018)."},{"key":"e_1_3_2_1_35_1","volume-title":"Extremely Randomized Trees. Machine learning","author":"Geurts Pierre","year":"2006","unstructured":"Pierre Geurts, Damien Ernst, and Louis Wehenkel. 2006. Extremely Randomized Trees. Machine learning (2006)."},{"key":"e_1_3_2_1_36_1","volume-title":"Proc. of IEEE S&P.","author":"Gong Jiajun","year":"2022","unstructured":"Jiajun Gong, Wuqi Zhang, Charles Zhang, and Tao Wang. 
2022. Surakav: Generating Realistic Traces for a Strong Website Fingerprinting Defense. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_37_1","volume-title":"Proc. of NeurIPS.","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative Adversarial Nets. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_38_1","volume-title":"Proc. of NeurIPS.","author":"Gulati Manbir","year":"2024","unstructured":"Manbir Gulati and Paul Roysdon. 2024. TabMT: Generating Tabular data with Masked Transformers. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_39_1","volume-title":"Proc. of NeurIPS.","author":"Gulrajani Ishaan","year":"2017","unstructured":"Ishaan Gulrajani, Faruk Ahmed, Martin Arjovsky, Vincent Dumoulin, and Aaron C Courville. 2017. Improved Training of Wasserstein GANs. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_40_1","volume-title":"Proc. of USENIX Security.","author":"Guo Wenbo","year":"2019","unstructured":"Wenbo Guo, Dongliang Mu, Xinyu Xing, Min Du, and Dawn Song. 2019. DEEP-VSA: Facilitating Value-set Analysis with Deep Learning for Postmortem Program Analysis. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of USENIX Security.","author":"Xueyuan","unstructured":"Xueyuan Han et al. 2021. SIGL: Securing Software Installations Through Deep Graph Learning. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_42_1","volume-title":"Proc. of CSCWD.","author":"Hao Xingran","year":"2021","unstructured":"Xingran Hao, Zhengwei Jiang, Qingsai Xiao, Qiuyun Wang, Yepeng Yao, Baoxu Liu, and Jian Liu. 2021. Producing More with Less: A GAN-based Network Attack Detection Approach for Imbalanced Data. In Proc. of CSCWD."},{"key":"e_1_3_2_1_43_1","volume-title":"Proc. of USENIX Security.","author":"Hayes Jamie","year":"2016","unstructured":"Jamie Hayes and George Danezis. 2016. 
k-fingerprinting: a Robust Scalable Website Fingerprinting Technique. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_44_1","volume-title":"Proc. of IEEE WCCI.","author":"He Haibo","year":"2008","unstructured":"Haibo He, Yang Bai, Edwardo A Garcia, and Shutao Li. 2008. ADASYN: Adaptive Synthetic Sampling Approach for Imbalanced Learning. In Proc. of IEEE WCCI."},{"key":"e_1_3_2_1_45_1","volume-title":"AutoML: A Survey of the State-of-the-Art. Knowledge-Based Systems","author":"He Xin","year":"2021","unstructured":"Xin He, Kaiyong Zhao, and Xiaowen Chu. 2021. AutoML: A Survey of the State-of-the-Art. Knowledge-Based Systems (2021)."},{"key":"e_1_3_2_1_46_1","volume-title":"Proc. of USENIX Security.","author":"Ho Grant","year":"2019","unstructured":"Grant Ho, Asaf Cidon, Lior Gavish, Marco Schweighauser, Vern Paxson, Stefan Savage, Geoffrey M Voelker, and David Wagner. 2019. Detecting and Characterizing Lateral Phishing at Scale. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_47_1","volume-title":"Proc. of CCS.","author":"Holland Jordan","year":"2021","unstructured":"Jordan Holland, Paul Schmitt, Nick Feamster, and Prateek Mittal. 2021. New Directions in Automated Traffic Analysis. In Proc. of CCS."},{"key":"e_1_3_2_1_48_1","volume-title":"Proc. of IEEE S&P.","author":"Jan Steve TK","year":"2020","unstructured":"Steve TK Jan, Qingying Hao, Tianrui Hu, Jiameng Pu, Sonal Oswal, Gang Wang, and Bimal Viswanath. 2020. Throwing Darts in the Dark? Detecting Bots with Limited Data using Neural Data Augmentation. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_49_1","volume-title":"Netdiffusion: Network data augmentation through protocol-constrained traffic generation. POMACS","author":"Xi Jiang","year":"2024","unstructured":"Xi Jiang et al. 2024. Netdiffusion: Network data augmentation through protocol-constrained traffic generation. POMACS (2024)."},{"key":"e_1_3_2_1_50_1","volume-title":"Proc. 
of USENIX Security.","author":"Jordaney Roberto","year":"2017","unstructured":"Roberto Jordaney, Kumar Sharad, Santanu K Dash, Zhi Wang, Davide Papini, Ilia Nouretdinov, and Lorenzo Cavallaro. 2017. Transcend: Detecting Concept Drift in Malware Classification Models. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_51_1","volume-title":"Proc. of NAACL-HLT.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proc. of NAACL-HLT."},{"key":"e_1_3_2_1_52_1","volume-title":"CTRL: A Conditional Transformer Language Model for Controllable Generation. CoRR abs\/1909.05858","author":"Keskar Nitish Shirish","year":"2019","unstructured":"Nitish Shirish Keskar, Bryan McCann, Lav R Varshney, Caiming Xiong, and Richard Socher. 2019. CTRL: A Conditional Transformer Language Model for Controllable Generation. CoRR abs\/1909.05858 (2019)."},{"key":"e_1_3_2_1_53_1","volume-title":"Proc. of CVPR.","author":"Khorram Saeed","year":"2024","unstructured":"Saeed Khorram, Mingqi Jiang, Mohamad Shahbazi, Mohamad H Danesh, and Li Fuxin. 2024. Taming the Tail in Class-Conditional GANs: Knowledge Sharing via Unconditional Training at Lower Resolutions. In Proc. of CVPR."},{"key":"e_1_3_2_1_54_1","volume-title":"Proc. of ICLR.","author":"Kim Juno","year":"2024","unstructured":"Juno Kim, Jaehyuk Kwon, Mincheol Cho, Hyunjong Lee, and Joong-Ho Won. 2024. t3-Variational Autoencoder: Learning Heavy-tailed Data with Student's t and Power Divergence. In Proc. of ICLR."},{"key":"e_1_3_2_1_55_1","volume-title":"Proc. of ICLR.","author":"Kim Jayoung","year":"2023","unstructured":"Jayoung Kim, Chaejeong Lee, and Noseong Park. 2023. STaSy: Score-based Tabular Data Synthesis. In Proc. of ICLR."},{"key":"e_1_3_2_1_56_1","volume-title":"Proc. 
of ICML.","author":"Kotelnikov Akim","year":"2023","unstructured":"Akim Kotelnikov, Dmitry Baranchuk, Ivan Rubachev, and Artem Babenko. 2023. TabDDPM: Modelling Tabular Data with Diffusion Models. In Proc. of ICML."},{"key":"e_1_3_2_1_57_1","volume-title":"Combined Cleaning and Resampling Algorithm for Multi-Class Imbalanced Data with Label Noise. Knowledge-Based Systems","author":"Koziarski Micha\u0142","year":"2020","unstructured":"Micha\u0142 Koziarski, Micha\u0142 Wo\u017aniak, and Bartosz Krawczyk. 2020. Combined Cleaning and Resampling Algorithm for Multi-Class Imbalanced Data with Label Noise. Knowledge-Based Systems (2020)."},{"key":"e_1_3_2_1_58_1","volume-title":"Radial-Based Oversampling for Multiclass Imbalanced Data Classification","author":"Krawczyk Bartosz","year":"2019","unstructured":"Bartosz Krawczyk, Micha\u0142 Koziarski, and Micha\u0142 Wo\u017aniak. 2019. Radial-Based Oversampling for Multiclass Imbalanced Data Classification. IEEE TNNLS (2019)."},{"key":"e_1_3_2_1_59_1","volume-title":"Use of Ranks in One-Criterion Variance Analysis. JASA","author":"Kruskal William H","year":"1952","unstructured":"William H Kruskal and W Allen Wallis. 1952. Use of Ranks in One-Criterion Variance Analysis. JASA (1952)."},{"key":"e_1_3_2_1_60_1","volume-title":"Proc. of ICML.","author":"Lee Chaejeong","year":"2023","unstructured":"Chaejeong Lee, Jayoung Kim, and Noseong Park. 2023. CoDi: Co-evolving Contrastive Diffusion Models for Mixed-type Tabular Synthesis. In Proc. of ICML."},{"key":"e_1_3_2_1_61_1","volume-title":"Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning. JMLR","author":"Lema\u00eetre Guillaume","year":"2017","unstructured":"Guillaume Lema\u00eetre, Fernando Nogueira, and Christos K Aridas. 2017. Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning. JMLR (2017)."},{"key":"e_1_3_2_1_62_1","volume-title":"Proc. 
of EMNLP.","author":"Huihan","unstructured":"Huihan Li et al. 2024. In Search of the Long-Tail: Systematic Generation of Long-Tail Inferential Knowledge via Logical Rule Guided Search. In Proc. of EMNLP."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2018.00034"},{"key":"e_1_3_2_1_64_1","volume-title":"Proc. of MLSys.","author":"Liam","unstructured":"Liam Li et al. 2020. A System for Massively Parallel Hyperparameter Tuning. In Proc. of MLSys."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419394.3423643"},{"key":"e_1_3_2_1_66_1","unstructured":"Yuzhen Lu Dong Chen Ebenezer Olaniyi and Yanbo Huang. 2022. Generative Adversarial Networks (GANs) for Image Augmentation in Agriculture: A Systematic Review. Comput. Electron. Agric. (2022)."},{"key":"e_1_3_2_1_67_1","unstructured":"James Lucas George Tucker Roger Grosse and Mohammad Norouzi. 2019. Understanding posterior collapse in generative latent variable models. (2019)."},{"key":"e_1_3_2_1_68_1","volume-title":"A Comprehensive Survey of Data Augmentation in Visual Reinforcement Learning. CoRR abs\/2210.04561","author":"Ma Guozheng","year":"2022","unstructured":"Guozheng Ma, Zhen Wang, Zhecheng Yuan, Xueqian Wang, Bo Yuan, and Dacheng Tao. 2022. A Comprehensive Survey of Data Augmentation in Visual Reinforcement Learning. CoRR abs\/2210.04561 (2022)."},{"key":"e_1_3_2_1_69_1","volume-title":"Proc. of NeurIPS Workshop.","author":"Meghdouri Fares","year":"2021","unstructured":"Fares Meghdouri, Thomas Schmied, Thomas G\u00e4rtner, and Tanja Zseby. 2021. Controllable Network Data Balancing with GANs. In Proc. of NeurIPS Workshop."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2018.23204"},{"key":"e_1_3_2_1_71_1","volume-title":"Design and Analysis of Experiments","author":"Montgomery Douglas C.","unstructured":"Douglas C. Montgomery. 2019. Design and Analysis of Experiments (10th ed.). 
John Wiley & Sons.","edition":"10"},{"key":"e_1_3_2_1_72_1","volume-title":"Proc. of ICSE.","author":"Yu","unstructured":"Yu Nong et al. 2024. VGX: Large-Scale Sample Generation for Boosting Learning-Based Software Vulnerability Analyses. In Proc. of ICSE."},{"key":"e_1_3_2_1_73_1","volume-title":"Proc. of ICLR.","author":"Dongmin","unstructured":"Dongmin Park et al. 2025. Rare-to-Frequent: Unlocking Compositional Generation Power of Diffusion Models on Rare Concepts with LLM Guidance. In Proc. of ICLR."},{"key":"e_1_3_2_1_74_1","volume-title":"Proc. of AsiaCCS.","author":"Park Sunnyeo","year":"2019","unstructured":"Sunnyeo Park, Dohyeok Kim, and Sooel Son. 2019. An Empirical Study of Prioritizing JavaScript Engine Crashes via Machine Learning. In Proc. of AsiaCCS."},{"key":"e_1_3_2_1_75_1","volume-title":"Proc. of IEEE DSAA.","author":"Patki Neha","year":"2016","unstructured":"Neha Patki, Roy Wedge, and Kalyan Veeramachaneni. 2016. The Synthetic Data Vault. In Proc. of IEEE DSAA."},{"key":"e_1_3_2_1_76_1","volume-title":"Proc. of IEEE S&P.","author":"Peeters Christian","year":"2018","unstructured":"Christian Peeters, Hadi Abdullah, Nolen Scaife, Jasmine Bowers, Patrick Traynor, Bradley Reaves, and Kevin Butler. 2018. Sonar: Detecting SS7 Redirection Attacks With Audio-Based Distance Bounding. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_77_1","volume-title":"Proc. of IEEE S&P.","author":"Pierazzi Fabio","year":"2020","unstructured":"Fabio Pierazzi, Feargus Pendlebury, Jacopo Cortellazzi, and Lorenzo Cavallaro. 2020. Intriguing Properties of Adversarial ML Attacks in the Problem Space. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_78_1","unstructured":"PyTorch Contributors. 2023. PyTorch Vision Transforms. https:\/\/pytorch.org\/vision\/0.15\/transforms.html. Accessed: 2025-02-14."},{"key":"e_1_3_2_1_79_1","volume-title":"Proc. 
of Asia CCS.","author":"Qiu Han","year":"2021","unstructured":"Han Qiu, Yi Zeng, Shangwei Guo, Tianwei Zhang, Meikang Qiu, and Bhavani Thuraisingham. 2021. Deepsweep: An evaluation framework for mitigating DNN backdoor attacks using data augmentation. In Proc. of Asia CCS."},{"key":"e_1_3_2_1_80_1","volume-title":"Proc. of KDD.","author":"Ribeiro Marco Tulio","year":"2016","unstructured":"Marco Tulio Ribeiro, Sameer Singh, and Carlos Guestrin. 2016. Why should i trust you?: Explaining the predictions of any classifier. In Proc. of KDD."},{"key":"e_1_3_2_1_81_1","volume-title":"Proc. ACM IMC.","author":"Sabnis Anirudh","year":"2021","unstructured":"Anirudh Sabnis and Ramesh K Sitaraman. 2021. TRAGEN: A Synthetic Trace Generator for Realistic Cache Simulations. In Proc. ACM IMC."},{"key":"e_1_3_2_1_82_1","volume-title":"Proc. of AAAI.","author":"Samuel Dvir","year":"2024","unstructured":"Dvir Samuel, Rami Ben-Ari, Simon Raviv, Nir Darshan, and Gal Chechik. 2024. Generating images of rare concepts using pre-trained diffusion models. In Proc. of AAAI."},{"key":"e_1_3_2_1_83_1","volume-title":"Proc. of NeurIPS.","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. Distil-BERT, a distilled version of BERT: smaller, faster, cheaper and lighter. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_84_1","volume-title":"Proc. of USENIX Security.","author":"Sch\u00fcppen Samuel","year":"2018","unstructured":"Samuel Sch\u00fcppen, Dominik Teubert, Patrick Herrmann, and Ulrike Meyer. 2018. FANCI : Feature-based Automated NXDomain Classification and Intelligence. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_85_1","volume-title":"Proc. of USENIX Security.","author":"Sebastian B","year":"2017","unstructured":"B Sebastian, C Christian, and P Alexander. 2017. Predicting the Resilience of Obfuscated Code Against Symbolic Execution Attacks via Machine Learning. In Proc. 
of USENIX Security."},{"key":"e_1_3_2_1_86_1","volume-title":"Proc. of ICML.","author":"Seedat Nabeel","unstructured":"Nabeel Seedat, Nicolas Huynh, Boris van Breugel, and Mihaela van der Schaar. 2023. Curated LLM: Synergy of LLMs and Data Curation for tabular augmentation in low-data regimes. In Proc. of ICML."},{"key":"e_1_3_2_1_87_1","volume-title":"Proc. of NIPS.","author":"Shao Jie","year":"2024","unstructured":"Jie Shao, Ke Zhu, Hanxiao Zhang, and Jianxin Wu. 2024. DiffuLT: Diffusion for Long-tail Recognition Without External Knowledge. In Proc. of NIPS."},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.5220\/0006639801080116"},{"key":"e_1_3_2_1_89_1","volume-title":"Proc. of IEEE S&P.","author":"She Dongdong","year":"2020","unstructured":"Dongdong She, Yizheng Chen, Abhishek Shah, Baishakhi Ray, and Suman Jana. 2020. Neutaint: Efficient Dynamic Taint Analysis with Neural Networks. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_90_1","volume-title":"Proc. of IEEE S&P.","author":"She Dongdong","year":"2019","unstructured":"Dongdong She, Kexin Pei, Dave Epstein, Junfeng Yang, Baishakhi Ray, and Suman Jana. 2019. NEUZZ: Efficient Fuzzing with Neural Program Smoothing. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_91_1","volume-title":"Proc. of USENIX Security.","author":"Sikder Amit Kumar","year":"2017","unstructured":"Amit Kumar Sikder, Hidayet Aksu, and A Selcuk Uluagac. 2017. 6thSense: A Context-aware Sensor-based Attack Detector for Smart Devices. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_92_1","volume-title":"Proc. of CoRR abs\/2302","author":"Solatorio Aivin V","year":"2023","unstructured":"Aivin V Solatorio and Olivier Dupriez. 2023. REaLTabFormer: Generating Realistic Relational and Tabular Data using Transformers. Proc. of CoRR abs\/2302.02041 (2023)."},{"key":"e_1_3_2_1_93_1","volume-title":"Proc. of USENIX Security.","author":"Stadler Theresa","year":"2022","unstructured":"Theresa Stadler, Bristena Oprisanu, and Carmela Troncoso. 2022. 
Synthetic Data - Anonymisation Groundhog Day. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_94_1","volume-title":"Proc. of CoRR abs\/2412","author":"Stocksieker Samuel","year":"2024","unstructured":"Samuel Stocksieker, Denys Pommeret, and Arthur Charpentier. 2024. Data Augmentation with Variational Autoencoder for Imbalanced Dataset. Proc. of CoRR abs\/2412.07039 (2024)."},{"key":"e_1_3_2_1_95_1","volume-title":"Proc. of IEEE DSAA.","author":"Taylor Adrian","year":"2016","unstructured":"Adrian Taylor, Sylvain Leblanc, and Nathalie Japkowicz. 2016. Anomaly Detection in Automobile Control Network Data with Long Short-Term Memory Networks. In Proc. of IEEE DSAA."},{"key":"e_1_3_2_1_96_1","volume-title":"Proc. of NDSS.","author":"Tekiner Ege","year":"2022","unstructured":"Ege Tekiner, Abbas Acar, and A Selcuk Uluagac. 2022. A Lightweight IoT Cryptojacking Detection Mechanism in Heterogeneous Smart Home Networks. In Proc. of NDSS."},{"key":"e_1_3_2_1_97_1","volume-title":"Proc. of ACM IMC.","author":"Testart Cecilia","year":"2019","unstructured":"Cecilia Testart, Philipp Richter, Alistair King, Alberto Dainotti, and David Clark. 2019. Profiling BGP Serial Hijackers: Capturing Persistent Misbehavior in the Global Routing Table. In Proc. of ACM IMC."},{"key":"e_1_3_2_1_98_1","volume-title":"Proc. of IEEE S&P.","author":"Thirumuruganathan Saravanan","year":"2022","unstructured":"Saravanan Thirumuruganathan, Mohamed Nabeel, Euijin Choo, Issa Khalil, and Ting Yu. 2022. SIRAJ: A Unified Framework for Aggregation of Malicious Entity Detectors. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_99_1","unstructured":"Romain Thomas. 2017. LIEF - Library to Instrument Executable Formats. https:\/\/lief.quarkslab.com\/."},{"key":"e_1_3_2_1_100_1","volume-title":"Proc. of NeurIPS.","author":"den Oord Aaron Van","year":"2016","unstructured":"Aaron Van den Oord, Nal Kalchbrenner, Lasse Espeholt, Oriol Vinyals, Alex Graves, et al. 2016. Conditional Image Generation with PixelCNN Decoders. 
In Proc. of NeurIPS."},{"key":"e_1_3_2_1_101_1","volume-title":"Proc. of NeurIPS.","author":"Den Oord Aaron Van","year":"2017","unstructured":"Aaron Van Den Oord, Oriol Vinyals, et al. 2017. Neural Discrete Representation Learning. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_102_1","volume-title":"Visualizing Data using t-SNE. JMLR","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing Data using t-SNE. JMLR (2008)."},{"key":"e_1_3_2_1_103_1","volume-title":"Proc. of NIPS.","author":"Wang Pengkun","year":"2024","unstructured":"Pengkun Wang, Zhe Zhao, HaiBin Wen, Fanfu Wang, Binwu Wang, Qingfu Zhang, and Yang Wang. 2024. LLM-AutoDA: Large Language Model-Driven Automatic Data Augmentation for Long-tailed Problems. In Proc. of NIPS."},{"key":"e_1_3_2_1_104_1","volume-title":"Tree-Structured Parzen Estimator: Understanding Its Algorithm Components and Their Roles for Better Empirical Performance. CoRR abs\/2304.11127","author":"Watanabe Shuhei","year":"2023","unstructured":"Shuhei Watanabe. 2023. Tree-Structured Parzen Estimator: Understanding Its Algorithm Components and Their Roles for Better Empirical Performance. CoRR abs\/2304.11127 (2023)."},{"key":"e_1_3_2_1_105_1","volume-title":"Proc. of USENIX Security.","author":"Xu Jiahua","year":"2019","unstructured":"Jiahua Xu and Benjamin Livshits. 2019. The Anatomy of a Cryptocurrency Pump-and-Dump Scheme. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_106_1","volume-title":"Proc. of NIPS.","author":"Xu Lei","year":"2019","unstructured":"Lei Xu, Maria Skoularidou, Alfredo Cuesta-Infante, and Kalyan Veeramachaneni. 2019. Modeling tabular data using conditional gan. In Proc. of NIPS."},{"key":"e_1_3_2_1_107_1","volume-title":"STAN: Synthetic Network Traffic Generation with Generative Neural Models. 
In Deployable Machine Learning for Security Defense: Second International Workshop, MLHat","author":"Shengzhe Xu","year":"2021","unstructured":"Shengzhe Xu et al. 2021. STAN: Synthetic Network Traffic Generation with Generative Neural Models. In Deployable Machine Learning for Security Defense: Second International Workshop, MLHat 2021, Virtual Event, August 15, 2021, Proceedings 2."},{"key":"e_1_3_2_1_108_1","volume-title":"Proc. of USENIX Security.","author":"Teng","unstructured":"Teng Xu et al. 2021. Deep Entity Classification: Abusive Account Detection for Online Social Networks. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_109_1","volume-title":"Toward Effective Intrusion Detection Using Log-Cosh Conditional Variational Autoencoder","author":"Xu Xing","year":"2020","unstructured":"Xing Xu, Jie Li, Yang Yang, and Fumin Shen. 2020. Toward Effective Intrusion Detection Using Log-Cosh Conditional Variational Autoencoder. IEEE IoT (2020)."},{"key":"e_1_3_2_1_110_1","volume-title":"Proc. of USENIX Security.","author":"Limin","unstructured":"Limin Yang et al. 2021. CADE: Detecting and Explaining Concept Drift Samples for Security Applications. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_111_1","volume-title":"Proc. of IEEE S&P Workshop.","author":"Yang Limin","year":"2021","unstructured":"Limin Yang, Arridhana Ciptadi, Ihar Laziuk, Ali Ahmadzadeh, and Gang Wang. 2021. BODMAS: An Open Dataset for Learning based Temporal Analysis of PE Malware. In Proc. of IEEE S&P Workshop."},{"key":"e_1_3_2_1_112_1","volume-title":"Proc. of IEEE S&P.","author":"Yang Zhiju","year":"2022","unstructured":"Zhiju Yang, Weiping Pei, Monchu Chen, and Chuan Yue. 2022. WTAGRAPH: Web Tracking and Advertising Detection using Graph Neural Networks. In Proc. of IEEE S&P."},{"key":"e_1_3_2_1_113_1","volume-title":"Proc. of SIGCOMM.","author":"Yin Yucheng","year":"2022","unstructured":"Yucheng Yin, Zinan Lin, Minhao Jin, Giulia Fanti, and Vyas Sekar. 2022. 
Practical gan-based synthetic ip header trace generation using netshare. In Proc. of SIGCOMM."},{"key":"e_1_3_2_1_114_1","volume-title":"Proc. of USENIX Security.","author":"Yu Lingjing","year":"2020","unstructured":"Lingjing Yu, Bo Luo, Jun Ma, Zhaoyu Zhou, and Qingyun Liu. 2020. You Are What You Broadcast: Identification of Mobile and IoT Devices from (Public) WiFi. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_115_1","volume-title":"Proc. of ICLR.","author":"Hengrui","unstructured":"Hengrui Zhang et al. 2024. Mixed-Type Tabular Data Synthesis with Score-Based Diffusion in Latent Space. In Proc. of ICLR."},{"key":"e_1_3_2_1_116_1","volume-title":"Proc. of ACML.","author":"Zhao Zilong","year":"2021","unstructured":"Zilong Zhao, Adittya Kunar, Robert Birke, and Lydia Y Chen. 2021. CTAB-GAN: Effective Table Data Synthesizing. In Proc. of ACML."},{"key":"e_1_3_2_1_117_1","volume-title":"Hiek Van der Scheer, and Lydia Y Chen","author":"Zhao Zilong","year":"2024","unstructured":"Zilong Zhao, Aditya Kunar, Robert Birke, Hiek Van der Scheer, and Lydia Y Chen. 2024. CTAB-GAN+: Enhancing Tabular Data Synthesis. Frontiers in big Data (2024)."},{"key":"e_1_3_2_1_118_1","volume-title":"Proc. of NeurIPS.","author":"Zhou Yaqin","year":"2019","unstructured":"Yaqin Zhou, Shangqing Liu, Jingkai Siow, Xiaoning Du, and Yang Liu. 2019. Devign: Effective Vulnerability Identification by Learning Comprehensive Program Semantics via Graph Neural Networks. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_119_1","volume-title":"A Hierarchical Deep Ensemble Model for Bots Install Fraud Detection in Mobile Advertising. ACM TOIS","author":"Zhu Yadong","year":"2021","unstructured":"Yadong Zhu, Xiliang Wang, Qing Li, Tianjun Yao, and Shangsong Liang. 2021. BotSpot+ + : A Hierarchical Deep Ensemble Model for Bots Install Fraud Detection in Mobile Advertising. 
ACM TOIS (2021)."}],"event":{"name":"ASIA CCS '26: ACM Asia Conference on Computer and Communications Security","location":"Bangalore India","acronym":"ASIA CCS '26","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the ACM Asia Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3779208.3785264","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3779208.3785264","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T15:31:32Z","timestamp":1780587092000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3779208.3785264"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":119,"alternative-id":["10.1145\/3779208.3785264","10.1145\/3779208"],"URL":"https:\/\/doi.org\/10.1145\/3779208.3785264","relation":{},"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"2026-06-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}