{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T17:58:06Z","timestamp":1772042286501,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T00:00:00Z","timestamp":1712880000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF-21-1-0027"],"award-info":[{"award-number":["W911NF-21-1-0027"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,12]]},"DOI":"10.1145\/3597503.3639116","type":"proceedings-article","created":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T16:43:26Z","timestamp":1712940206000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["VGX: Large-Scale Sample Generation for Boosting Learning-Based Software Vulnerability Analyses"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8598-5181","authenticated-orcid":false,"given":"Yu","family":"Nong","sequence":"first","affiliation":[{"name":"Washington State University, Pullman, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8104-9917","authenticated-orcid":false,"given":"Richard","family":"Fang","sequence":"additional","affiliation":[{"name":"Washington State University, Pullman, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6679-5153","authenticated-orcid":false,"given":"Guangbei","family":"Yi","sequence":"additional","affiliation":[{"name":"Washington State University, Pullman, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9886-0460","authenticated-orcid":false,"given":"Kunsong","family":"Zhao","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9082-3208","authenticated-orcid":false,"given":"Xiapu","family":"Luo","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4508-5963","authenticated-orcid":false,"given":"Feng","family":"Chen","sequence":"additional","affiliation":[{"name":"The University of Texas at Dallas, Dallas, Texas, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5224-9970","authenticated-orcid":false,"given":"Haipeng","family":"Cai","sequence":"additional","affiliation":[{"name":"Washington State University, Pullman, Washington, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,4,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2017. SARD: A Software Assurance Reference Dataset. https:\/\/samate.nist.gov\/SARD\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2022. 2022 CWE Top 25 Most Dangerous Software Weaknesses. https:\/\/cwe.mitre.org\/top25\/archive\/2022\/2022_cwe_top25.html."},{"key":"e_1_3_2_1_3_1","unstructured":"2022. CVE-2017-12991. https:\/\/github.com\/the-tcpdump-group\/tcpdump\/commit\/50a44b6b8e4f7c127440dbd4239cf571945cc1e7."},{"key":"e_1_3_2_1_4_1","unstructured":"2022. Memory Leak. https:\/\/cwe.mitre.org\/data\/definitions\/401.html."},{"key":"e_1_3_2_1_5_1","unstructured":"2022. OpenBSD. https:\/\/github.com\/bukhalo\/openbsd-src\/commit\/a88c32bfabe8a7fd0b25703230d4adba1d204e0a."},{"key":"e_1_3_2_1_6_1","unstructured":"2022. Race Condition. https:\/\/cwe.mitre.org\/data\/definitions\/362.html."},{"key":"e_1_3_2_1_7_1","unstructured":"2022. RawStudio. https:\/\/github.com\/rawstudio\/rawstudio\/commit\/04cf4f537ffdce5f3e5207bead0ac2d254114cc2."},{"key":"e_1_3_2_1_8_1","unstructured":"2022. Use of Uninitialized Variables. https:\/\/cwe.mitre.org\/data\/definitions\/457.html."},{"key":"e_1_3_2_1_9_1","volume-title":"Cybersecurity vulnerability statistics and facts of","year":"2023","unstructured":"2023. Cybersecurity vulnerability statistics and facts of 2023. https:\/\/www.comparitech.com\/blog\/information-security\/cybersecurity-vulnerability-statistics\/."},{"key":"e_1_3_2_1_10_1","unstructured":"2023. Data Quality Considerations for Machine Learning Models. https:\/\/towardsdatascience.com\/data-quality-considerations-for-machine-learning-models-dcbe9cab34cb."},{"key":"e_1_3_2_1_11_1","unstructured":"2023. How Much Data Is Needed For Machine Learning? https:\/\/graphite-note.com\/how-much-data-is-needed-for-machine-learning."},{"key":"e_1_3_2_1_12_1","unstructured":"2023. The Size and Quality of a Data Set. https:\/\/developers.google.com\/machine-learning\/data-prep\/construct\/collect\/data-size-quality."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360585"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3475960.3475985"},{"key":"e_1_3_2_1_16_1","volume-title":"Benchmarking Software Vulnerability Detection Techniques: A Survey. arXiv preprint arXiv:2303.16362","author":"Bi Yingzhou","year":"2023","unstructured":"Yingzhou Bi, Jiangtao Huang, Penghui Liu, and Lianmei Wang. 2023. Benchmarking Software Vulnerability Detection Techniques: A Survey. arXiv preprint arXiv:2303.16362 (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_1_18_1","volume-title":"AI Embedded Assurance for Cyber Systems","author":"Cai Haipeng","unstructured":"Haipeng Cai, Yu Nong, Yuzhe Ou, and Feng Chen. 2023. Generating Vulnerable Code via Learning-Based Program Transformations. In AI Embedded Assurance for Cyber Systems. Springer, 123--138."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2021.3087402"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2022.3147265"},{"key":"e_1_3_2_1_21_1","volume-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.2013.85"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00022"},{"key":"e_1_3_2_1_24_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_25_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Dinella Elizabeth","year":"2020","unstructured":"Elizabeth Dinella, Hanjun Dai, Ziyang Li, Mayur Naik, Le Song, and Ke Wang. 2020. Hoppity: Learning graph transformations to detect and fix bugs in programs. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387501"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524842.3528452"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549098"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338906.3341179"},{"key":"e_1_3_2_1_31_1","volume-title":"FlowDist:Multi-Staged Refinement-Based Dynamic Information Flow Analysis for Distributed Software Systems. In 30th USENIX Security Symposium (USENIX Security 21)","author":"Fu Xiaoqin","year":"2021","unstructured":"Xiaoqin Fu and Haipeng Cai. 2021. FlowDist:Multi-Staged Refinement-Based Dynamic Information Flow Analysis for Distributed Software Systems. In 30th USENIX Security Symposium (USENIX Security 21). 2093--2110."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2018.2877965"},{"key":"e_1_3_2_1_33_1","volume-title":"GraphCodeBERT: Pre-training Code Representations with Data Flow. In International Conference on Learning Representations (ICLR).","author":"Guo Daya","year":"2020","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, LIU Shujie, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, et al. 2020. GraphCodeBERT: Pre-training Code Representations with Data Flow. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems. 7944--7954","author":"Harer Jacob A","year":"2018","unstructured":"Jacob A Harer, Onur Ozdemir, Tomo Lazovich, Christopher P Reale, Rebecca L Russell, Louis Y Kim, and Peter Chin. 2018. Learning to repair software vulnerabilities with generative adversarial networks. In Proceedings of the 32nd International Conference on Neural Information Processing Systems. 7944--7954."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524842.3527949"},{"key":"e_1_3_2_1_36_1","volume-title":"DATGAN: Integrating expert knowledge into deep learning for synthetic tabular data. arXiv preprint arXiv:2203.03489","author":"Lederrey Gael","year":"2022","unstructured":"Gael Lederrey, Tim Hillel, and Michel Bierlaire. 2022. DATGAN: Integrating expert knowledge into deep learning for synthetic tabular data. arXiv preprint arXiv:2203.03489 (2022)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551349.3556908"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549173"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3558925"},{"key":"e_1_3_2_1_40_1","volume-title":"31st USENIX Security Symposium (USENIX Security 22)","author":"Li Wen","year":"2022","unstructured":"Wen Li, Jiang Ming, Xiapu Luo, and Haipeng Cai. 2022. {PolyCruise}: A {Cross-Language} Dynamic Information Flow Analysis. In 31st USENIX Security Symposium (USENIX Security 22). 2513--2530."},{"key":"e_1_3_2_1_41_1","volume-title":"PolyFuzz: Holistic Greybox Fuzzing of Multi-Language Systems. In 32nd USENIX Security Symposium (USENIX Security 23)","author":"Li Wen","year":"2023","unstructured":"Wen Li, Jinyang Ruan, Guangbei Yi, Long Cheng, Xiapu Luo, and Haipeng Cai. 2023. PolyFuzz: Holistic Greybox Fuzzing of Multi-Language Systems. In 32nd USENIX Security Symposium (USENIX Security 23). 1379--1396."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576915.3623166"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3468264.3468597"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2021.3076142"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2021.3051525"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2018.23158"},{"key":"e_1_3_2_1_47_1","unstructured":"Linus Eriksson. 2022. Tree-Sitter. https:\/\/github.com\/tree-sitter\/tree-sitter."},{"key":"e_1_3_2_1_48_1","first-page":"1296","article-title":"Combining Graph Neural Networks With Expert Knowledge for Smart Contract Vulnerability Detection","volume":"35","author":"Liu Zhenguang","year":"2023","unstructured":"Zhenguang Liu, Peng Qian, Xiaoyang Wang, Yuan Zhuang, Lin Qiu, and Xun Wang. 2023. Combining Graph Neural Networks With Expert Knowledge for Smart Contract Vulnerability Detection. IEEE Transactions on Knowledge & Data Engineering 35, 02 (2023), 1296--1310.","journal-title":"IEEE Transactions on Knowledge & Data Engineering"},{"key":"e_1_3_2_1_49_1","volume-title":"VulChecker: Graph-based Vulnerability Localization in Source Code. In USENIX Security Symposium.","author":"Mirsky Yisroel","year":"2023","unstructured":"Yisroel Mirsky, George Macon, Michael Brown, Carter Yagemann, Matthew Pruett, Evan Downing, Sukarno Mertoguno, and Wenke Lee. 2023. VulChecker: Graph-based Vulnerability Localization in Source Code. In USENIX Security Symposium."},{"key":"e_1_3_2_1_50_1","unstructured":"National Institute of Standards and Technology (NIST). 2022. National Vulnerability Database (NVD). https:\/\/nvd.nist.gov."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510096"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER48275.2020.9054851"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2021.106614"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549128"},{"key":"e_1_3_2_1_55_1","volume-title":"VulGen: Realistic Vulnerability Generation Via Pattern Mining and Deep Learning. In 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE). 2527--2539","author":"Nong Yu","year":"2023","unstructured":"Yu Nong, Yuzhe Ou, Michael Pradel, Feng Chen, and Haipeng Cai. 2023. VulGen: Realistic Vulnerability Generation Via Pattern Mining and Deep Learning. In 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE). 2527--2539."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2022.3207149"},{"key":"e_1_3_2_1_57_1","first-page":"297","article-title":"Report on the static analysis tool exposition (sate) iv","volume":"500","author":"Okun Vadim","year":"2013","unstructured":"Vadim Okun, Aurelien Delaitre, Paul E Black, et al. 2013. Report on the static analysis tool exposition (sate) iv. NIST Special Publication 500 (2013), 297.","journal-title":"NIST Special Publication"},{"key":"e_1_3_2_1_58_1","first-page":"9343","article-title":"Integrating tree path in transformer for code representation","volume":"34","author":"Peng Han","year":"2021","unstructured":"Han Peng, Ge Li, Wenhan Wang, Yunfei Zhao, and Zhi Jin. 2021. Integrating tree path in transformer for code representation. Advances in Neural Information Processing Systems 34 (2021), 9343--9354.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_59_1","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2).","author":"Puri Ruchir","year":"2021","unstructured":"Ruchir Puri, David S Kung, Geert Janssen, Wei Zhang, Giacomo Domeniconi, Vladimir Zolotov, Julian Dolby, Jie Chen, Mihir Choudhury, Lindsey Decker, et al. 2021. CodeNet: A Large-Scale AI for Code Dataset for Learning a Diversity of Coding Tasks. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3236024.3236084"},{"key":"e_1_3_2_1_61_1","volume-title":"Towards security defect prediction with AI. arXiv preprint arXiv:1808.09897","author":"Sestili Carson D","year":"2018","unstructured":"Carson D Sestili, William S Snavely, and Nathan M VanHoudnos. 2018. Towards security defect prediction with AI. arXiv preprint arXiv:1808.09897 (2018)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSN48987.2021.00030"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510229"},{"key":"e_1_3_2_1_65_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Yao Ziyu","year":"2021","unstructured":"Ziyu Yao, Frank F Xu, Pengcheng Yin, Huan Sun, and Graham Neubig. 2021. Learning Structural Edits via Incremental Tree Transformations. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58144-2_21"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAICA52286.2021.9497888"},{"key":"e_1_3_2_1_68_1","volume-title":"FIXRE-VERTER: A Realistic Bug Injection Methodology for Benchmarking Fuzz Testing. In 31st USENIX Security Symposium (USENIX Security 22)","author":"Zhang Zenong","year":"2022","unstructured":"Zenong Zhang, Zach Patterson, Michael Hicks, and Shiyi Wei. 2022. FIXRE-VERTER: A Realistic Bug Injection Methodology for Benchmarking Fuzz Testing. In 31st USENIX Security Symposium (USENIX Security 22). 3699--3715."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP52600.2021.00020"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488932.3527288"},{"key":"e_1_3_2_1_71_1","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems. 10197--10207","author":"Zhou Yaqin","year":"2019","unstructured":"Yaqin Zhou, Shangqing Liu, Jingkai Siow, Xiaoning Du, and Yang Liu. 2019. Devign: effective vulnerability identification by learning comprehensive program semantics via graph neural networks. In Proceedings of the 33rd International Conference on Neural Information Processing Systems. 10197--10207."},{"key":"e_1_3_2_1_72_1","first-page":"1","article-title":"mVulPreter: A Multi-Granularity Vulnerability Detection System With Interpretations","volume":"01","author":"Zou Deqing","year":"2022","unstructured":"Deqing Zou, Yutao Hu, Wenke Li, Yueming Wu, Haojun Zhao, and Hai Jin. 2022. mVulPreter: A Multi-Granularity Vulnerability Detection System With Interpretations. IEEE Transactions on Dependable and Secure Computing (TDSC) 01 (2022), 1--12.","journal-title":"IEEE Transactions on Dependable and Secure Computing (TDSC)"}],"event":{"name":"ICSE '24: IEEE\/ACM 46th International Conference on Software Engineering","location":"Lisbon Portugal","acronym":"ICSE '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597503.3639116","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3597503.3639116","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3597503.3639116","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:49:11Z","timestamp":1750286951000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597503.3639116"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,12]]},"references-count":71,"alternative-id":["10.1145\/3597503.3639116","10.1145\/3597503"],"URL":"https:\/\/doi.org\/10.1145\/3597503.3639116","relation":{},"subject":[],"published":{"date-parts":[[2024,4,12]]},"assertion":[{"value":"2024-04-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}