{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T16:05:03Z","timestamp":1781366703370,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,16]],"date-time":"2023-10-16T00:00:00Z","timestamp":1697414400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Science Foundation","award":["CNS-2154873"],"award-info":[{"award-number":["CNS-2154873"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,16]]},"DOI":"10.1145\/3607199.3607242","type":"proceedings-article","created":{"date-parts":[[2023,10,3]],"date-time":"2023-10-03T22:30:51Z","timestamp":1696372251000},"page":"654-668","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":200,"title":["DiverseVul: A New Vulnerable Source Code Dataset for Deep Learning Based Vulnerability Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2019-5955","authenticated-orcid":false,"given":"Yizheng","family":"Chen","sequence":"first","affiliation":[{"name":"University of Maryland, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5467-2592","authenticated-orcid":false,"given":"Zhoujie","family":"Ding","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4207-2369","authenticated-orcid":false,"given":"Lamya","family":"Alowain","sequence":"additional","affiliation":[{"name":"King Abdulaziz City for Science and Technology, Saudi Arabia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8379-902X","authenticated-orcid":false,"given":"Xinyun","family":"Chen","sequence":"additional","affiliation":[{"name":"Google Deepmind, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9944-9232","authenticated-orcid":false,"given":"David","family":"Wagner","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,16]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1646353.1646374"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3475960.3475985"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549162"},{"key":"e_1_3_2_1_4_1","volume-title":"Deep learning based vulnerability detection: Are we there yet","author":"Chakraborty Saikat","year":"2021","unstructured":"Saikat Chakraborty, Rahul Krishna, Yangruibo Ding, and Baishakhi Ray. 2021. Deep learning based vulnerability detection: Are we there yet. IEEE Transactions on Software Engineering (2021)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3508398.3511495"},{"key":"e_1_3_2_1_6_1","volume-title":"Last accessed on","author":"The\u00a0MITRE Corporation","year":"2023","unstructured":"The\u00a0MITRE Corporation. Last accessed on March 28, 2023. 2022 CWE Top 25 Most Dangerous Software Weaknesses. https:\/\/cwe.mitre.org\/top25\/archive\/2022\/2022_cwe_top25.html"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00022"},{"key":"e_1_3_2_1_8_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387501"},{"key":"e_1_3_2_1_10_1","volume-title":"Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155","author":"Feng Zhangyin","year":"2020","unstructured":"Zhangyin Feng, Daya Guo, Duyu Tang, Nan Duan, Xiaocheng Feng, Ming Gong, Linjun Shou, Bing Qin, Ting Liu, Daxin Jiang, 2020. Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155 (2020)."},{"key":"e_1_3_2_1_11_1","volume-title":"Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366","author":"Guo Daya","year":"2020","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie Liu, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, 2020. Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366 (2020)."},{"key":"e_1_3_2_1_12_1","volume-title":"Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436","author":"Husain Hamel","year":"2019","unstructured":"Hamel Husain, Ho-Hsiang Wu, Tiferet Gazit, Miltiadis Allamanis, and Marc Brockschmidt. 2019. Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436 (2019)."},{"key":"e_1_3_2_1_13_1","volume-title":"Gated graph sequence neural networks. arXiv preprint arXiv:1511.05493","author":"Li Yujia","year":"2015","unstructured":"Yujia Li, Daniel Tarlow, Marc Brockschmidt, and Richard Zemel. 2015. Gated graph sequence neural networks. arXiv preprint arXiv:1511.05493 (2015)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2021.3051525"},{"key":"e_1_3_2_1_15_1","volume-title":"Vuldeepecker: A deep learning-based system for vulnerability detection. arXiv preprint arXiv:1801.01681","author":"Li Zhen","year":"2018","unstructured":"Zhen Li, Deqing Zou, Shouhuai Xu, Xinyu Ou, Hai Jin, Sujuan Wang, Zhijun Deng, and Yuyi Zhong. 2018. Vuldeepecker: A deep learning-based system for vulnerability detection. arXiv preprint arXiv:1801.01681 (2018)."},{"key":"e_1_3_2_1_16_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_17_1","volume-title":"Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, 2021. Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664 (2021)."},{"key":"e_1_3_2_1_18_1","volume-title":"USENIX Security","author":"Mirsky Yisroel","year":"2023","unstructured":"Yisroel Mirsky, George Macon, Michael Brown, Carter Yagemann, Matthew Pruett, Evan Downing, Sukarno Mertoguno, and Wenke Lee. 2023. VulChecker: Graph-based Vulnerability Localization in Source Code. In USENIX Security 2023."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3468264.3473122"},{"key":"e_1_3_2_1_20_1","volume-title":"Last accessed on","author":"National\u00a0Institute of Standards and Technology.","year":"2023","unstructured":"National\u00a0Institute of Standards and Technology. Last accessed on March 19, 2023. National Vulnerability Database. https:\/\/nvd.nist.gov\/"},{"key":"e_1_3_2_1_21_1","volume-title":"Last accessed on","author":"National\u00a0Institute of Standards and Technology.","year":"2023","unstructured":"National\u00a0Institute of Standards and Technology. Last accessed on March 19, 2023. NIST Software Assurance Reference Dataset. https:\/\/samate.nist.gov\/SARD"},{"key":"e_1_3_2_1_22_1","first-page":"297","article-title":"Report on the static analysis tool exposition (sate) iv","volume":"500","author":"Okun Vadim","year":"2013","unstructured":"Vadim Okun, Aurelien Delaitre, Paul\u00a0E Black, 2013. Report on the static analysis tool exposition (sate) iv. NIST Special Publication 500 (2013), 297.","journal-title":"NIST Special Publication"},{"key":"e_1_3_2_1_23_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, Ilya Sutskever, 2019. Language models are unsupervised multitask learners. OpenAI blog 1, 8 (2019), 9."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2018.00120"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00188"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3564625.3567985"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSN48987.2021.00030"},{"key":"e_1_3_2_1_29_1","volume-title":"Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859","author":"Wang Yue","year":"2021","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven\u00a0CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859 (2021)."},{"key":"e_1_3_2_1_30_1","volume-title":"A Systematic Evaluation of Large Language Models of Code. arXiv preprint arXiv:2202.13169","author":"Xu F","year":"2022","unstructured":"Frank\u00a0F Xu, Uri Alon, Graham Neubig, and Vincent\u00a0J Hellendoorn. 2022. A Systematic Evaluation of Large Language Models of Code. arXiv preprint arXiv:2202.13169 (2022)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2014.44"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP52600.2021.00020"},{"key":"e_1_3_2_1_33_1","volume-title":"Devign: Effective vulnerability identification by learning comprehensive program semantics via graph neural networks. Advances in neural information processing systems 32","author":"Zhou Yaqin","year":"2019","unstructured":"Yaqin Zhou, Shangqing Liu, Jingkai Siow, Xiaoning Du, and Yang Liu. 2019. Devign: Effective vulnerability identification by learning comprehensive program semantics via graph neural networks. Advances in neural information processing systems 32 (2019)."}],"event":{"name":"RAID 2023: The 26th International Symposium on Research in Attacks, Intrusions and Defenses","location":"Hong Kong China","acronym":"RAID 2023"},"container-title":["Proceedings of the 26th International Symposium on Research in Attacks, Intrusions and Defenses"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3607199.3607242","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3607199.3607242","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:35Z","timestamp":1750178255000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3607199.3607242"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,16]]},"references-count":33,"alternative-id":["10.1145\/3607199.3607242","10.1145\/3607199"],"URL":"https:\/\/doi.org\/10.1145\/3607199.3607242","relation":{},"subject":[],"published":{"date-parts":[[2023,10,16]]},"assertion":[{"value":"2023-10-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}