{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T13:57:54Z","timestamp":1775743074378,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,24]],"date-time":"2024-07-24T00:00:00Z","timestamp":1721779200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,24]]},"DOI":"10.1145\/3671016.3671388","type":"proceedings-article","created":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T20:19:32Z","timestamp":1721247572000},"page":"95-104","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["DFEPT: Data Flow Embedding for Enhancing Pre-Trained Model Based Vulnerability Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4774-2225","authenticated-orcid":false,"given":"Zhonghao","family":"Jiang","sequence":"first","affiliation":[{"name":"Chongqing University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6013-1369","authenticated-orcid":false,"given":"Weifeng","family":"Sun","sequence":"additional","affiliation":[{"name":"Chongqing University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5379-985X","authenticated-orcid":false,"given":"Xiaoyan","family":"Gu","sequence":"additional","affiliation":[{"name":"Chongqing University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2004-3509","authenticated-orcid":false,"given":"Jiaxin","family":"Wu","sequence":"additional","affiliation":[{"name":"Chongqing University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5308-3229","authenticated-orcid":false,"given":"Tao","family":"Wen","sequence":"additional","affiliation":[{"name":"Chongqing University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8442-5222","authenticated-orcid":false,"given":"Haibo","family":"Hu","sequence":"additional","affiliation":[{"name":"Chongqing University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9538-9121","authenticated-orcid":false,"given":"Meng","family":"Yan","sequence":"additional","affiliation":[{"name":"Chongqing University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,24]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2024. Browse vulnerabilities by date. https:\/\/www.cvedetails.com\/browse-by-date.php Accessed 2024-01-14."},{"key":"e_1_3_2_1_2_1","unstructured":"2024. List of data breaches. https:\/\/en.wikipedia.org\/wiki\/List_of_data_breaches Accessed 2024-01-14."},{"key":"e_1_3_2_1_3_1","volume-title":"Detecting bugs using decompilation and data flow analysis. Black Hat USA","author":"Cesare Silvio","year":"2013","unstructured":"Silvio Cesare. 2013. Detecting bugs using decompilation and data flow analysis. Black Hat USA (2013), 1193\u20131206."},{"key":"e_1_3_2_1_4_1","volume-title":"Deep learning based vulnerability detection: Are we there yet","author":"Chakraborty Saikat","year":"2021","unstructured":"Saikat Chakraborty, Rahul Krishna, Yangruibo Ding, and Baishakhi Ray. 2021. Deep learning based vulnerability detection: Are we there yet. IEEE Transactions on Software Engineering (2021)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3436877"},{"key":"e_1_3_2_1_6_1","volume-title":"Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho, Bart Van\u00a0Merri\u00ebnboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3548606.3560552"},{"key":"e_1_3_2_1_8_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER53432.2022.00114"},{"key":"e_1_3_2_1_10_1","volume-title":"Favocado: Fuzzing the Binding Code of JavaScript Engines Using Semantically Correct Test Cases.. In NDSS.","author":"Dinh Sung\u00a0Ta","year":"2021","unstructured":"Sung\u00a0Ta Dinh, Haehyun Cho, Kyle Martin, Adam Oest, Kyle Zeng, Alexandros Kapravelos, Gail-Joon Ahn, Tiffany Bao, Ruoyu Wang, Adam Doup\u00e9, 2021. Favocado: Fuzzing the Binding Code of JavaScript Engines Using Semantically Correct Test Cases.. In NDSS."},{"key":"e_1_3_2_1_11_1","volume-title":"Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155","author":"Feng Zhangyin","year":"2020","unstructured":"Zhangyin Feng, Daya Guo, Duyu Tang, Nan Duan, Xiaocheng Feng, Ming Gong, Linjun Shou, Bing Qin, Ting Liu, Daxin Jiang, 2020. Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155 (2020)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524842.3528452"},{"key":"e_1_3_2_1_13_1","volume-title":"International conference on machine learning. PMLR, 1263\u20131272","author":"Gilmer Justin","year":"2017","unstructured":"Justin Gilmer, Samuel\u00a0S Schoenholz, Patrick\u00a0F Riley, Oriol Vinyals, and George\u00a0E Dahl. 2017. Neural message passing for quantum chemistry. In International conference on machine learning. PMLR, 1263\u20131272."},{"key":"e_1_3_2_1_14_1","volume-title":"Framewise phoneme classification with bidirectional LSTM and other neural network architectures. Neural networks 18, 5-6","author":"Graves Alex","year":"2005","unstructured":"Alex Graves and J\u00fcrgen Schmidhuber. 2005. Framewise phoneme classification with bidirectional LSTM and other neural network architectures. Neural networks 18, 5-6 (2005), 602\u2013610."},{"key":"e_1_3_2_1_15_1","volume-title":"Unixcoder: Unified cross-modal pre-training for code representation. arXiv preprint arXiv:2203.03850","author":"Guo Daya","year":"2022","unstructured":"Daya Guo, Shuai Lu, Nan Duan, Yanlin Wang, Ming Zhou, and Jian Yin. 2022. Unixcoder: Unified cross-modal pre-training for code representation. arXiv preprint arXiv:2203.03850 (2022)."},{"key":"e_1_3_2_1_16_1","volume-title":"Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366","author":"Guo Daya","year":"2020","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie Liu, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, 2020. Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366 (2020)."},{"key":"e_1_3_2_1_17_1","volume-title":"Vulberta: Simplified source code pre-training for vulnerability detection. In 2022 International joint conference on neural networks (IJCNN)","author":"Hanif Hazim","year":"2022","unstructured":"Hazim Hanif and Sergio Maffeis. 2022. Vulberta: Simplified source code pre-training for vulnerability detection. In 2022 International joint conference on neural networks (IJCNN). IEEE, 1\u20138."},{"key":"e_1_3_2_1_18_1","unstructured":"https:\/\/tree-sitter.github.io\/tree sitter\/. 2024. tree-sitter: An Incremental Parsing System for Programming Tools. Accessed: 2024-01-13."},{"key":"e_1_3_2_1_19_1","volume-title":"International conference on machine learning. PMLR, 5110\u20135121","author":"Kanade Aditya","year":"2020","unstructured":"Aditya Kanade, Petros Maniatis, Gogul Balakrishnan, and Kensen Shi. 2020. Learning and evaluating contextual embedding of source code. In International conference on machine learning. PMLR, 5110\u20135121."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3548606.3560664"},{"key":"e_1_3_2_1_21_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf N","year":"2016","unstructured":"Thomas\u00a0N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_22_1","volume-title":"Vuldeepecker: A deep learning-based system for vulnerability detection. arXiv preprint arXiv:1801.01681","author":"Li Zhen","year":"2018","unstructured":"Zhen Li, Deqing Zou, Shouhuai Xu, Xinyu Ou, Hai Jin, Sujuan Wang, Zhijun Deng, and Yuyi Zhong. 2018. Vuldeepecker: A deep learning-based system for vulnerability detection. arXiv preprint arXiv:1801.01681 (2018)."},{"key":"e_1_3_2_1_23_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_24_1","volume-title":"Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, 2021. Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664 (2021)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1315245.1315311"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510454.3516865"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1853919.1853923"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_29_1","volume-title":"Evaluating complexity, code churn, and developer activity metrics as indicators of software vulnerabilities","author":"Shin Yonghee","year":"2010","unstructured":"Yonghee Shin, Andrew Meneely, Laurie Williams, and Jason\u00a0A Osborne. 2010. Evaluating complexity, code churn, and developer activity metrics as indicators of software vulnerabilities. IEEE transactions on software engineering 37, 6 (2010), 772\u2013787."},{"key":"e_1_3_2_1_30_1","volume-title":"Dataflow Analysis-Inspired Deep Learning for Efficient Vulnerability Detection. In 2024 IEEE\/ACM 46th International Conference on Software Engineering (ICSE). IEEE Computer Society, 166\u2013178","author":"Steenhoek Benjamin","year":"2023","unstructured":"Benjamin Steenhoek, Hongyang Gao, and Wei Le. 2023. Dataflow Analysis-Inspired Deep Learning for Efficient Vulnerability Detection. In 2024 IEEE\/ACM 46th International Conference on Software Engineering (ICSE). IEEE Computer Society, 166\u2013178."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00188"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2021.102417"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2023.111623"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3564625.3567985"},{"key":"e_1_3_2_1_35_1","unstructured":"https:\/\/pytorch.org\/. 2024. PyTorch. Accessed: 2024-01-13."},{"key":"e_1_3_2_1_36_1","unstructured":"https:\/\/www.checkmarx.com\/. 2024. Checkmarx. Accessed: 2024-01-13."},{"key":"e_1_3_2_1_37_1","unstructured":"https:\/\/www.python.org\/. 2024. Python 3.9.18. Accessed: 2024-01-13."},{"key":"e_1_3_2_1_38_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_39_1","volume-title":"WANA: Symbolic execution of wasm bytecode for cross-platform smart contract vulnerability detection. arXiv preprint arXiv:2007.15510","author":"Wang Dong","year":"2020","unstructured":"Dong Wang, Bo Jiang, and WK Chan. 2020. WANA: Symbolic execution of wasm bytecode for cross-platform smart contract vulnerability detection. arXiv preprint arXiv:2007.15510 (2020)."},{"key":"e_1_3_2_1_40_1","volume-title":"Defecthunter: A novel llm-driven boosted-conformer-based code vulnerability detection mechanism. arXiv preprint arXiv:2309.15324","author":"Wang Jin","year":"2023","unstructured":"Jin Wang, Zishan Huang, Hengli Liu, Nianyi Yang, and Yinhao Xiao. 2023. Defecthunter: A novel llm-driven boosted-conformer-based code vulnerability detection mechanism. arXiv preprint arXiv:2309.15324 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859","author":"Wang Yue","year":"2021","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven\u00a0CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859 (2021)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2024.3382361"},{"key":"e_1_3_2_1_43_1","volume-title":"How powerful are graph neural networks?arXiv preprint arXiv:1810.00826","author":"Xu Keyulu","year":"2018","unstructured":"Keyulu Xu, Weihua Hu, Jure Leskovec, and Stefanie Jegelka. 2018. How powerful are graph neural networks?arXiv preprint arXiv:1810.00826 (2018)."},{"key":"e_1_3_2_1_44_1","volume-title":"Pattern-Based Vulnerability Discovery.Ph.\u00a0D. Dissertation","author":"Yamaguchi Fabian","unstructured":"Fabian Yamaguchi. 2015. Pattern-Based Vulnerability Discovery.Ph.\u00a0D. Dissertation. University of G\u00f6ttingen."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2023.3286586"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP52600.2021.00020"},{"key":"e_1_3_2_1_47_1","volume-title":"Devign: Effective vulnerability identification by learning comprehensive program semantics via graph neural networks. Advances in neural information processing systems 32","author":"Zhou Yaqin","year":"2019","unstructured":"Yaqin Zhou, Shangqing Liu, Jingkai Siow, Xiaoning Du, and Yang Liu. 2019. Devign: Effective vulnerability identification by learning comprehensive program semantics via graph neural networks. Advances in neural information processing systems 32 (2019)."}],"event":{"name":"Internetware 2024: 15th Asia-Pacific Symposium on Internetware","location":"Macau China","acronym":"Internetware 2024","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 15th Asia-Pacific Symposium on Internetware"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3671016.3671388","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3671016.3671388","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:38:23Z","timestamp":1755909503000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3671016.3671388"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,24]]},"references-count":47,"alternative-id":["10.1145\/3671016.3671388","10.1145\/3671016"],"URL":"https:\/\/doi.org\/10.1145\/3671016.3671388","relation":{},"subject":[],"published":{"date-parts":[[2024,7,24]]},"assertion":[{"value":"2024-07-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}