{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:29:56Z","timestamp":1763346596370,"version":"3.45.0"},"reference-count":126,"publisher":"Tech Science Press","issue":"3","license":[{"start":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T00:00:00Z","timestamp":1762041600000},"content-version":"vor","delay-in-days":305,"URL":"https:\/\/doi.org\/10.32604\/TSP-CROSSMARKPOLICY"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.070195","type":"journal-article","created":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T08:42:35Z","timestamp":1760344955000},"page":"4345-4374","update-policy":"https:\/\/doi.org\/10.32604\/tsp-crossmarkpolicy","source":"Crossref","is-referenced-by-count":0,"title":["Binary Code Similarity Detection: Retrospective Review and Future Directions"],"prefix":"10.32604","volume":"85","author":[{"given":"Shengjia","family":"Chang","sequence":"first","affiliation":[]},{"given":"Baojiang","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Shaocong","family":"Feng","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3446371","article-title":"A survey of binary code similarity","volume":"54","author":"Haq","year":"2022","journal-title":"ACM Comput Surv"},{"key":"ref2","series-title":"Proceedings of the 33rd ACM\/IEEE International Conference on Automated Software Engineering; 2018 Sep 3\u20137; Montpellier, France","first-page":"896","article-title":"VulSeeker: a semantic learning based vulnerability seeker for cross-platform binary","author":"Gao"},{"key":"ref3","series-title":"Proceedings of the 2017 ACM SIGSAC Conference on Computer and Communications Security; 2017 Oct 30\u2013Nov 3; 
Dallas, TX, USA","first-page":"363","article-title":"Neural network-based graph embedding for cross-platform binary code similarity detection","author":"Xu"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"392","DOI":"10.1145\/3296957.3177157","article-title":"FirmUp: precise static detection of common vulnerabilities in firmware","volume":"53","author":"David","year":"2018","journal-title":"ACM SIGPLAN Notices"},{"key":"ref5","series-title":"Detection of Intrusions and Malware, and Vulnerability Assessment: 15th International Conference, DIMVA 2018; 2018 Jun 28\u201329; Saclay, France. Cham, Switzerland: Springer International Publishing","first-page":"114","article-title":"Binarm: scalable and efficient detection of vulnerabilities in firmware images of intelligent electronic devices","author":"Shirani","year":"2018"},{"key":"ref6","series-title":"Proceedings of the 33rd ACM\/IEEE International Conference on Automated Software Engineering; 2018 Sep 3\u20137; Montpellier, France","first-page":"667","article-title":"\u03b1Diff: cross-version binary code similarity detection with DNN","author":"Liu"},{"key":"ref7","first-page":"129","author":"Bruschi","year":"2006","journal-title":"Detection of intrusions and malware & vulnerability assessment"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1109\/TDSC.2013.40","article-title":"Control flow-based malware variant detection","volume":"11","author":"Cesare","year":"2014","journal-title":"IEEE Trans Dependable Secure Comput"},{"key":"ref9","series-title":"Proceedings of the 28th Annual Computer Security Applications Conference; 2012 Dec 3\u20137; Orlando, FL, USA","first-page":"349","article-title":"Lines of malicious code: insights into the malicious software industry","author":"Lindorfer"},{"key":"ref10","series-title":"22nd USENIX Security Symposium (USENIX Security 13); 2013 Aug 14\u201316; Washington, DC, USA","first-page":"81","article-title":"Towards automatic software lineage 
inference","author":"Jang"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"416","DOI":"10.1007\/978-3-319-18467-8_28","author":"Ming","year":"2015","journal-title":"ICT systems security and privacy protection"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"261","DOI":"10.1126\/science.aaa8685","article-title":"Advances in natural language processing","volume":"349","author":"Hirschberg","year":"2015","journal-title":"Science"},{"year":"1999","author":"Manning","journal-title":"Foundations of statistical natural language processing","key":"ref13"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"e15","DOI":"10.1017\/ATSIP.2020.13","article-title":"Graph representation learning: a survey","volume":"9","author":"Chen","year":"2020","journal-title":"APSIPA Trans Signal Inf Process"},{"doi-asserted-by":"crossref","unstructured":"Wang X, Jiang Y, Bach N, Wang T, Huang Z, Huang F, et al. Automated concatenation of embeddings for structured prediction. arXiv:2010.05006. 
2020.","key":"ref15","DOI":"10.18653\/v1\/2021.acl-long.206"},{"author":"Duan","article-title":"DeepBinDiff: learning program-wide code representations for binary diffing","series-title":"Proceedings of the 2020 Network and Distributed System Security Symposium; 2020 Feb 23\u201326; San Diego, CA, USA","key":"ref16"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"5082","DOI":"10.1002\/int.22752","volume":"37","author":"Fu","year":"2022","journal-title":"Int J Intelligent Sys"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"799","DOI":"10.1109\/TNNLS.2021.3102234","article-title":"Multilevel graph matching networks for deep graph similarity learning","volume":"34","author":"Ling","year":"2023","journal-title":"IEEE Trans Neural Netw Learning Syst"},{"author":"Peng","article-title":"How could neural networks understand programs?","series-title":"Proceedings of the 38th International Conference on Machine Learning; 2021 Jul 18\u201324; Online","key":"ref19"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"2224","DOI":"10.1109\/TSE.2021.3056139","article-title":"Codee: a tensor embedding scheme for binary code search","volume":"48","author":"Yang","year":"2022","journal-title":"IEEE Trans Software Eng"},{"key":"ref21","series-title":"The World Wide Web Conference","first-page":"417","article-title":"Graph neural networks for social recommendation","author":"Fan","year":"2019"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"114348","DOI":"10.1016\/j.eswa.2020.114348","article-title":"BinDeep: a deep learning approach to binary code similarity detection","volume":"168","author":"Tian","year":"2021","journal-title":"Expert Syst Appl"},{"key":"ref23","series-title":"Proceedings of the 2017 ACM on Asia Conference on Computer and Communications Security","first-page":"346","article-title":"Extracting conditional formulas for cross-platform bug 
search","author":"Feng","year":"2017"},{"key":"ref24","first-page":"238","author":"Gao","year":"2008","journal-title":"Information and communications security"},{"key":"ref25","series-title":"2012 11th International Conference on Machine Learning and Applications; 2012 Dec 12\u201315","first-page":"386","article-title":"Binary function clustering using semantic hashes","author":"Jin","year":"2012"},{"key":"ref26","series-title":"Proceedings of the 2nd ACM SIGPLAN Program Protection and Reverse Engineering Workshop","first-page":"1","article-title":"Fast location of similar code fragments using semantic \u2018juice\u2019","author":"Lakhotia","year":"2013"},{"key":"ref27","series-title":"Proceedings of the 22nd ACM SIGSOFT International Symposium on Foundations of Software Engineering","first-page":"389","article-title":"Semantics-based obfuscation-resilient binary code similarity comparison with applications to software plagiarism detection","author":"Luo","year":"2014"},{"key":"ref28","series-title":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","first-page":"503","article-title":"CLAP: learning transferable binary code representations with natural language supervision","author":"Wang","year":"2024"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.1109\/JIOT.2025.3579231","article-title":"Binary code similarity detection via LLM-based source code conversion","author":"Wan","year":"2025","journal-title":"IEEE Internet Things J"},{"unstructured":"Lu S, Guo D, Ren S, Huang J, Svyatkovskiy A, Blanco A, et al. CodeXGLUE: a machine learning benchmark dataset for code understanding and generation. arXiv:2102.04664. 2021.","key":"ref30"},{"unstructured":"Chen M, Tworek J, Jun H, Yuan Q, de Oliveira Pinto HP, Kaplan J, et al. Evaluating large language models trained on code. arXiv:2107.03374. 
2021.","key":"ref31"},{"key":"ref32","series-title":"Proceedings of the 6th ACM SIGPLAN International Symposium on Machine Programming","first-page":"1","article-title":"A systematic evaluation of large language models of code","author":"Xu","year":"2022"},{"unstructured":"Rozi\u00e8re B, Gehring J, Gloeckle F, Sootla S, Gat I, Tan XE, et al. Code llama: open foundation models for code. arXiv:2308.12950. 2023.","key":"ref33"},{"unstructured":"Li R, Allal LB, Zi Y, Muennighoff N, Kocetkov D, Mou C, et al. Starcoder: may the source be with you! arXiv:2305.06161. 2023.","key":"ref34"},{"key":"ref35","series-title":"31st USENIX Security Symposium (USENIX Security 22); 2022 Aug 10\u201312; Boston, MA, USA","first-page":"2099","article-title":"How machine learning is solving the binary function similarity problem","author":"Marcelli"},{"key":"ref36","series-title":"Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation","first-page":"435","article-title":"Automated conformance testing for JavaScript engines via deep compiler fuzzing","author":"Ye","year":"2021"},{"key":"ref37","series-title":"2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE); 2023 May 14\u201320; Melbourne, VIC, Australia","first-page":"919","article-title":"CodaMosa: escaping coverage plateaus in test generation with pre-trained large language models","author":"Lemieux"},{"key":"ref38","series-title":"2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE); 2023 May 14\u201320; Melbourne, VIC, Australia","first-page":"1355","article-title":"Fill in the blank: context-aware automated text input generation for mobile GUI testing","author":"Liu"},{"key":"ref39","series-title":"Proceedings of the 32nd ACM SIGSOFT International Symposium on Software Testing and Analysis","first-page":"423","article-title":"Large language models are zero-shot fuzzers: fuzzing deep-learning libraries via large language 
models","author":"Deng","year":"2023"},{"key":"ref40","series-title":"Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","first-page":"935","article-title":"VulRepair: a T5-based automated software vulnerability repair","author":"Fu","year":"2022"},{"key":"ref41","series-title":"2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE); 2023 May 14\u201320; Melbourne, VIC, Australia","first-page":"1469","article-title":"Automated repair of programs from large language models","author":"Fan"},{"key":"ref42","series-title":"2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE); 2023 May 14\u201320; Melbourne, VIC, Australia","first-page":"2450","article-title":"Retrieval-based prompt selection for code-related few-shot learning","author":"Nashid"},{"unstructured":"Cheshkov A, Zadorozhny P, Levichev R. Evaluation of ChatGPT model for vulnerability detection. arXiv:2304.07232. 
2023.","key":"ref43"},{"key":"ref44","series-title":"2023 IEEE 34th International Symposium on Software Reliability Engineering Workshops (ISSREW); 2023 Oct 9\u201312; Florence, Italy","first-page":"112","article-title":"Software vulnerability detection using large language models","author":"Das Purba"},{"key":"ref45","series-title":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","first-page":"235","article-title":"SCALE: constructing structured natural language comment trees for software vulnerability detection","author":"Wen","year":"2024"},{"key":"ref46","series-title":"Proceedings of the 2024 on ACM SIGSAC Conference on Computer and Communications Security","first-page":"4554","article-title":"ReSym: harnessing LLMs to recover variable and data structure symbols from stripped binaries","author":"Xie","year":"2024"},{"key":"ref47","series-title":"The Thirty-Eighth Annual Conference on Neural Information Processing Systems; 2024 Dec 10\u201315; Vancouver, BC, Canada","first-page":"112624","article-title":"Source code foundation models are transferable binary analysis knowledge bases","author":"Su"},{"author":"Xu","article-title":"Unleashing the power of generative model in recovering variable names from stripped binary","series-title":"Proceedings of the 2025 Network and Distributed System Security Symposium; 2025 Feb 24\u201328; San Diego, CA, USA","key":"ref48"},{"author":"Jiang","article-title":"Beyond classification: inferring function names in stripped binaries via domain adapted LLMs","series-title":"Proceedings of the 2025 Network and Distributed System Security Symposium; 2025 Feb 24\u201328; San Diego, CA, USA","key":"ref49"},{"key":"ref50","series-title":"ACM SIGPLAN Workshop on Compiler Support for System Software (WCSS)","article-title":"Compressing differences of executable code","author":"Baker Brenda","year":"1999"},{"key":"ref51","series-title":"Detection of Intrusions and Malware and Vulnerability 
Assessment, GI SIG SIDAR Workshop, DIMVA 2004","first-page":"161","article-title":"Structural comparison of executable objects","author":"Flake","year":"2004"},{"key":"ref52","first-page":"1","article-title":"Graph-based comparison of executable objects (English version)","author":"Dullien","year":"2005","journal-title":"Actes du Symposium SSTIC05"},{"key":"ref53","series-title":"2016 IEEE 23rd International Conference on Software Analysis, Evolution, and Reengineering (SANER); 2016 Mar 14\u201318; Osaka, Japan","first-page":"57","article-title":"Cross-architecture binary semantics understanding via similar code comparison","author":"Hu"},{"key":"ref54","series-title":"Proceedings of the 2017 ACM on Asia Conference on Computer and Communications Security","first-page":"155","article-title":"BinSequence: fast, accurate and scalable binary code reuse detection","author":"Huang","year":"2017"},{"key":"ref55","series-title":"2017 IEEE\/ACM 39th International Conference on Software Engineering (ICSE); 2017 May 20\u201328; Buenos Aires, Argentina","first-page":"462","article-title":"SPAIN: security patch analysis for binaries towards understanding the pain and pills","author":"Xu"},{"key":"ref56","series-title":"2017 32nd IEEE\/ACM International Conference on Automated Software Engineering (ASE); 2017 Oct 30\u2013Nov 3; Urbana, IL, USA","first-page":"342","article-title":"Towards robust instruction-level trace alignment of binary code","author":"Karg\u00e9n"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"349","DOI":"10.1145\/2666356.2594343","article-title":"Tracelet-based code search in executables","volume":"49","author":"David","year":"2014","journal-title":"ACM SIGPLAN Notices"},{"key":"ref58","series-title":"Proceedings of the 30th Annual Computer Security Applications Conference","first-page":"406","article-title":"Leveraging semantic signatures for bug search in binary programs","author":"Pewny","year":"2014"},{"key":"ref59","series-title":"2015 IEEE 
Symposium on Security and Privacy; 2015 May 17\u201321; San Jose, CA, USA","first-page":"709","article-title":"Cross-architecture bug search in binary executables","author":"Pewny"},{"author":"Eschweiler","article-title":"discovRE: efficient cross-architecture identification of bugs in binary code","series-title":"Proceedings of the 2016 Network and Distributed System Security Symposium; 2016 Feb 21\u201324; San Diego, CA, USA","key":"ref60"},{"key":"ref61","doi-asserted-by":"crossref","first-page":"266","DOI":"10.1145\/2980983.2908126","article-title":"Statistical similarity of binaries","volume":"51","author":"David","year":"2016","journal-title":"ACM SIGPLAN Notices"},{"key":"ref62","series-title":"Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security","first-page":"480","article-title":"Scalable graph-based bug search for firmware images","author":"Feng","year":"2016"},{"key":"ref63","series-title":"Proceedings of the 2016 24th ACM SIGSOFT International Symposium on Foundations of Software Engineering","first-page":"678","article-title":"BinGo: cross-architecture cross-OS binary search","author":"Chandramohan","year":"2016"},{"key":"ref64","series-title":"Proceedings of the 38th ACM SIGPLAN Conference on Programming Language Design and Implementation","first-page":"79","article-title":"Similarity of binaries through re-optimization","author":"David","year":"2017"},{"key":"ref65","series-title":"2019 IEEE Symposium on Security and Privacy (SP); 2019 May 19\u201323; San Francisco, CA, USA","first-page":"472","article-title":"Asm2Vec: boosting static representation robustness for binary clone search against code obfuscation and compiler optimization","author":"Ding"},{"key":"ref66","series-title":"Detection of Intrusions and Malware, and Vulnerability Assessment: 16th International Conference, DIMVA 2019","article-title":"Safe: self-attentive function embeddings for binary 
similarity","author":"Massarelli","year":"2019"},{"key":"ref67","series-title":"Proceedings of the 16th ACM Conference on Computer and Communications Security","first-page":"611","article-title":"Large-scale malware indexing using function-call graphs","author":"Hu","year":"2009"},{"key":"ref68","series-title":"USENIX ATC\u201913: Proceedings of the 2013 USENIX conference on Annual Technical Conference; 2013 Jun 26\u201328; San Jose, CA, USA","first-page":"187","article-title":"MutantX-S: scalable malware clustering based on static features","author":"Hu"},{"key":"ref69","doi-asserted-by":"crossref","first-page":"607","DOI":"10.1007\/s11227-016-1941-2","article-title":"Binary executable file similarity calculation using function matching","volume":"75","author":"Kim","year":"2019","journal-title":"J Supercomput"},{"key":"ref70","doi-asserted-by":"crossref","first-page":"364","DOI":"10.1109\/TSE.1975.6312866","article-title":"The source code control system","volume":"SE-1","author":"Rochkind","year":"1975","journal-title":"IEEE Trans Softw Eng"},{"key":"ref71","series-title":"Proceedings of the 3rd International Workshop on Software Configuration Management","first-page":"144","article-title":"Delta storage for arbitrary non-text files","author":"Reichenberger","year":"1991"},{"key":"ref72","series-title":"8th International Symposium, RAID 2005","article-title":"Recent advances in intrusion detection","author":"Valdes","year":"2006"},{"key":"ref73","series-title":"2013 10th Working Conference on Mining Software Repositories (MSR); 2013 May 18\u201319; San Francisco, CA, USA","first-page":"329","article-title":"Rendezvous: a search engine for binary code","author":"Khoo","year":"2013"},{"unstructured":"Tencent. BinaryAI Python SDK [Internet]. [cited 2025 Sep 9]. 
Available from: https:\/\/github.com\/binaryai\/sdk.","key":"ref74"},{"key":"ref75","series-title":"Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis","first-page":"1","article-title":"jTrans: jump-aware transformer for binary code similarity detection","author":"Wang","year":"2022"},{"unstructured":"Zhu W, Wang H, Zhou Y, Wang J, Sha Z, Gao Z, et al. kTrans: knowledge-aware transformer for binary code embedding. arXiv:2308.12659. 2023.","key":"ref76"},{"author":"He","article-title":"Code is not natural language: unlock the power of semantics-oriented graph representation for binary code similarity detection","series-title":"33rd USENIX Security Symposium (USENIX Security 24); 2024 Aug 14\u201316; Philadelphia, PA, USA","key":"ref77"},{"key":"ref78","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3175492","volume":"21","author":"Alrabaee","year":"2018","journal-title":"ACM Trans Priv Secur"},{"key":"ref79","series-title":"Proceedings of the 36th International Conference on Machine Learning; 2019 Jun 9\u201315; Long Beach, CA, USA","first-page":"3835","article-title":"Graph matching networks for learning the similarity of graph structured objects","author":"Li"},{"key":"ref80","series-title":"Proceedings of the 13th Workshop on Programming Languages and Analysis for Security","first-page":"42","article-title":"Binary similarity detection using machine learning","author":"Shalev","year":"2018"},{"key":"ref81","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1007\/978-3-319-60876-1_14","author":"Shirani","year":"2017","journal-title":"Detection of intrusions and malware, and vulnerability assessment"},{"key":"ref82","series-title":"Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","first-page":"461","article-title":"Kam1n0: MapReduce-based assembly clone search for reverse engineering","author":"Ding","year":"2016"},{"key":"ref83","series-title":"2014 
Eighth International Conference on Software Security and Reliability (SERE); 2014 Jun 30\u2013Jul 2; San Francisco, CA, USA","first-page":"78","article-title":"BinClone: detecting code clones in malware","author":"Farhadi"},{"key":"ref84","series-title":"ICT Systems Security and Privacy Protection: 32nd IFIP TC 11 International Conference, SEC 2017","first-page":"341","article-title":"Binsign: fingerprinting binary functions to support automated analysis of code executables","author":"Nouh","year":"2017"},{"key":"ref85","series-title":"Proceedings of the Eighth ACM Conference on Data and Application Security and Privacy","first-page":"354","article-title":"Beyond precision and recall: understanding uses (and misuses) of similarity hashes in binary analysis","author":"Pagani","year":"2018"},{"key":"ref86","doi-asserted-by":"crossref","first-page":"103856","DOI":"10.1016\/j.cose.2024.103856","article-title":"Malware similarity and a new fuzzy hash: compound Code Block Hash (CCBHash)","volume":"142","author":"Onieva","year":"2024","journal-title":"Comput Secur"},{"key":"ref87","series-title":"2016 IEEE Trustcom\/BigDataSE\/ISPA; 2016 Aug 23\u201326; Tianjin, China","first-page":"1782","article-title":"Forensic malware analysis: the value of fuzzy hashing algorithms in identifying similarities","author":"Sarantinos"},{"unstructured":"Dullien T. Searching statically-linked vulnerable library functions in executable code [Internet]. [cited 2025 Sep 9]. 
Available from: https:\/\/googleprojectzero.blog.","key":"ref88"},{"author":"Massarelli","article-title":"Investigating graph embedding neural networks with unsupervised features extraction for binary analysis","series-title":"Proceedings of the 2019 Workshop on Binary Analysis Research; 2019 Feb 24; San Diego, CA, USA","key":"ref89"},{"key":"ref90","first-page":"1145","article-title":"Order matters: semantic-aware neural networks for binary code similarity detection","volume":"34","author":"Yu","year":"2020","journal-title":"Proc AAAI Conf Artif Intell"},{"unstructured":"Pei K, Xuan Z, Yang J, Jana S, Ray B. Trex: learning execution semantics from micro-traces for binary similarity. arXiv:2012.08680. 2020.","key":"ref91"},{"key":"ref92","series-title":"2021 51st Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN); 2021 Jun 21\u201324; Taipei, Taiwan","first-page":"224","article-title":"Asteria: deep learning-based AST-encoding for cross-platform binary code similarity detection","author":"Yang"},{"key":"ref93","series-title":"Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security","first-page":"3236","article-title":"PalmTree: learning an assembly language model for instruction embedding","author":"Li","year":"2021"},{"key":"ref94","series-title":"Proceedings of the 38th Annual Computer Security Applications Conference","first-page":"361","article-title":"Practical binary code similarity detection with BERT-based transferable similarity learning","author":"Ahn","year":"2022"},{"key":"ref95","series-title":"Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis","first-page":"151","article-title":"Improving cross-platform binary analysis using representation learning via graph alignment","author":"Kim","year":"2022"},{"author":"Luo","article-title":"VulHawk: cross-architecture vulnerability detection with entropy-based binary code search","series-title":"Proceedings 
of the 2023 Network and Distributed System Security Symposium; 2023 Feb 27\u2013Mar 3; San Diego, CA, USA","key":"ref96"},{"unstructured":"Collyer J, Watson T, Phillips I. FASER: binary code similarity search through the use of intermediate representations. arXiv:2310.03605. 2023.","key":"ref97"},{"key":"ref98","first-page":"1","article-title":"Asteria-pro: enhancing deep learning-based binary code similarity detection by incorporating domain knowledge","volume":"33","author":"Yang","year":"2024","journal-title":"ACM Trans Softw Eng Methodol"},{"key":"ref99","first-page":"1","article-title":"sem2vec: semantics-aware assembly tracelet embedding","volume":"32","author":"Wang","year":"2023","journal-title":"ACM Trans Softw Eng Methodol"},{"doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, Duan N, Feng X, Gong M, et al. CodeBERT: a pre-trained model for programming and natural languages. arXiv:2002.08155. 2020.","key":"ref100","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"ref101","series-title":"ICML\u201920: Proceedings of the 37th International Conference on Machine Learning; 2020 Jul 13\u201318; Online","first-page":"5110","article-title":"Learning and evaluating contextual embedding of source code","author":"Kanade"},{"key":"ref102","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3635711","article-title":"Representation learning for stack overflow posts: how far are we?","volume":"33","author":"He","year":"2024","journal-title":"ACM Trans Softw Eng Methodol"},{"unstructured":"Guo D, Ren S, Lu S, Feng Z, Tang D, Liu S, et al. GraphCodeBERT: pre-training code representations with data flow. arXiv:2009.08366. 2020.","key":"ref103"},{"doi-asserted-by":"crossref","unstructured":"Ahmad WU, Chakraborty S, Ray B, Chang KW. Unified pre-training for program understanding and generation. arXiv:2103.06333. 
2021.","key":"ref104","DOI":"10.18653\/v1\/2021.naacl-main.211"},{"doi-asserted-by":"crossref","unstructured":"Wang Y, Wang W, Joty S, Hoi SCH. CodeT5: identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv:2109.00859. 2021.","key":"ref105","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"unstructured":"CodeGPT. CodeGPT: AI agents for software development; 2025 [Internet]. [cited 2025 Sep 9]. Available from: https:\/\/codegpt.co\/.","key":"ref106"},{"unstructured":"OpenAI. OpenAI Codex; 2021 [Internet]. [cited 2025 Sep 9]. Available from: https:\/\/openai.com\/codex\/.","key":"ref107"},{"unstructured":"Microsoft. Microsoft Copilot; 2023 [Internet]. [cited 2025 Sep 9]. Available from: https:\/\/copilot.microsoft.com\/.","key":"ref108"},{"key":"ref109","series-title":"International Joint Conference on Neural Networks (IJCNN); 2022 Jul 18\u201323; Padua, Italy","first-page":"1","article-title":"VulBERTa: simplified source code pre-training for vulnerability detection","author":"Hanif"},{"doi-asserted-by":"crossref","unstructured":"Guo D, Lu S, Duan N, Wang Y, Zhou M, Yin J. UniXcoder: unified cross-modal pre-training for code representation. arXiv:2203.03850. 2022.","key":"ref110","DOI":"10.18653\/v1\/2022.acl-long.499"},{"key":"ref111","series-title":"Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","first-page":"18","article-title":"NatGen: generative pre-training by \u201cnaturalizing\u201d source code","author":"Chakraborty","year":"2022"},{"unstructured":"Fried D, Aghajanyan A, Lin J, Wang S, Wallace E, Shi F, et al. InCoder: a generative model for code infilling and synthesis. arXiv:2204.05999. 2022.","key":"ref112"},{"unstructured":"Nijkamp E, Pang B, Hayashi H, Tu L, Wang H, Zhou Y, et al. CodeGen: an open large language model for code with multi-turn program synthesis. arXiv:2203.13474. 
2022.","key":"ref113"},{"key":"ref114","series-title":"2023 IEEE International Conference on Software Maintenance and Evolution (ICSME); 2023 Oct 1\u20136; Bogot\u00e1, Colombia","first-page":"182","article-title":"CCBERT: self-supervised code change representation learning","author":"Zhou"},{"unstructured":"OpenAI; GPT-4. 2023 Mar 14 [Internet]. [cited 2025 Sep 9]. Available from: https:\/\/openai.com\/index\/gpt-4-research\/.","key":"ref115"},{"unstructured":"Meta. Code Llama. 2023 [Internet]. [cited 2025 Sep 9]. Available from: https:\/\/codellama.dev\/about.","key":"ref116"},{"unstructured":"DeepSeek. 2025 [Internet]. [cited 2025 Sep 9]. Available from: https:\/\/www.deepseek.com\/.","key":"ref117"},{"doi-asserted-by":"crossref","unstructured":"Tan H, Luo Q, Li J, Zhang Y. LLM4Decompile: decompiling binary code with large language models. arXiv:2403.05286. 2024.","key":"ref118","DOI":"10.18653\/v1\/2024.emnlp-main.203"},{"author":"Hu","article-title":"DeGPT: optimizing decompiler output with LLM","series-title":"Proceedings of the 2024 Network and Distributed System Security Symposium; 2024 Feb 26\u2013Mar 1; San Diego, CA, USA","key":"ref119"},{"doi-asserted-by":"crossref","unstructured":"Shang X, Chen G, Cheng S, Wu B, Hu L, Li G, et al. BinMetric: a comprehensive binary analysis benchmark for large language models. arXiv:2505.07360. 
2025.","key":"ref120","DOI":"10.24963\/ijcai.2025\/858"},{"key":"ref121","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3716822","article-title":"LLM-powered static binary taint analysis","volume":"34","author":"Liu","year":"2025","journal-title":"ACM Trans Softw Eng Methodol"},{"key":"ref122","series-title":"2024 IEEE International Conference on Software Maintenance and Evolution (ICSME); 2024 Oct 6\u201311; Flagstaff, AZ, USA","first-page":"1","article-title":"How far have we gone in binary code understanding using large language models","author":"Shang"},{"key":"ref123","series-title":"33rd USENIX Security Symposium (USENIX Security 24); 2024 Aug 14\u201316; Philadelphia, PA, USA","first-page":"829","article-title":"Large language models for code analysis: do LLMs really do their job?","author":"Fang"},{"key":"ref124","series-title":"2024 IEEE Symposium on Security and Privacy (SP); 2024 May 19\u201323; San Francisco, CA, USA","first-page":"862","article-title":"LLMs cannot reliably identify and reason about security vulnerabilities (yet?): a comprehensive evaluation, framework, and benchmarks","author":"Ullah"},{"unstructured":"Liu F, Liu Y, Shi L, Huang H, Wang R, Yang Z, et al. Exploring and evaluating hallucinations in LLM-powered code generation. arXiv:2404.00971. 2024.","key":"ref125"},{"unstructured":"Chen M, Zhu T, Zhang M, He Y, Lin M, Li P, et al. Explainer-guided targeted adversarial attacks against binary code similarity detection models. arXiv:2506.05430. 
2025.","key":"ref126"}],"container-title":["Computers, Materials & Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-85-3\/TSP_CMC_70195\/TSP_CMC_70195.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:25:17Z","timestamp":1763346317000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v85n3\/64200"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":126,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.070195","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2025-07-10","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-09-10","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-10-23","order":2,"name":"published","label":"Published Online","group":{"name":"publication_history","label":"Publication History"}}]}}