{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:41:51Z","timestamp":1763106111421,"version":"3.41.2"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031976193","type":"print"},{"value":"9783031976209","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-97620-9_15","type":"book-chapter","created":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T09:28:43Z","timestamp":1752139723000},"page":"267-286","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Exploring the\u00a0Potential of\u00a0LLMs for\u00a0Code Deobfuscation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0597-7788","authenticated-orcid":false,"given":"David","family":"Beste","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8776-8770","authenticated-orcid":false,"given":"Gr\u00e9goire","family":"Menguy","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6911-9390","authenticated-orcid":false,"given":"Hossein","family":"Hajipour","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8949-9896","authenticated-orcid":false,"given":"Mario","family":"Fritz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3807-6417","authenticated-orcid":false,"given":"Antonio Emanuele","family":"Cin\u00e0","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6509-3506","authenticated-orcid":false,"given":"S\u00e9bastien","family":"Bardin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2783-1264","authenticated-orcid":false,"given":"Thorsten","family":"Holz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4706-260X","authenticated-orcid":false,"given":"Thorsten","family":"Eisenhofer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3779-7781","authenticated-orcid":false,"given":"Lea","family":"Sch\u00f6nherr","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,10]]},"reference":[{"key":"15_CR1","doi-asserted-by":"publisher","unstructured":"Achiam, J., et\u00a0al.: GPT-4 technical report. arXiv preprint (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.08774","DOI":"10.48550\/arXiv.2303.08774"},{"key":"15_CR2","doi-asserted-by":"publisher","unstructured":"Armengol-Estap\u00e9, J., Woodruff, J., Brauckmann, A., Magalh\u00e3es, J.W.d.S., O\u2019Boyle, M.F.: Exebench: an ML-scale dataset of executable C functions. In: Proceedings of the 6th ACM SIGPLAN International Symposium on Machine Programming (MAPS), pp. 50\u201359 (2022). https:\/\/doi.org\/10.1145\/3520312.3534867","DOI":"10.1145\/3520312.3534867"},{"key":"15_CR3","doi-asserted-by":"publisher","unstructured":"Bardin, S., David, R., Marion, J.Y.: Backward-bounded DSE: targeting infeasibility questions on obfuscated codes. In: IEEE Symposium on Security and Privacy (S &P), pp. 633\u2013651 (2017). https:\/\/doi.org\/10.1109\/SP.2017.36","DOI":"10.1109\/SP.2017.36"},{"key":"15_CR4","unstructured":"Blazytko, T., Contag, M., Aschermann, C., Holz, T.: Syntia: synthesizing the semantics of obfuscated code. In: USENIX Security Symposium, pp. 643\u2013659 (2017). https:\/\/dl.acm.org\/doi\/10.5555\/3241189.3241240"},{"key":"15_CR5","unstructured":"Collberg, C.: RandomizeArguments\u2014tigress.wtf. https:\/\/tigress.wtf\/randomizeArguments.html. Accessed 30 Apr 2025"},{"key":"15_CR6","unstructured":"Collberg, C.: Recipes\u2014tigress.wtf. https:\/\/tigress.wtf\/recipes.html. Accessed 30 Apr 2025"},{"key":"15_CR7","unstructured":"Collberg, C.: The Tigress C Diversifier\/Obfuscator. https:\/\/tigress.wtf\/index.html. Accessed 30 Apr 2025"},{"key":"15_CR8","unstructured":"Collberg, C., Thomborson, C., Low, D.: A taxonomy of obfuscating transformations. Technical report, The University of Auckland, New Zealand (1997)"},{"key":"15_CR9","unstructured":"Collberg, C.S., Nagra, J.: Surreptitious Software - Obfuscation, Watermarking, and Tamperproofing for Software Protection. Addison-Wesley (2010). https:\/\/dl.acm.org\/doi\/10.5555\/1594894"},{"key":"15_CR10","doi-asserted-by":"publisher","unstructured":"Collberg, C.S., Thomborson, C.D., Low, D.: Manufacturing cheap, resilient, and stealthy opaque constructs. In: ACM Symposium on Principles of Programming Languages (POPL), pp. 184\u2013196. https:\/\/doi.org\/10.1145\/268946.268962","DOI":"10.1145\/268946.268962"},{"key":"15_CR11","doi-asserted-by":"publisher","unstructured":"Coogan, K., Lu, G., Debray, S.: Deobfuscation of virtualization-obfuscated software: a semantics-based approach. In: ACM Conference on Computer and Communications Security (CCS), pp. 275\u2013284 (2011). https:\/\/doi.org\/10.1145\/2046707.2046739","DOI":"10.1145\/2046707.2046739"},{"key":"15_CR12","unstructured":"Dettmers, T., Lewis, M., Belkada, Y., Zettlemoyer, L.: Llm.int8(): 8-bit matrix multiplication for transformers at scale. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 30318\u201330332 (2022). https:\/\/dl.acm.org\/doi\/10.5555\/3600270.3602468"},{"key":"15_CR13","doi-asserted-by":"publisher","unstructured":"Eyrolles, N., Goubin, L., Videau, M.: Defeating MBA-based obfuscation. In: Proceedings of the 2016 ACM Workshop on Software PROtection, pp. 27\u201338 (2016). https:\/\/doi.org\/10.1145\/2995306.2995308","DOI":"10.1145\/2995306.2995308"},{"key":"15_CR14","doi-asserted-by":"publisher","unstructured":"Fan, Z., Gao, X., Mirchev, M., Roychoudhury, A., Tan, S.H.: Automated repair of programs from large language models. In: International Conference on Software Engineering (ICSE), pp. 1469\u20131481 (2023). https:\/\/doi.org\/10.1109\/ICSE48619.2023.00128","DOI":"10.1109\/ICSE48619.2023.00128"},{"key":"15_CR15","doi-asserted-by":"publisher","unstructured":"Feng, Z., et al.: CodeBERT: a pre-trained model for programming and natural languages. In: Findings of the Association for Computational Linguistics (EMNLP), pp. 1536\u20131547 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.139","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"15_CR16","doi-asserted-by":"publisher","unstructured":"Guo, D., et\u00a0al.: Deepseek-coder: when the large language model meets programming\u2013the rise of code intelligence. arXiv preprint (2024). https:\/\/doi.org\/10.48550\/arXiv.2401.14196","DOI":"10.48550\/arXiv.2401.14196"},{"key":"15_CR17","doi-asserted-by":"publisher","unstructured":"Hajipour, H., Hassler, K., Holz, T., Sch\u00f6nherr, L., Fritz, M.: Codelmsec benchmark: systematically evaluating and finding security vulnerabilities in black-box code language models. In: IEEE Conference on Secure and Trustworthy Machine Learning (SaTML), pp. 684\u2013709 (2024). https:\/\/doi.org\/10.1109\/SaTML59370.2024.00040","DOI":"10.1109\/SaTML59370.2024.00040"},{"key":"15_CR18","doi-asserted-by":"publisher","unstructured":"Hajipour, H., Malinowski, M., Fritz, M.: IReEn: reverse-engineering of black-box functions via iterative neural program synthesis. In: Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML PKDD), pp. 143\u2013157 (2021). https:\/\/doi.org\/10.1007\/978-3-030-93733-1_10","DOI":"10.1007\/978-3-030-93733-1_10"},{"key":"15_CR19","doi-asserted-by":"publisher","unstructured":"Hajipour, H., Sch\u00f6nherr, L., Holz, T., Fritz, M.: Hexacoder: secure code generation via oracle-guided synthetic training data. arXiv preprint (2024). https:\/\/doi.org\/10.48550\/arXiv.2409.06446","DOI":"10.48550\/arXiv.2409.06446"},{"key":"15_CR20","unstructured":"Halstead, M.H.: Elements of Software Science (Operating and programming systems series). Elsevier Science Inc. (1977). https:\/\/dl.acm.org\/doi\/10.5555\/540137"},{"key":"15_CR21","doi-asserted-by":"publisher","unstructured":"Hammad, M., Garcia, J., Malek, S.: A large-scale empirical study on the effects of code obfuscations on android apps and anti-malware products. In: International Conference on Software Engineering (ICSE), pp. 421\u2013431 (2018). https:\/\/doi.org\/10.1145\/3180155.3180228","DOI":"10.1145\/3180155.3180228"},{"key":"15_CR22","doi-asserted-by":"publisher","unstructured":"Jiang, N., Liu, K., Lutellier, T., Tan, L.: Impact of code language models on automated program repair. In: International Conference on Software Engineering (ICSE), pp. 10 pp.\u201354 (2023). https:\/\/doi.org\/10.1109\/ICSE48619.2023.00125","DOI":"10.1109\/ICSE48619.2023.00125"},{"key":"15_CR23","doi-asserted-by":"publisher","unstructured":"Katzmarski, B., Koschke, R.: Program complexity metrics and programmer opinions. In: 20th IEEE International Conference on Program Comprehension (ICPC), pp. 17\u201326 (2012). https:\/\/doi.org\/10.1109\/ICPC.2012.6240486","DOI":"10.1109\/ICPC.2012.6240486"},{"key":"15_CR24","unstructured":"Lattner, C.: LLVM and clang: Next generation compiler technology. In: The BSD Conference, pp. 1\u201320 (2008)"},{"key":"15_CR25","doi-asserted-by":"publisher","unstructured":"Linn, C., Debray, S.K.: Obfuscation of executable code to improve resistance to static disassembly. In: ACM Conference on Computer and Communications Security (CCS), pp. 290\u2013299 (2003). https:\/\/doi.org\/10.1145\/948109.948149","DOI":"10.1145\/948109.948149"},{"key":"15_CR26","unstructured":"Liu, B., Shen, J., Ming, J., Zheng, Q., Li, J., Xu, D.: MBA-blast: unveiling and simplifying mixed Boolean-arithmetic obfuscation. In: USENIX Security Symposium, pp. 2351\u20132365 (2021)"},{"key":"15_CR27","doi-asserted-by":"publisher","unstructured":"Liu, Z., Wang, S.: How far we have come: testing decompilation correctness of C decompilers. In: Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA), pp. 475\u2013487 (2020). https:\/\/doi.org\/10.1145\/3395363.3397370","DOI":"10.1145\/3395363.3397370"},{"key":"15_CR28","doi-asserted-by":"publisher","unstructured":"McCabe, T.J.: A complexity measure. TSE 308\u2013320 (1976). https:\/\/doi.org\/10.1109\/TSE.1976.233837","DOI":"10.1109\/TSE.1976.233837"},{"key":"15_CR29","doi-asserted-by":"publisher","unstructured":"Menguy, G., Bardin, S., Bonichon, R., Lima, C.D.S.: Search-based local black-box deobfuscation: understand, improve and mitigate. In: ACM Conference on Computer and Communications Security (CCS), pp. 2513\u20132525 (2021). https:\/\/doi.org\/10.1145\/3460120.3485250","DOI":"10.1145\/3460120.3485250"},{"key":"15_CR30","doi-asserted-by":"publisher","unstructured":"Ming, J., Xu, D., Wang, L., Wu, D.: Loop: logic-oriented opaque predicate detection in obfuscated binary code. In: ACM Conference on Computer and Communications Security (CCS), pp. 757\u2013768 (2015). https:\/\/doi.org\/10.1145\/2810103.2813617","DOI":"10.1145\/2810103.2813617"},{"key":"15_CR31","doi-asserted-by":"publisher","unstructured":"Reichenwallner, B., Meerwald-Stadler, P.: Efficient deobfuscation of linear mixed Boolean-arithmetic expressions. In: CheckMATE Workshop, pp. 19\u201328 (2022). https:\/\/doi.org\/10.1145\/3560831.3564256","DOI":"10.1145\/3560831.3564256"},{"key":"15_CR32","doi-asserted-by":"publisher","unstructured":"Reichenwallner, B., Meerwald-Stadler, P.: Simplification of general mixed Boolean-arithmetic expressions: GAMBA. In: IEEE European Symposium on Security and Privacy (EuroS &P) Workshops, pp. 427\u2013438 (2023). https:\/\/doi.org\/10.1109\/EuroSPW59978.2023.00053","DOI":"10.1109\/EuroSPW59978.2023.00053"},{"key":"15_CR33","doi-asserted-by":"publisher","unstructured":"Roziere, B., et\u00a0al.: Code llama: open foundation models for code. arXiv preprint (2023). https:\/\/doi.org\/10.48550\/arXiv.2308.12950","DOI":"10.48550\/arXiv.2308.12950"},{"key":"15_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"372","DOI":"10.1007\/978-3-319-93411-2_17","volume-title":"Detection of Intrusions and Malware, and Vulnerability Assessment","author":"J Salwan","year":"2018","unstructured":"Salwan, J., Bardin, S., Potet, M.-L.: Symbolic deobfuscation: from virtualized code back to the original. In: Giuffrida, C., Bardin, S., Blanc, G. (eds.) DIMVA 2018. LNCS, vol. 10885, pp. 372\u2013392. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-93411-2_17"},{"key":"15_CR35","doi-asserted-by":"publisher","DOI":"10.1145\/2886012","author":"S Schrittwieser","year":"2016","unstructured":"Schrittwieser, S., Katzenbeisser, S., Kinder, J., Merzdovnik, G., Weippl, E.: Protecting software through obfuscation: can it keep pace with progress in code analysis? ACM Comput. Surv. (CSUR) (2016). https:\/\/doi.org\/10.1145\/2886012","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"15_CR36","doi-asserted-by":"publisher","unstructured":"Tofighi-Shirazi, R., As\u0103voae, I.M., Elbaz-Vincent, P.: Fine-grained static detection of obfuscation transforms using ensemble-learning and semantic reasoning. In: Proceedings of the 9th Workshop on Software Security, Protection, and Reverse Engineering (SSPREW), pp. 1\u201312 (2019). https:\/\/doi.org\/10.1145\/3371307.3371313","DOI":"10.1145\/3371307.3371313"},{"key":"15_CR37","doi-asserted-by":"publisher","unstructured":"Udupa, S.K., Debray, S.K., Madou, M.: Deobfuscation: reverse engineering obfuscated code. In: 12th Working Conference on Reverse Engineering (WCRE\u201905), pp. 10 pp.\u201354 (2005). https:\/\/doi.org\/10.1109\/WCRE.2005.13","DOI":"10.1109\/WCRE.2005.13"},{"key":"15_CR38","unstructured":"Wang, C., Hill, J., Knight, J., Davidson, J.: Software tamper resistance: Obstructing static analysis of programs. Technical report, University of Virginia (2000). https:\/\/dl.acm.org\/doi\/10.5555\/900898"},{"key":"15_CR39","doi-asserted-by":"publisher","unstructured":"Wang, Y., Le, H., Gotmare, A., Bui, N., Li, J., Hoi, S.: Codet5+: open code large language models for code understanding and generation. In: Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1069\u20131088 (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.68","DOI":"10.18653\/v1\/2023.emnlp-main.68"},{"key":"15_CR40","doi-asserted-by":"publisher","unstructured":"Wang, Y., Wang, W., Joty, S., Hoi, S.C.: Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. In: Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 8696\u20138708 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.685","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"15_CR41","doi-asserted-by":"publisher","unstructured":"Yadegari, B., Johannesmeyer, B., Whitely, B., Debray, S.: A generic approach to automatic deobfuscation of executable code. In: IEEE Symposium on Security and Privacy (S &P), pp. 674\u2013691 (2015). https:\/\/doi.org\/10.1109\/SP.2015.47","DOI":"10.1109\/SP.2015.47"}],"container-title":["Lecture Notes in Computer Science","Detection of Intrusions and Malware, and Vulnerability Assessment"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-97620-9_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T09:28:47Z","timestamp":1752139727000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-97620-9_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031976193","9783031976209"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-97620-9_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"10 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"DIMVA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Graz","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Austria","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dimva2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dimva.org\/dimva2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}