{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:00:23Z","timestamp":1750309223292,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T00:00:00Z","timestamp":1719792000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1145\/3634737.3657029","type":"proceedings-article","created":{"date-parts":[[2024,6,28]],"date-time":"2024-06-28T11:51:38Z","timestamp":1719575498000},"page":"1143-1158","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On the Role of Pre-trained Embeddings in Binary Code Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-6102-5424","authenticated-orcid":false,"given":"Alwin","family":"Maier","sequence":"first","affiliation":[{"name":"Max Planck Institute for Solar System Research, G\u00f6ttingen, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2631-9031","authenticated-orcid":false,"given":"Felix","family":"Wei\u00dfberg","sequence":"additional","affiliation":[{"name":"Technische Universit\u00e4t Berlin, Berlin, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5054-8758","authenticated-orcid":false,"given":"Konrad","family":"Rieck","sequence":"additional","affiliation":[{"name":"Technische Universit\u00e4t Berlin, Berlin, Germany"},{"name":"Berlin Institute for the Foundations of Learning and Data (BIFOLD), Berlin, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proc. of the Conference on Empirical Methods in Natural Language Processing. 5799--5811","author":"Aghajanyan Armen","year":"2021","unstructured":"Armen Aghajanyan, Anchit Gupta, Akshat Shrivastava, Xilun Chen, Luke Zettlemoyer, and Sonal Gupta. 2021. Muppet: Massive Multi-task Representations with Pre-Finetuning. In Proc. of the Conference on Empirical Methods in Natural Language Processing. 5799--5811."},{"key":"e_1_3_2_1_2_1","volume-title":"Proc. of the Annual Computer Security Applications Conference (AC-SAC). 361--374","author":"Ahn Sunwoo","year":"2022","unstructured":"Sunwoo Ahn, Seonggwan Ahn, Hyungjoon Koo, and Yunheung Paek. 2022. Practical Binary Code Similarity Detection with BERT-based Transferable Similarity Learning.. In Proc. of the Annual Computer Security Applications Conference (AC-SAC). 361--374."},{"key":"e_1_3_2_1_3_1","volume-title":"Proc. of the Annual Computer Security Applications Conference (ACSAC). 84--96","author":"Alves-Foss Jim","year":"2019","unstructured":"Jim Alves-Foss and Jia Song. 2019. Function boundary detection in stripped binaries. In Proc. of the Annual Computer Security Applications Conference (ACSAC). 84--96."},{"key":"e_1_3_2_1_4_1","volume-title":"Proc. of the International Joint Conference on Neural Networks (IJCNN).","author":"Biswas Sajib","year":"2022","unstructured":"Sajib Biswas, Timothy Barao, John Lazzari, Jeret McCoy, Xiuwen Liu, and Alexander Kostandarithes. 2022. Geometric Analysis and Metric Learning of Instruction Embeddings. In Proc. of the International Joint Conference on Neural Networks (IJCNN)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0218001493000339"},{"key":"e_1_3_2_1_6_1","volume-title":"Proc. of the Intelligent Systems Conference (IntelliSys).","author":"Chen Yu","year":"2018","unstructured":"Yu Chen, Zhiqiang Shi, Hong Li, Weiwei Zhao, Yiliang Liu, and Yuansong Qiao. 2018. HIMALIA: Recovering Compiler Optimization Levels from Binaries by Deep Learning. In Proc. of the Intelligent Systems Conference (IntelliSys)."},{"key":"e_1_3_2_1_7_1","volume-title":"Proc. of the USENIX Security Symposium. 99--116","author":"Chua Zheng Leong","year":"2017","unstructured":"Zheng Leong Chua, Shiqi Shen, Prateek Saxena, and Zhenkai Liang. 2017. Neural Nets Can Learn Function Type Signatures From Binaries.. In Proc. of the USENIX Security Symposium. 99--116."},{"key":"e_1_3_2_1_8_1","volume-title":"Proc. of the USENIX Security Symposium. 99--116","author":"Chua Zheng Leong","year":"2017","unstructured":"Zheng Leong Chua, Shiqi Shen, Prateek Saxena, and Zhenkai Liang. 2017. Neural Nets Can Learn Function Type Signatures From Binaries. In Proc. of the USENIX Security Symposium. 99--116."},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. of the International Conference on Machine Learning (ICML). 2702--2711","author":"Dai Hanjun","year":"2016","unstructured":"Hanjun Dai, Bo Dai, and Le Song. 2016. Discriminative Embeddings of Latent Variable Models for Structured Data. In Proc. of the International Conference on Machine Learning (ICML). 2702--2711."},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. of the Conference of The North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT).","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proc. of the Conference of The North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)."},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. of the IEEE Symposium on Security and Privacy.","author":"Ding Steven","year":"2019","unstructured":"Steven Ding, Benjamin Fung, and Philippe Charland. 2019. Asm2Vec: Boosting Static Representation Robustness for Binary Clone Search against Code Obfuscation and Compiler Optimization. In Proc. of the IEEE Symposium on Security and Privacy."},{"key":"e_1_3_2_1_12_1","volume-title":"Proc. of the 31st International Conference on Machine Learning. 647--655","author":"Donahue Jeff","year":"2014","unstructured":"Jeff Donahue, Yangqing Jia, Oriol Vinyals, Judy Hoffman, Ning Zhang, Eric Tzeng, and Trevor Darrell. 2014. DeCAF: A Deep Convolutional Activation Feature for Generic Visual Recognition. In Proc. of the 31st International Conference on Machine Learning. 647--655."},{"key":"e_1_3_2_1_13_1","unstructured":"DWARF Debugging Information Format Committee 2010. DWARF debugging information format. DWARF Debugging Information Format Committee. Version 4."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1045"},{"key":"e_1_3_2_1_15_1","volume-title":"An introduction to ROC analysis. Pattern recognition letters 27, 8","author":"Fawcett Tom","year":"2006","unstructured":"Tom Fawcett. 2006. An introduction to ROC analysis. Pattern recognition letters 27, 8 (2006), 861--874."},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. of the ACM Conference on Computer and Communications Security (CCS). 480--491","author":"Feng Qian","year":"2016","unstructured":"Qian Feng, Rundong Zhou, Chengcheng Xu, Yao Cheng, Brian Testa, and Heng Yin. 2016. Scalable Graph-based Bug Search for Firmware Images. In Proc. of the ACM Conference on Computer and Communications Security (CCS). 480--491."},{"key":"e_1_3_2_1_17_1","volume-title":"Proc. of the USENIX Security Symposium.","author":"Guo Wenbo","year":"2019","unstructured":"Wenbo Guo, Dongliang Mu, Xinyu Xing, Min Du, and Dawn Song. 2019. DEEP-VSA: Facilitating Value-set Analysis with Deep Learning for Postmortem Program Analysis. In Proc. of the USENIX Security Symposium."},{"volume-title":"Proc. of the ACM Conference on Computer and Communications Security (CCS). 1667--1680","author":"He Jingxuan","key":"e_1_3_2_1_18_1","unstructured":"Jingxuan He, Pesho Ivanov, Petar Tsankov, Veselin Raychev, and Martin T. Vechev. 2018. Debin: Predicting Debug Information in Stripped Binaries.. In Proc. of the ACM Conference on Computer and Communications Security (CCS). 1667--1680."},{"key":"e_1_3_2_1_19_1","volume-title":"Proc. of the ACM Conference on Computer and Communications Security (CCS). 1631--1645","author":"Jin Xin","year":"2022","unstructured":"Xin Jin, Kexin Pei, Jun Yeon Won, and Zhiqiang Lin. 2022. SymLM: Predicting Function Names in Stripped Binaries via Context-Sensitive Execution-Aware Code Embeddings.. In Proc. of the ACM Conference on Computer and Communications Security (CCS). 1631--1645."},{"key":"e_1_3_2_1_20_1","volume-title":"Proc. of the International Conference on Machine Learning (ICML).","author":"Le Quoc","year":"2014","unstructured":"Quoc Le and Tomas Mikolov. 2014. Distributed Representations of Sentences and Documents. In Proc. of the International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_21_1","volume-title":"Sung Hoon Baek, and Ki-Woong Park","author":"Lee Yongjun","year":"2019","unstructured":"Yongjun Lee, Hyun Kwon, Sang-Hoon Choi, Seung-Ho Lim, Sung Hoon Baek, and Ki-Woong Park. 2019. Instruction2vec: Efficient Preprocessor of Assembly Code to Detect Software Weakness with CNN. Applied Sciences 9 (2019)."},{"key":"e_1_3_2_1_22_1","volume-title":"Proc. of the International Conference on Internet (ICONI).","author":"Lee Young Jun","year":"2017","unstructured":"Young Jun Lee, Sang-Hoon Choi, Chulwoo Kim, Seung-Ho Lim, and Ki-Woong Park. 2017. Learning Binary Code with Deep Learning to Detect Software Weakness. In Proc. of the International Conference on Internet (ICONI)."},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. of the ACM Conference on Computer and Communications Security (CCS).","author":"Li Xuezixiang","year":"2021","unstructured":"Xuezixiang Li, Yu Qu, and Heng Yin. 2021. PalmTree: Learning an Assembly Language Model for Instruction Embedding. In Proc. of the ACM Conference on Computer and Communications Security (CCS)."},{"key":"e_1_3_2_1_24_1","volume-title":"Proc. of the IEEE international conference on computer vision. 1311--1319","author":"Lin Di","year":"2017","unstructured":"Di Lin, Guangyong Chen, Daniel Cohen-Or, Pheng-Ann Heng, and Hui Huang. 2017. Cascaded feature network for semantic segmentation of RGB-D images. In Proc. of the IEEE international conference on computer vision. 1311--1319."},{"key":"e_1_3_2_1_25_1","volume-title":"Proc. of the Conference on Detection of Intrusions and Malware & Vulnerability Assessment (DIMVA).","author":"Massarelli Luca","year":"2019","unstructured":"Luca Massarelli, Giuseppe Antonio Di Luna, Fabio Petroni, Roberto Baldoni, and Leonardo Querzoni. 2019. SAFE: Self-Attentive Function Embeddings for Binary Similarity. In Proc. of the Conference on Detection of Intrusions and Malware & Vulnerability Assessment (DIMVA)."},{"key":"e_1_3_2_1_26_1","volume-title":"Proc. of the Conference on Detection of Intrusions and Malware & Vulnerability Assessment (DIMVA).","author":"Massarelli Luca","year":"2019","unstructured":"Luca Massarelli, Giuseppe Antonio Di Luna, Fabio Petroni, Roberto Baldoni, and Leonardo Querzoni. 2019. SAFE: Self-Attentive Function Embeddings for Binary Similarity. In Proc. of the Conference on Detection of Intrusions and Malware & Vulnerability Assessment (DIMVA)."},{"key":"e_1_3_2_1_27_1","volume-title":"Proc. of the International Conference on Learning Representations (ICLR Workshop).","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. In Proc. of the International Conference on Learning Representations (ICLR Workshop)."},{"key":"e_1_3_2_1_28_1","volume-title":"Distributed representations of words and phrases and their compositionality. Advances in Neural Information Processing Systems 26","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeff Dean. 2013. Distributed representations of words and phrases and their compositionality. Advances in Neural Information Processing Systems 26 (2013)."},{"key":"e_1_3_2_1_29_1","volume-title":"Proc. of the ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering.","author":"Pei Kexin","year":"2021","unstructured":"Kexin Pei, Jonas Guan, Matthew Broughton, Zhongtian Chen, Songchen Yao, David Williams-King, Vikas Ummadisetty, Junfeng Yang, Baishakhi Ray, and Suman Jana. 2021. StateFormer: Fine-Grained Type Recovery from Binaries using Generative State Modeling. In Proc. of the ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering."},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. of the Network and Distributed System Security Symposium (NDSS).","author":"Pei Kexin","year":"2021","unstructured":"Kexin Pei, Jonas Guan, David Williams-King, Junfeng Yang, and Suman Jana. 2021. XDA: Accurate, Robust Disassembly with Transfer Learning. In Proc. of the Network and Distributed System Security Symposium (NDSS)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3132950"},{"key":"e_1_3_2_1_32_1","volume-title":"Proc. of the Workshop on Binary Analysis Research (BAR).","author":"Redmond Kimberly","year":"2019","unstructured":"Kimberly Redmond, Lannan Luo, and Qiang Zeng. 2019. A cross-architecture instruction embedding model for natural language processing-inspired binary code analysis. In Proc. of the Workshop on Binary Analysis Research (BAR)."},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. of the USENIX Security Symposium. 611--626","author":"Richard Shin Eui Chul","year":"2015","unstructured":"Eui Chul Richard Shin, Dawn Song, and Reza Moazzezi. 2015. Recognizing Functions in Binaries with Neural Networks.. In Proc. of the USENIX Security Symposium. 611--626."},{"key":"e_1_3_2_1_34_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_35_1","volume-title":"Proc. of Annual Meeting of the Association for Computational Linguistics (ACL).","author":"Wang Sinong","year":"2020","unstructured":"Sinong Wang, Madian Khabsa, and Hao Ma. 2020. To Pretrain or Not to Pretrain: Examining the Benefits of Pretrainng on Resource Rich Tasks. In Proc. of Annual Meeting of the Association for Computational Linguistics (ACL)."},{"key":"e_1_3_2_1_36_1","volume-title":"Proc. of the ACM Conference on Computer and Communications Security (CCS).","author":"Xu Xiaojun","year":"2017","unstructured":"Xiaojun Xu, Chang Liu, Qian Feng, Heng Yin, Le Song, and Song Dawn. 2017. Neural Network-based Graph Embedding for Cross-Platform Binary Code Similarity Detection. In Proc. of the ACM Conference on Computer and Communications Security (CCS)."},{"key":"e_1_3_2_1_37_1","volume-title":"Proc. of the ACM Conference on Computer and Communications Security (CCS). 363--376","author":"Xu Xiaojun","year":"2017","unstructured":"Xiaojun Xu, Chang Liu, Qian Feng, Heng Yin, Le Song, and Dawn Song. 2017. Neural Network-based Graph Embedding for Cross-Platform Binary Code Similarity Detection.. In Proc. of the ACM Conference on Computer and Communications Security (CCS). 363--376."},{"key":"e_1_3_2_1_38_1","volume-title":"Proc. of the AAAI Conference on Artificial Intelligence.","author":"Yu Zeping","year":"2020","unstructured":"Zeping Yu, Rui Cao, Qiyi Tang, Sen Nie, Junzhou Huang, and Shi Wu. 2020. Order matters: Semantic-aware neural networks for binary code similarity detection. In Proc. of the AAAI Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_39_1","volume-title":"Proc. of the Network and Distributed System Security Symposium (NDSS).","author":"Zuo Fei","year":"2019","unstructured":"Fei Zuo, Xiaopeng Li, Patrick Young, Lannan Luo, Qiang Zeng, and Zhexin Zhang. 2019. Neural Machine Translation Inspired Binary Code Similarity Comparison beyond Function Pairs.. In Proc. of the Network and Distributed System Security Symposium (NDSS)."}],"event":{"name":"ASIA CCS '24: 19th ACM Asia Conference on Computer and Communications Security","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"],"location":"Singapore Singapore","acronym":"ASIA CCS '24"},"container-title":["Proceedings of the 19th ACM Asia Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3634737.3657029","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:44:07Z","timestamp":1750290247000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3634737.3657029"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7]]},"references-count":39,"alternative-id":["10.1145\/3634737.3657029","10.1145\/3634737"],"URL":"https:\/\/doi.org\/10.1145\/3634737.3657029","relation":{},"subject":[],"published":{"date-parts":[[2024,7]]},"assertion":[{"value":"2024-07-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}