{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T22:14:58Z","timestamp":1766441698649,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,11,22]],"date-time":"2026-11-22T00:00:00Z","timestamp":1795305600000},"content-version":"vor","delay-in-days":368,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172308, 62272351, 61972297, 62172144"],"award-info":[{"award-number":["62172308, 62272351, 61972297, 62172144"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2312185, 2417055"],"award-info":[{"award-number":["2312185, 2417055"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","award":["Google Research Scholar Award"],"award-info":[{"award-number":["Google Research Scholar Award"]}],"id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007875","name":"Tulane University","doi-asserted-by":"publisher","award":["Tulane COR Fellowships"],"award-info":[{"award-number":["Tulane COR Fellowships"]}],"id":[{"id":"10.13039\/100007875","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,19]]},"DOI":"10.1145\/3719027.3744829","type":"proceedings-article","created":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T23:32:38Z","timestamp":1763854358000},"page":"1334-1348","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Analyzing PDFs like Binaries: Adversarially Robust PDF Malware Analysis via Intermediate Representation and Language Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-8106-4101","authenticated-orcid":false,"given":"Side","family":"Liu","sequence":"first","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9682-0502","authenticated-orcid":false,"given":"Jiang","family":"Ming","sequence":"additional","affiliation":[{"name":"Tulane University, New Orleans, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8491-273X","authenticated-orcid":false,"given":"Guodong","family":"Zhou","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3987-088X","authenticated-orcid":false,"given":"Xinyi","family":"Liu","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4639-5824","authenticated-orcid":false,"given":"Jianming","family":"Fu","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5731-8958","authenticated-orcid":false,"given":"Guojun","family":"Peng","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,22]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-014-0365-y"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPW59333.2023.00017"},{"key":"e_1_3_2_2_3_1","unstructured":"PDF Association. 2020. https:\/\/pdfa.org\/stressful-pdf-corpus\/."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP46214.2022.9833659"},{"key":"e_1_3_2_2_5_1","unstructured":"Bernard Bautista. 2023. Threat-Loaded: Malicious PDFs Never Go Out of Style. https:\/\/www.trustwave.com\/en-us\/resources\/blogs\/spiderlabs-blog\/threat-loaded-malicious-pdfs-never-go-out-of-style\/."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2016.23483"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/3489212.3489344"},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of the 26th USENIX Security Symposium (USENIX Security 17)","author":"Chua Zheng Leong","year":"2017","unstructured":"Zheng Leong Chua, Shiqi Shen, Prateek Saxena, and Zhenkai Liang. 2017. Neural Nets Can Learn Function Type Signatures from Binaries. In Proceedings of the 26th USENIX Security Symposium (USENIX Security 17)."},{"key":"e_1_3_2_2_9_1","unstructured":"CISA. 2022. Phishing Infographic. https:\/\/www.cisa.gov\/sites\/default\/files\/2023-02\/phishing-infographic-508c.pdf."},{"key":"e_1_3_2_2_10_1","volume-title":"Proceedings of the 2018 International Conference on Machine Learning (ICML '18)","author":"Dai Hanjun","year":"2018","unstructured":"Hanjun Dai, Hui Li, Tian Tian, Xin Huang, Lin Wang, Jun Zhu, and Le Song. 2018. Adversarial Attack on Graph Structured Data. In Proceedings of the 2018 International Conference on Machine Learning (ICML '18)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133956.3133978"},{"key":"e_1_3_2_2_12_1","volume-title":"Senior Global Marketing Manager","author":"VIPRE Security Group David Bloxberg","year":"2024","unstructured":"VIPRE Security Group David Bloxberg, Senior Global Marketing Manager. 2024. PDFs: Why They Are Such a Popular Attack Vector. https:\/\/safesendsoftware.com\/pdf-exploit-popular-attack-vector\/."},{"key":"e_1_3_2_2_13_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2019.00003"},{"key":"e_1_3_2_2_15_1","unstructured":"David Evans Weilin Xu and Yanjun Qi. 2017. Adversarial Machine Learning: Are We Playing the Wrong Game? https:\/\/www.cs.virginia.edu\/ evans\/talks\/cispa2017\/."},{"key":"e_1_3_2_2_16_1","unstructured":"Canadian Institute for Cybersecurity. 2022. CIC-Evasive-PDFMal2022. https:\/\/www.unb.ca\/cic\/datasets\/pdfmal-2022.html."},{"key":"e_1_3_2_2_17_1","unstructured":"Hex-Rays. 2025. Hex Rays - State-of-the-Art Binary Code Analysis Solutions. https:\/\/hex-rays.com\/ida-pro\/."},{"key":"e_1_3_2_2_18_1","unstructured":"ISO. 2008. ISO 32000--1:2008 - Document management \u2014 Portable document format \u2014 Part 1: PDF 1.7. https:\/\/www.iso.org\/standard\/51502.html."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.5220\/0010908400003120"},{"key":"e_1_3_2_2_20_1","unstructured":"Duff Johnson. 2021. PDF's Popularity Online. https:\/\/pdfa.org\/pdfs-popularity-online\/."},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the 26th USENIX Security Symposium (USENIX Security '17)","author":"Jordaney Roberto","year":"2017","unstructured":"Roberto Jordaney, Kumar Sharad, Santanu K Dash, Zhi Wang, Davide Papini, Ilia Nouretdinov, and Lorenzo Cavallaro. 2017. Transcend: Detecting Concept Drift in Malware Classification Models. In Proceedings of the 26th USENIX Security Symposium (USENIX Security '17)."},{"key":"e_1_3_2_2_22_1","unstructured":"jorisschellekens\/borb. 2024. https:\/\/github.com\/jorisschellekens\/borb."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2076732.2076785"},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings of the 31st International Conference on Machine Learning (ICML '14)","author":"Le Quoc","year":"2014","unstructured":"Quoc Le and Tomas Mikolov. 2014. Distributed Representations of Sentences and Documents. In Proceedings of the 31st International Conference on Machine Learning (ICML '14)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460120.3484587"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2014.92"},{"key":"e_1_3_2_2_27_1","volume-title":"Analyzing PDFs like Binaries: Adversarially Robust PDF Malware Analysis via Intermediate Representation and Language Model. arXiv preprint arXiv:2506.17162","author":"Liu Side","year":"2025","unstructured":"Side Liu, Jiang Ming, Guodong Zhou, Xinyi Liu, Jianming Fu, and Guojun Peng. 2025. Analyzing PDFs like Binaries: Adversarially Robust PDF Malware Analysis via Intermediate Representation and Language Model. arXiv preprint arXiv:2506.17162 (2025)."},{"volume-title":"iText in Action","author":"Lowagie Bruno","key":"e_1_3_2_2_28_1","unstructured":"Bruno Lowagie. 2010. iText in Action, . Simon and Schuster."},{"key":"e_1_3_2_2_29_1","volume-title":"De-obfuscation and Detection of Malicious PDF Files with High Accuracy. In 2013 46th Hawaii International Conference on System Sciences. IEEE, 4890-4899","author":"Lu Xun","year":"2013","unstructured":"Xun Lu, Jianwei Zhuge, Ruoyu Wang, Yinzhi Cao, and Yan Chen. 2013. De-obfuscation and Detection of Malicious PDF Files with High Accuracy. In 2013 46th Hawaii International Conference on System Sciences. IEEE, 4890-4899."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3496123"},{"key":"e_1_3_2_2_31_1","volume-title":"Proceedings of the 2015 International Conference on Information Systems Security and Privacy (ICISSP '15)","author":"Maiorca Davide","year":"2015","unstructured":"Davide Maiorca, Davide Ariu, Igino Corona, and Giorgio Giacinto. 2015. A Structural and Content-ased Approach for a Precise and Robust Detection of Malicious PDF Files. In Proceedings of the 2015 International Conference on Information Systems Security and Privacy (ICISSP '15)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332184"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2484313.2484327"},{"key":"e_1_3_2_2_34_1","first-page":"309","volume-title":"Safe: Self-Attentive Function Embeddings for Binary Similarity. In International Conference on the 16th Detection of Intrusions and Malware, and Vulnerability Assessment (DIMVA '19)","author":"Massarelli Luca","year":"2019","unstructured":"Luca Massarelli, Giuseppe Antonio Di Luna, Fabio Petroni, Roberto Baldoni, and Leonardo Querzoni. 2019. Safe: Self-Attentive Function Embeddings for Binary Similarity. In International Conference on the 16th Detection of Intrusions and Malware, and Vulnerability Assessment (DIMVA '19). 309-329."},{"key":"e_1_3_2_2_35_1","volume-title":"Proceedings of the 26th International Conference on Neural Information Processing Systems (NIPS '13)","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean. 2013. Distributed Representations of Words and Phrases and Their Compositionality. In Proceedings of the 26th International Conference on Neural Information Processing Systems (NIPS '13)."},{"key":"e_1_3_2_2_36_1","unstructured":"MuPDF. 2025. https:\/\/mupdf.com\/."},{"volume-title":"Networks","author":"Newman Mark","key":"e_1_3_2_2_37_1","unstructured":"Mark Newman. 2018. Networks. Oxford University Press."},{"key":"e_1_3_2_2_38_1","volume-title":"Congress. 2019","author":"Library","year":"2020","unstructured":"Library of Congress. 2019. 1000. gov PDF Dataset. https:\/\/www.loc.gov\/item\/2020445568\/."},{"key":"e_1_3_2_2_39_1","unstructured":"OpenAI. 2024. New Embedding Models and API Updates. https:\/\/openai.com\/index\/new-embedding-models-and-api-updates\/."},{"key":"e_1_3_2_2_40_1","unstructured":"Palo Alto Networks. 2023. 2023 Unit 42 Network Threat Trends Research Report. https:\/\/start.paloaltonetworks.com\/unit-42-network-threat-trends-report-malware-2023.html."},{"key":"e_1_3_2_2_41_1","unstructured":"Mila Parkour. 2013. 16 800 Clean and 11 960 Malicious Files for Signature Testing and Research. https:\/\/contagiodump.blogspot.com\/2013\/03\/16800-clean-and-11960-malicious-files.html."},{"key":"e_1_3_2_2_42_1","unstructured":"pdfminer\/pdfminer.six. 2025. https:\/\/github.com\/pdfminer\/pdfminer.six."},{"key":"e_1_3_2_2_43_1","unstructured":"Filipi Pires. 2020. Malware Analysis | Dissecting PDF file. https:\/\/medium.com\/coreshield\/malware-analysis-dissecting-pdf-file-a95a0ffa0dce."},{"key":"e_1_3_2_2_44_1","unstructured":"pmaupin\/pdfrw. 2017. https:\/\/github.com\/pmaupin\/pdfrw."},{"key":"e_1_3_2_2_45_1","unstructured":"Poppler. 2025. https:\/\/poppler.freedesktop.org\/."},{"key":"e_1_3_2_2_46_1","unstructured":"qpdf\/qpdf. 2024. https:\/\/github.com\/qpdf\/qpdf."},{"key":"e_1_3_2_2_47_1","unstructured":"Google Research. 2018. https:\/\/github.com\/google-research\/bert."},{"key":"e_1_3_2_2_48_1","unstructured":"Cuckoo Sandbox. 2018. https:\/\/github.com\/cuckoosandbox."},{"key":"e_1_3_2_2_49_1","unstructured":"Yashvi Shah and Preksha Saxena. 2024. Rise in Deceptive PDF: The Gateway to Malicious Payloads. https:\/\/www.mcafee.com\/blogs\/other-blogs\/mcafee-labs\/rise-in-deceptive-pdf-the-gateway-to-malicious-payloads\/."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/2420950.2420987"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2016.23078"},{"key":"e_1_3_2_2_52_1","unstructured":"SOCRadar. 2024. CVE-2024--4367 in PDF.js Allows JavaScript Execution Potentially Affecting Millions of Websites: Update Now. https:\/\/socradar.io\/cve-2024--4367-in-pdf-js-allows-javascript-execution-potentially-affecting-millions-of-websites-update-now\/."},{"key":"e_1_3_2_2_53_1","volume-title":"Proceedings of the 20th Network and Distributed System Security Symposium (NDSS '13)","author":"\u0160rndic Nedim","year":"2013","unstructured":"Nedim \u0160rndic and Pavel Laskov. 2013. Detection of Malicious PDF Files Based on Hierarchical Document Structure. In Proceedings of the 20th Network and Distributed System Security Symposium (NDSS '13)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2014.20"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1186\/s13635-016-0045-0"},{"key":"e_1_3_2_2_56_1","unstructured":"Phil Stokes. 2019. Malicious PDFs | Revealing the Techniques Behind the Attacks. https:\/\/www.sentinelone.com\/blog\/malicious-pdfs-revealing-techniques-behind-attacks\/."},{"key":"e_1_3_2_2_57_1","volume-title":"Proceedings of 28th USENIX Security Symposium (USENIX Security '19)","author":"Tong Liang","year":"2019","unstructured":"Liang Tong, Bo Li, Chen Hajaj, Chaowei Xiao, Ning Zhang, and Yevgeniy Vorobeychik. 2019. Improving Robustness of ML Classifiers Against Realizable Evasion Attacks Using Conserved Features. In Proceedings of 28th USENIX Security Symposium (USENIX Security '19)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.5555\/3540261.3540796"},{"volume-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP '21)","author":"Wang Yue","key":"e_1_3_2_2_59_1","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven C.H. Hoi. 2021. CodeT5: Identifier-aware Unified Pre-trained Encoder-Decoder Models for Code Understanding and Generation. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP '21)."},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2016.23115"},{"key":"e_1_3_2_2_61_1","volume-title":"Proceedings of the 30th USENIX Security Symposium (USENIX Security '21)","author":"Yang Limin","year":"2021","unstructured":"Limin Yang, Wenbo Guo, Qingying Hao, Arridhana Ciptadi, Ali Ahmadzadeh, Xinyu Xing, and Gang Wang. 2021. CADE: Detecting and Explaining Concept Drift Samples for Security Applications. In Proceedings of the 30th USENIX Security Symposium (USENIX Security '21)."},{"key":"e_1_3_2_2_62_1","unstructured":"Adeline Zhang. 2019. Chrome PDF File Parsing 0-Day Vulnerability Threat Alert. https:\/\/nsfocusglobal.com\/chrome-pdf-file-parsing-0-day-vulnerability-threat-alert\/."},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2019.23492"}],"event":{"name":"CCS '25: ACM SIGSAC Conference on Computer and Communications Security","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"],"location":"Taipei Taiwan","acronym":"CCS '25"},"container-title":["Proceedings of the 2025 ACM SIGSAC Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719027.3744829","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719027.3744829","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T22:11:32Z","timestamp":1766441492000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719027.3744829"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":63,"alternative-id":["10.1145\/3719027.3744829","10.1145\/3719027"],"URL":"https:\/\/doi.org\/10.1145\/3719027.3744829","relation":{},"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2025-11-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}