{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T18:10:03Z","timestamp":1755886203325,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T00:00:00Z","timestamp":1701648000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,4]]},"DOI":"10.1145\/3627106.3627172","type":"proceedings-article","created":{"date-parts":[[2023,12,2]],"date-time":"2023-12-02T18:13:22Z","timestamp":1701540802000},"page":"14-28","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["From Attachments to SEO: Click Here to Learn More about Clickbait PDFs!"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-7537-7797","authenticated-orcid":false,"given":"Giada","family":"Stivala","sequence":"first","affiliation":[{"name":"CISPA Helmholtz Center for Information Security, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5269-951X","authenticated-orcid":false,"given":"Sahar","family":"Abdelnabi","sequence":"additional","affiliation":[{"name":"CISPA Helmholtz Center for Information Security, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5560-1041","authenticated-orcid":false,"given":"Andrea","family":"Mengascini","sequence":"additional","affiliation":[{"name":"CISPA Helmholtz Center for Information Security, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8842-9300","authenticated-orcid":false,"given":"Mariano","family":"Graziano","sequence":"additional","affiliation":[{"name":"Cisco Talos, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8949-9896","authenticated-orcid":false,"given":"Mario","family":"Fritz","sequence":"additional","affiliation":[{"name":"CISPA Helmholtz Center for Information Security, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6223-8945","authenticated-orcid":false,"given":"Giancarlo","family":"Pellegrino","sequence":"additional","affiliation":[{"name":"CISPA Helmholtz Center for Information Security, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,12,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3417233"},{"key":"e_1_3_2_1_2_1","volume-title":"EXPOSURE: Finding Malicious Domains Using Passive DNS Analysis. In NDSS.","author":"Bilge Leyla","year":"2011","unstructured":"Leyla Bilge, Engin Kirda, Christopher Kruegel, and Marco Balduzzi. 2011. EXPOSURE: Finding Malicious Domains Using Passive DNS Analysis. In NDSS."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1978942.1979459"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Curtis Carmony Xunchao Hu Heng Yin Abhishek\u00a0Vasisht Bhaskar and Mu Zhang. 2016. Extract Me If You Can: Abusing PDF Parsers in Malware Detectors. In NDSS.","DOI":"10.14722\/ndss.2016.23483"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/3489212.3489344"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/3361338.3361428"},{"key":"e_1_3_2_1_8_1","unstructured":"MX\u00a0Mail Data. [n. d.]. MXMAILDATA: Email Threat Data."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1124772.1124861"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMPSAC.2016.105"},{"key":"e_1_3_2_1_11_1","unstructured":"Jose\u00a0Miguel Esparza. 2016. peepdf."},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 2nd ACM International Conference on Knowledge Discovery and Data Mining (KDD).","author":"Ester Martin","year":"1996","unstructured":"Martin Ester, Hans-Peter Kriegel, J\u00f6rg Sander, and Xiaowei Xu. 1996. A density-based algorithm for discovering clusters in large spatial databases with noise. In Proceedings of the 2nd ACM International Conference on Knowledge Discovery and Data Mining (KDD)."},{"key":"e_1_3_2_1_13_1","unstructured":"fanboy MonztA Famlam Khrin. [n. d.]. EasyList. (01\/22\/2022)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242660"},{"key":"e_1_3_2_1_15_1","unstructured":"Google. 2022. Vision AI | Google Cloud. (01\/22\/2022)."},{"key":"e_1_3_2_1_16_1","unstructured":"Google. 2022. Safe Browsing \u2013 Google Safe Browsing. (01\/22\/2022)."},{"key":"e_1_3_2_1_17_1","volume-title":"Improving Image Clustering With Multiple Pretrained CNN Feature Extractors. In British Machine Vision Conference BMVC.","author":"Gu\u00e9rin Joris","year":"2018","unstructured":"Joris Gu\u00e9rin and Byron Boots. 2018. Improving Image Clustering With Multiple Pretrained CNN Feature Extractors. In British Machine Vision Conference BMVC."},{"key":"e_1_3_2_1_18_1","volume-title":"CNN features are also great at unsupervised classification. arXiv preprint arXiv:1707.01700","author":"Gu\u00e9rin Joris","year":"2017","unstructured":"Joris Gu\u00e9rin, Olivier Gibaru, St\u00e9phane Thiery, and Eric Nyiri. 2017. CNN features are also great at unsupervised classification. arXiv preprint arXiv:1707.01700 (2017)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/3241189.3241227"},{"key":"e_1_3_2_1_20_1","unstructured":"InQuest. [n. d.]. yara-rules."},{"key":"e_1_3_2_1_21_1","unstructured":"Internet Archive. [n. d.]. About the Internet Archive."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Danesh Irani Steve Webb Jonathon Giffin and Calton Pu. 2008. Evolutionary study of phishing. In 2008 eCrime Researchers Summit.","DOI":"10.1109\/ECRIME.2008.4696967"},{"key":"e_1_3_2_1_23_1","unstructured":"John\u00a0P John Fang Yu Yinglian Xie Arvind Krishnamurthy and Martin Abadi. 2011. deSEO: Combating Search-Result Poisoning. In USENIX security symposium."},{"key":"e_1_3_2_1_24_1","volume-title":"Most common languages used on the internet as of","author":"Johnson Joseph","year":"2020","unstructured":"Joseph Johnson. 2021. Most common languages used on the internet as of January 2020, by share of internet users."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2018.00044"},{"key":"e_1_3_2_1_26_1","volume-title":"International Journal for Information Security Research (IJISR)","author":"Khonji Mahmoud","year":"2012","unstructured":"Mahmoud Khonji, Youssef Iraqi, and Andrew Jones. 2012. Enhancing phishing e-mail classifiers: A lexical url analysis approach. International Journal for Information Security Research (IJISR) (2012)."},{"key":"e_1_3_2_1_27_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey\u00a0E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems (2012)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2076732.2076785"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Stevens Le\u00a0Blond C\u00e9dric Gilbert Utkarsh Upadhyay Manuel Gomez-Rodriguez and David\u00a0R Choffnes. 2017. A Broad View of the Ecosystem of Socially Engineered Exploit Documents. In NDSS.","DOI":"10.14722\/ndss.2017.23250"},{"key":"e_1_3_2_1_30_1","volume-title":"23rd USENIX Security Symposium.","author":"Le\u00a0Blond Stevens","year":"2014","unstructured":"Stevens Le\u00a0Blond, Adina Uritesc, C\u00e9dric Gilbert, Zheng\u00a0Leong Chua, Prateek Saxena, and Engin Kirda. 2014. A look at targeted attacks through the lense of an NGO. In 23rd USENIX Security Symposium."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2660267.2660332"},{"key":"e_1_3_2_1_32_1","volume-title":"You\u2019ve Got Vulnerability: Exploring Effective Vulnerability Notifications. In USENIX Security Symposium.","author":"Li Frank","year":"2016","unstructured":"Frank Li, Zakir Durumeric, Jakub Czyz, Mohammad Karami, Michael Bailey, Damon McCoy, Stefan Savage, and Vern Paxson. 2016. You\u2019ve Got Vulnerability: Exploring Effective Vulnerability Notifications. In USENIX Security Symposium."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872427.2883060"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872427.2883008"},{"key":"e_1_3_2_1_35_1","volume-title":"30th USENIX Security Symposium.","author":"Lin Yun","year":"2021","unstructured":"Yun Lin, Ruofan Liu, Dinil\u00a0Mon Divakaran, Jun\u00a0Yang Ng, Qing\u00a0Zhou Chan, Yiwen Lu, Yuxuan Si, Fan Zhang, and Jin\u00a0Song Dong. 2021. Phishpedia: A Hybrid Deep Learning Based Approach to Visually Identify Phishing Webpages. In 30th USENIX Security Symposium."},{"key":"e_1_3_2_1_36_1","volume-title":"31st USENIX Security Symposium.","author":"Liu Ruofan","year":"2022","unstructured":"Ruofan Liu, Yun Lin, Xianglin Yang, Siang\u00a0Hwee Ng, Dinil\u00a0Mon Divakaran, and Jin\u00a0Song Dong. 2022. Inferring phishing intention via webpage appearance and dynamics: A deep vision based approach. In 31st USENIX Security Symposium."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31537-4_40"},{"key":"e_1_3_2_1_38_1","unstructured":"Microsoft Defender Security Research Team. 2017. Phishers unleash simple but effective social engineering techniques using PDF attachments."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2046707.2046761"},{"key":"e_1_3_2_1_40_1","unstructured":"Jens M\u00fcller Dominik Noss Christian Mainka Vladislav Mladenov and J\u00f6rg Schwenk. 2021. Processing Dangerous Paths. In NDSS."},{"key":"e_1_3_2_1_41_1","unstructured":"Derek Noonburg and Albert Astals. 2021. Poppler a PDF rendering library."},{"key":"e_1_3_2_1_42_1","volume-title":"Pervasive label errors in test sets destabilize machine learning benchmarks. arXiv preprint arXiv:2103.14749","author":"Northcutt G","year":"2021","unstructured":"Curtis\u00a0G Northcutt, Anish Athalye, and Jonas Mueller. 2021. Pervasive label errors in test sets destabilize machine learning benchmarks. arXiv preprint arXiv:2103.14749 (2021)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/1135777.1135794"},{"key":"e_1_3_2_1_44_1","volume-title":"29th USENIX Security Symposium.","author":"Oest Adam","year":"2020","unstructured":"Adam Oest, Penghui Zhang, Brad Wardman, Eric Nunes, Jakub Burgis, Ali Zand, Kurt Thomas, Adam Doup\u00e9, and Gail-Joon Ahn. 2020. Sunrise to sunset: Analyzing the end-to-end life cycle and effectiveness of phishing attacks at scale. In 29th USENIX Security Symposium."},{"key":"e_1_3_2_1_45_1","unstructured":"Palo Alto Networks Unit 42. 2020. 2020 Phishing Trends With PDF Files."},{"key":"e_1_3_2_1_46_1","unstructured":"Yara Project. [n. d.]. YARA: The pattern matching swiss knife for malware researchers (and everyone else)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173786"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1753326.1753383"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419394.3423617"},{"key":"e_1_3_2_1_50_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. In International Conference on Learning Representations (ICLR).","author":"Simonyan Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/2420950.2420987"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Charles Smutz and Angelos Stavrou. 2016. When a Tree Falls: Using Diversity in Ensemble Classifiers to Identify Evasion in Malware Detectors. In NDSS.","DOI":"10.14722\/ndss.2016.23078"},{"key":"e_1_3_2_1_53_1","volume-title":"Proceedings of the 20th Annual Network & Distributed System Security Symposium.","author":"\u0160rndic Nedim","year":"2013","unstructured":"Nedim \u0160rndic and Pavel Laskov. 2013. Detection of malicious pdf files based on hierarchical document structure. In Proceedings of the 20th Annual Network & Distributed System Security Symposium."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2014.20"},{"key":"e_1_3_2_1_55_1","unstructured":"Statcounter. 2022. Search Engine Market Share Worldwide | Statcounter Global Stats."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Giada Stivala and Giancarlo Pellegrino. 2020. Deceptive previews: A study of the link preview trustworthiness in social platforms. In NDSS.","DOI":"10.14722\/ndss.2020.24252"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Ben Stock Giancarlo Pellegrino Frank Li Michael Backes and Christian Rossow. 2018. Didn\u2019t you hear me?\u2014Towards more successful web vulnerability notifications. (2018).","DOI":"10.14722\/ndss.2018.23171"},{"key":"e_1_3_2_1_58_1","unstructured":"Kurt Thomas Danny Huang David Wang Elie Bursztein Chris Grier Thomas\u00a0J Holt Christopher Kruegel Damon McCoy Stefan Savage and Giovanni Vigna. 2015. Framing dependencies introduced by underground commoditization. (2015)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/1972551.1972555"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"urlscan. 2022. urlscan.io. (01\/22\/2022).","DOI":"10.56804\/Avishkara.2022.1107"},{"key":"e_1_3_2_1_61_1","volume-title":"28th USENIX Security Symposium.","author":"Der\u00a0Heijden Amber Van","year":"2019","unstructured":"Amber Van Der\u00a0Heijden and Luca Allodi. 2019. Cognitive triaging of phishing attacks. In 28th USENIX Security Symposium."},{"volume-title":"Purchased Fame: Exploring the Ecosystem of Private Blog Networks. In Proceedings of the 2019 ACM Asia CCS.","author":"Van\u00a0Goethem Tom","key":"e_1_3_2_1_62_1","unstructured":"Tom Van\u00a0Goethem, Najmeh Miramirkhani, Wouter Joosen, and Nick Nikiforakis. [n. d.]. Purchased Fame: Exploring the Ecosystem of Private Blog Networks. In Proceedings of the 2019 ACM Asia CCS."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Verizon Inc.2021. 2021 Data Breach Investigations Report.","DOI":"10.1016\/S1361-3723(21)00061-0"},{"key":"e_1_3_2_1_64_1","unstructured":"VirusTotal. 2022. File search modifiers \u2013 VirusTotal. (01\/22\/2022)."},{"key":"e_1_3_2_1_65_1","unstructured":"VirusTotal. 2022. VirusTotal - Home. (01\/22\/2022)."},{"key":"e_1_3_2_1_66_1","volume-title":"Juice: A Longitudinal Study of an SEO Botnet. In NDSS.","author":"Wang Y","year":"2013","unstructured":"David\u00a0Y Wang, Stefan Savage, and Geoffrey\u00a0M Voelker. 2013. Juice: A Longitudinal Study of an SEO Botnet. In NDSS."},{"key":"e_1_3_2_1_67_1","unstructured":"Colin Whittaker Brian Ryner and Marria Nazif. 2010. Large-scale automatic classification of phishing pages. In NDSS."},{"volume-title":"Special interest tracks and posters of the 14th international conference on World Wide Web.","author":"Wu Baoning","key":"e_1_3_2_1_68_1","unstructured":"Baoning Wu and Brian\u00a0D. Davison. 2005. Identifying link farm spam pages. In Special interest tracks and posters of the 14th international conference on World Wide Web."},{"key":"e_1_3_2_1_69_1","volume-title":"ACM Transactions on Information and System Security (TISSEC)","author":"Xiang Guang","year":"2011","unstructured":"Guang Xiang, Jason Hong, Carolyn\u00a0P Rose, and Lorrie Cranor. 2011. Cantina+ a feature-rich machine learning framework for detecting phishing web sites. ACM Transactions on Information and System Security (TISSEC) (2011)."},{"key":"e_1_3_2_1_70_1","unstructured":"Weilin Xu Yanjun Qi and David Evans. 2016. Automatically evading classifiers. In NDSS."},{"key":"e_1_3_2_1_71_1","unstructured":"Ronghai Yang Xianbo Wang Cheng Chi Dawei Wang Jiawei He Siming Pang and Wing\u00a0Cheong Lau. 2021. Scalable Detection of Promotional Website Defacements in Black Hat SEO Campaigns."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP40001.2021.00021"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242659"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.5555\/3489212.3489345"}],"event":{"name":"ACSAC '23: Annual Computer Security Applications Conference","acronym":"ACSAC '23","location":"Austin TX USA"},"container-title":["Annual Computer Security Applications Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627106.3627172","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627106.3627172","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:39:48Z","timestamp":1755884388000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627106.3627172"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,4]]},"references-count":74,"alternative-id":["10.1145\/3627106.3627172","10.1145\/3627106"],"URL":"https:\/\/doi.org\/10.1145\/3627106.3627172","relation":{},"subject":[],"published":{"date-parts":[[2023,12,4]]},"assertion":[{"value":"2023-12-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}