{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:59:49Z","timestamp":1774540789802,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T00:00:00Z","timestamp":1718755200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,19]]},"DOI":"10.1145\/3626232.3653283","type":"proceedings-article","created":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T18:20:25Z","timestamp":1718043625000},"page":"361-366","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["WikiPhish: A Diverse Wikipedia-Based Dataset for Phishing Website Detection: Data\/Toolset Paper"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-0160-0979","authenticated-orcid":false,"given":"Gabriel","family":"Loiseau","sequence":"first","affiliation":[{"name":"Hornet Security, Hem, France"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3012-1657","authenticated-orcid":false,"given":"Valentin","family":"Lefils","sequence":"additional","affiliation":[{"name":"Hornet Security, Hem, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8652-597X","authenticated-orcid":false,"given":"Maxime","family":"Meyer","sequence":"additional","affiliation":[{"name":"Hornet Security, Hem, France"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9835-9878","authenticated-orcid":false,"given":"Damien","family":"Riquet","sequence":"additional","affiliation":[{"name":"Hornet Security, Hem, France"}]}],"member":"320","published-online":{"date-parts":[[2024,6,19]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2014.03.019"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 1st IEEE Conference on Secure and Trustworthy Machine Learning (SaTML). IEEE Computer Society","author":"Apruzzese Giovanni","unstructured":"Giovanni Apruzzese, Hyrum S. Anderson, Savino Dambra, David Freeman, Fabio Pierazzi, and Kevin A. Roundy. 2023. \"Real Attackers Don't Compute Gradients\": Bridging the Gap between Adversarial ML Research and Practice. In Proceedings of the 1st IEEE Conference on Secure and Trustworthy Machine Learning (SaTML). IEEE Computer Society, Los Alamitos, CA, USA, 339--364."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","unstructured":"Giovanni Apruzzese and V. S. Subrahmanian. 2022. Mitigating Adversarial Gray-Box Attacks Against Phishing Detectors. IEEE Transactions on Dependable and Secure Computing Vol. Early Access (2022) 1--19. https:\/\/doi.org\/10.1109\/tdsc.2022.3210029","DOI":"10.1109\/tdsc.2022.3210029"},{"key":"e_1_3_2_1_4_1","unstructured":"Subhash Ariyadasa and Shantha Fernando. 2021. Phishing Websites Dataset - Mendeley Data. https:\/\/data.mendeley.com\/datasets\/n96ncsr5g4"},{"key":"e_1_3_2_1_5_1","unstructured":"Zbigniew Banach. 2022. How to tackle false positives in web application security - activereach Ltd. https:\/\/tinyurl.com\/mrxh35xy. (Accessed on 11\/22\/2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Johari Abdullah, and Kelvin Sheng Chek Yong.","author":"Chiew Kang Leng","year":"2018","unstructured":"Kang Leng Chiew, Ee Hung Chang, Johari Abdullah, and Kelvin Sheng Chek Yong. 2018. Building standard offline anti-phishing dataset for benchmarking. In International Journal of Engineering & Technology, Vol. 7. Science Publishing Corporation, www.sciencepubco.com\/index.php\/IJET, 7--14."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings, Part I 22","author":"Corona Igino","year":"2017","unstructured":"Igino Corona, Battista Biggio, Matteo Contini, Luca Piras, Roberto Corda, Mauro Mereu, Guido Mureddu, Davide Ariu, and Fabio Roli. 2017. Deltaphish: Detecting phishing webpages in compromised websites. In Computer Security--ESORICS 2017: 22nd European Symposium on Research in Computer Security, Oslo, Norway, September 11--15, 2017, Proceedings, Part I 22. Springer International Publishing, Cham, 370--388."},{"key":"e_1_3_2_1_8_1","unstructured":"Dheeru Dua and Casey Graff. 2017. UCI Machine Learning Repository. http:\/\/archive.ics.uci.edu\/ml"},{"key":"e_1_3_2_1_9_1","volume-title":"Crime Report","author":"Federal Bureau of Investigation FBI. 2022.","year":"2022","unstructured":"Federal Bureau of Investigation FBI. 2022. Internet Crime Report 2022. https:\/\/www.ic3.gov\/Media\/PDF\/AnnualReport\/2022_IC3Report.pdf"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775246"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","unstructured":"Abdelhakim Hannousse and Salima Yahiouche. 2021. Web page phishing detection - Mendeley Data. Mendeley. https:\/\/doi.org\/10.17632\/c2gw7fy2j4.3","DOI":"10.17632\/c2gw7fy2j4.3"},{"key":"e_1_3_2_1_12_1","volume-title":"Park","author":"Heiding Fredrik","year":"2023","unstructured":"Fredrik Heiding, Bruce Schneier, Arun Vishwanath, Jeremy Bernstein, and Peter S. Park. 2023. Devising and Detecting Phishing: Large Language Models vs. Smaller Human Models. arxiv: 2308.12287 [cs.CR]"},{"key":"e_1_3_2_1_13_1","unstructured":"LLC Interisle Consulting Group. 2023. Phishing Landscape 2023: An Annual Study of the Scope and Distribution of Phishing - Interisle Consulting Group. https:\/\/interisle.net\/insights\/phishinglandscape2023."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","unstructured":"Danesh Irani Steve Webb Jonathon Giffin and Calton Pu. 2008. Evolutionary study of phishing. https:\/\/doi.org\/10.1109\/ECRIME.2008.4696967","DOI":"10.1109\/ECRIME.2008.4696967"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2018.2876857"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46298-1_30"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MILCOM52596.2021.9653028"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.24342\/f49465b2-c68a-4182--9171-075f0ed797d5"},{"key":"e_1_3_2_1_19_1","unstructured":"Max-Emanuel Maurer. 2012. Phishload. https:\/\/www.medien.ifi.lmu.de\/team\/max.maurer\/files\/phishload\/index.html."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/EDCC.2019.00025"},{"key":"e_1_3_2_1_21_1","unstructured":"Sayak Saha Roy Unique Karanjit and Shirin Nilizadeh. 2022. A Large-Scale Analysis of Phishing Websites Hosted on Free Web Hosting Domains. arxiv: 2212.02563 [cs.CR]"},{"key":"e_1_3_2_1_22_1","volume-title":"Hoi","author":"Sahoo Doyen","year":"2019","unstructured":"Doyen Sahoo, Chenghao Liu, and Steven C. H. Hoi. 2019. Malicious URL Detection using Machine Learning: A Survey. arxiv: 1701.07179 [cs.LG]"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3278532.3278574"},{"key":"e_1_3_2_1_24_1","volume-title":"30th USENIX Security Symposium (USENIX Security 21)","author":"Silva Ravindu De","year":"2021","unstructured":"Ravindu De Silva, Mohamed Nabeel, Charith Elvitigala, Issa Khalil, Ting Yu, and Chamath Keppitiyagama. 2021. Compromised or Attacker-Owned: A Large Scale Classification and Study of Hosting Domains of Malicious URLs. In 30th USENIX Security Symposium (USENIX Security 21). USENIX Association, Online, 3721--3738. https:\/\/www.usenix.org\/conference\/usenixsecurity21\/presentation\/desilva"},{"key":"e_1_3_2_1_25_1","volume-title":"How Many Websites Are There in the World? (2023) - Siteefy. Siteefy. https:\/\/siteefy.com\/how-many-websites-are-there Retrieved","year":"2023","unstructured":"Siteefy. 2023. How Many Websites Are There in the World? (2023) - Siteefy. Siteefy. https:\/\/siteefy.com\/how-many-websites-are-there Retrieved May 18, 2023 from"},{"key":"e_1_3_2_1_26_1","unstructured":"Choon Lin Tan. 2018. Phishing Dataset for Machine Learning: Feature Evaluation - Mendeley Data. https:\/\/data.mendeley.com\/datasets\/h3cgnj8hft\/1."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.3390\/make3030034"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3278532.3278569"},{"key":"e_1_3_2_1_29_1","unstructured":"Tarun Tiwari. 2020. Phishing Site URLs | Kaggle. https:\/\/www.kaggle.com\/datasets\/taruntiwarihp\/phishing-site-urls. (Accessed on 11\/22\/2023)."},{"key":"e_1_3_2_1_30_1","unstructured":"Shusei Tomonaga. 2023. GitHub - JPCERTCC\/phishurl-list: Phishing URL dataset from JPCERT\/CC. https:\/\/github.com\/JPCERTCC\/phishurl-list. (Accessed on 11\/22\/2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"Deceit and deception: A large user study of phishing","author":"Tsow Alex","year":"2007","unstructured":"Alex Tsow and Markus Jakobsson. 2007. Deceit and deception: A large user study of phishing. Indiana University. Retrieved September , Vol. 9 (2007), 2007."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3465481.3470112"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3375708.3380313"}],"event":{"name":"CODASPY '24: Fourteenth ACM Conference on Data and Application Security and Privacy","location":"Porto Portugal","acronym":"CODASPY '24","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the Fourteenth ACM Conference on Data and Application Security and Privacy"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626232.3653283","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626232.3653283","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:48:47Z","timestamp":1755892127000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626232.3653283"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,19]]},"references-count":33,"alternative-id":["10.1145\/3626232.3653283","10.1145\/3626232"],"URL":"https:\/\/doi.org\/10.1145\/3626232.3653283","relation":{},"subject":[],"published":{"date-parts":[[2024,6,19]]},"assertion":[{"value":"2024-06-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}