{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T15:06:05Z","timestamp":1767625565913,"version":"3.45.0"},"reference-count":34,"publisher":"Tech Science Press","issue":"3","license":[{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"vor","delay-in-days":298,"URL":"https:\/\/doi.org\/10.32604\/TSP-CROSSMARKPOLICY"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.068156","type":"journal-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T08:01:04Z","timestamp":1756800064000},"page":"4977-4993","update-policy":"https:\/\/doi.org\/10.32604\/tsp-crossmarkpolicy","source":"Crossref","is-referenced-by-count":1,"title":["OCR-Assisted Masked BERT for Homoglyph Restoration towards Multiple Phishing Text Downstream Tasks"],"prefix":"10.32604","volume":"85","author":[{"given":"Hanyong","family":"Lee","sequence":"first","affiliation":[]},{"given":"Ye-Chan","family":"Park","sequence":"additional","affiliation":[]},{"given":"Jaesung","family":"Lee","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","first-page":"1","article-title":"Adversarial attacks on deep-learning models in natural language processing: a survey","volume":"11","author":"Zhang","year":"2020","journal-title":"ACM Transact Intell Syst Technol (TIST)"},{"key":"ref2","series-title":"13th International Conference on Security of Information and Networks (SIN 2020); 2020 Nov 4\u20137","first-page":"1","article-title":"Visual spoofing in content-based spam detection","author":"Sokolov"},{"key":"ref3","series-title":"2022 IEEE Symposium on Security and Privacy (SP); 2022 May 22\u201326","first-page":"1987","article-title":"Bad characters: imperceptible NLP attacks","author":"Boucher"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"536","DOI":"10.2197\/ipsjjip.27.536","article-title":"Detection method of homograph internationalized domain names with OCR","volume":"27","author":"Sawabe","year":"2019","journal-title":"J Inform Process"},{"key":"ref5","first-page":"1616","author":"Keller","year":"2021","journal-title":"Findings of the association for computational linguistics: ACL-IJCNLP 2021"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3593042","article-title":"A survey of adversarial defenses and robustness in NLP","volume":"55","author":"Goyal","year":"2023 Jul","journal-title":"ACM Comput Surv"},{"key":"ref7","series-title":"Proceedings of the Internet Measurement Conference, IMC \u201919","first-page":"449","article-title":"ShamFinder: an automated framework for detecting IDN homographs","author":"Suzuki","year":"2019"},{"key":"ref8","series-title":"2018 APWG Symposium on Electronic Crime Research (eCrime); 2018 May 15\u201317","first-page":"1","article-title":"Large scale detection of IDN domain name masquerading","author":"Elsayed"},{"key":"ref9","first-page":"103170","article-title":"OCR post-correction for detecting adversarial text images","volume":"66","author":"Imam","year":"2022","journal-title":"J Inf Secur Appl"},{"key":"ref10","series-title":"Proceedings of the 2018 ACM Workshop on Artificial Intelligence and Security (AISec); 2018 Oct 15\u201319","first-page":"1","article-title":"Detecting homoglyph attacks with a siamese neural network","author":"Woodbridge"},{"key":"ref11","series-title":"2020 IEEE European Symposium on Security and Privacy Workshops (EuroS&PW); 2020 Sep 7\u201311","first-page":"559","article-title":"A case of identity: detection of suspicious IDN homograph domains using active DNS measurements","author":"Yazdani"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"54","DOI":"10.3390\/jsan11030054","article-title":"Homoglyph attack detection model using machine learning and hash function","volume":"11","author":"Almuhaideb","year":"2022","journal-title":"J Sens Actuator Netw"},{"key":"ref13","first-page":"498","article-title":"Spam detection in SMS using machine learning through text mining","volume":"9","author":"Julis","year":"2020","journal-title":"Int J Scient Technol Res"},{"key":"ref14","series-title":"Advances in Information Retrieval: 39th European Conference on IR Research, ECIR 2017","first-page":"669","article-title":"Plagiarism detection in texts obfuscated with homoglyphs","author":"Alvi","year":"2017"},{"key":"ref15","series-title":"2010 20th International Conference on Pattern Recognition; 2010 Aug 23\u201326","first-page":"1949","article-title":"Detect visual spoofing in unicode-based text","author":"Qiu"},{"key":"ref16","series-title":"Proceedings of the 2019 ACM Symposium on Document Engineering. Louvain-la-Neuve","first-page":"1","article-title":"OCR-based detection and correction of homoglyph attacks","author":"Sawabe","year":"2019"},{"key":"ref17","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"13094","article-title":"TrOCR: transformer-based optical character recognition with pre-trained models","author":"Li","year":"2023"},{"key":"ref18","series-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"7235","article-title":"Robust open-vocabulary translation from visual text representations","author":"Salesky","year":"2021"},{"key":"ref19","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics; 2022 May 22\u201327","first-page":"6661","article-title":"SHIELD: defending textual neural networks against multiple black-box adversarial attacks with stochastic multi-expert patcher","author":"Le"},{"key":"ref20","series-title":"Proceedings of the 28th International Conference on Computational Linguistics; 2020 Dec 8\u201313","first-page":"6903","article-title":"CharacterBERT: reconciling ELMo and BERT for word-level open-vocabulary representations from characters","author":"El Boukkouri"},{"key":"ref21","series-title":"Proceedings of the 28th International Conference on Computational Linguistics; 2020 Dec 8\u201313","first-page":"39","article-title":"CharBERT: character-aware pre-trained language model","author":"Ma"},{"key":"ref22","series-title":"26th Annual Network and Distributed System Security Symposium, NDSS 2019","first-page":"1","article-title":"TEXTBUGGER: generating adversarial text against real-world applications","author":"Li","year":"2019"},{"key":"ref23","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","first-page":"1634","article-title":"Text processing like humans do: visually attacking and shielding NLP systems","author":"Eger","year":"2019"},{"key":"ref24","first-page":"123","article-title":"LEGIT: a framework for evaluating the readability of visually perturbed text","volume":"76","author":"Seth","year":"2023","journal-title":"J Artif Intelli Res"},{"key":"ref25","first-page":"4367","author":"Lee","year":"2025","journal-title":"Findings of the association for computational linguistics: NAACL 2025"},{"key":"ref26","unstructured":"Lee MA. madmaze\/pytesseract; 2024 [Internet]. [cited 2025 Aug 6]. Available from: https:\/\/github.com\/madmaze\/pytesseract."},{"key":"ref27","unstructured":"Cozens S. Authors TNP, Contributors GF. notofonts\/runic. Noto Fonts; 2024 [Internet]. [cited 2025 Aug 6]. Available from: https:\/\/github.com\/notofonts\/runic."},{"key":"ref28","unstructured":"B S. satbyy\/go-noto-universal; 2024 [Internet]. [cited 2025 Aug 6]. Available from: https:\/\/github.com\/satbyy\/go-noto-universal."},{"key":"ref29","unstructured":"Bitcoin Abuse. Bitcoin Abuse Database; 2023 [Internet]. [cited 2025 Aug 6]. Available from: https:\/\/www.bitcoinabuse.com\/."},{"key":"ref30","series-title":"The IEEE International Conference on Computer Vision (ICCV); 2015 Dec 7\u201313","first-page":"13","article-title":"Aligning books and movies: towards story-like visual explanations by watching movies and reading books","author":"Zhu"},{"key":"ref31","series-title":"International Conference on Learning Representations; 2017 Apr 24\u201326","first-page":"1","article-title":"Pointer sentinel mixture models","author":"Merity"},{"key":"ref32","series-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics; 2020 Jul 5\u201310","first-page":"2699","article-title":"Masked language model scoring","author":"Salazar"},{"key":"ref33","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"33125","DOI":"10.1007\/s11042-024-20559-3","article-title":"Multimodal AI model for zero-shot vehicle brand identification","volume":"84","author":"Kerdvibulvech","year":"2025","journal-title":"Multimed Tools Appl"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-85-3\/TSP_CMC_68156\/TSP_CMC_68156.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:22:00Z","timestamp":1763346120000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v85n3\/64172"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":34,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.068156","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2025-05-22","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-08-07","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-10-23","order":2,"name":"published","label":"Published Online","group":{"name":"publication_history","label":"Publication History"}}]}}