{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T03:08:00Z","timestamp":1775790480503,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":38,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819722648","type":"print"},{"value":"9789819722624","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-2262-4_6","type":"book-chapter","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T09:02:31Z","timestamp":1713949351000},"page":"65-77","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Adversarial Text Purification: A Large Language Model Approach for\u00a0Defense"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6891-2925","authenticated-orcid":false,"given":"Raha","family":"Moraffah","sequence":"first","affiliation":[]},{"given":"Shubh","family":"Khandelwal","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6117-6382","authenticated-orcid":false,"given":"Amrita","family":"Bhattacharjee","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3264-7904","authenticated-orcid":false,"given":"Huan","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,25]]},"reference":[{"key":"6_CR1","unstructured":"Alizadeh, M., et al.: Open-source large language models outperform crowd workers and approach ChatGPT in text-annotation tasks. arXiv preprint arXiv:2307.02179 (2023)"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Alzantot, M., Sharma, Y., Elgohary, A., Ho, B.J., Srivastava, M., Chang, K.W.: Generating natural language adversarial examples. arXiv preprint arXiv:1804.07998 (2018)","DOI":"10.18653\/v1\/D18-1316"},{"key":"6_CR3","unstructured":"Bai, Y., , et\u00a0al.: Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022)"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Bhattacharjee, A., Liu, H.: Fighting fire with fire: can ChatGPT detect AI-generated text? arXiv preprint arXiv:2308.01284 (2023)","DOI":"10.1145\/3655103.3655106"},{"key":"6_CR5","unstructured":"Bhattacharjee, A., Moraffah, R., Garland, J., Liu, H.: LLMS as counterfactual explanation modules: can ChatGPT explain black-box text classifiers? arXiv preprint arXiv:2309.13340 (2023)"},{"key":"6_CR6","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR7","unstructured":"Chang, Y., et\u00a0al.: A survey on evaluation of large language models. arXiv preprint arXiv:2307.03109 (2023)"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Jiang, L., Macherey, W.: Robust neural machine translation with doubly adversarial inputs. arXiv preprint arXiv:1906.02443 (2019)","DOI":"10.18653\/v1\/P19-1425"},{"key":"6_CR9","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Ebrahimi, J., Rao, A., Lowd, D., Dou, D.: HotFlip: white-box adversarial examples for text classification. arXiv preprint arXiv:1712.06751 (2017)","DOI":"10.18653\/v1\/P18-2006"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Flamholz, Z.N., Biller, S.J., Kelly, L.: Large language models improve annotation of viral proteins. Res. Sq. (2023)","DOI":"10.1038\/s41564-023-01584-8"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Jia, R., Raghunathan, A., G\u00f6ksel, K., Liang, P.: Certified robustness to adversarial word substitutions. arXiv preprint arXiv:1909.00986 (2019)","DOI":"10.18653\/v1\/D19-1423"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Jin, D., Jin, Z., Zhou, J.T., Szolovits, P.: Is BERT really robust? A strong baseline for natural language attack on text classification and entailment. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 8018\u20138025 (2020)","DOI":"10.1609\/aaai.v34i05.6311"},{"key":"6_CR14","unstructured":"Latif, S., Usama, M., Malik, M.I., Schuller, B.W.: Can large language models aid in annotating speech emotional data? uncovering new frontiers. arXiv preprint arXiv:2307.06090 (2023)"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"LeCun, Y., Chopra, S., Hadsell, R., Ranzato, M., Huang, F.: A tutorial on energy-based learning. Predicting Struct. Data 1(0) (2006)","DOI":"10.7551\/mitpress\/7443.003.0014"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Li, L., Ma, R., Guo, Q., Xue, X., Qiu, X.: BERT-attack: adversarial attack against BERT using BERT. arXiv preprint arXiv:2004.09984 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.500"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Li, L., Qiu, X.: Token-aware virtual adversarial training in natural language understanding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 8410\u20138418 (2021)","DOI":"10.1609\/aaai.v35i9.17022"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Li, L., Song, D., Qiu, X.: Text adversarial purification as defense against adversarial attacks. arXiv preprint arXiv:2203.14207 (2022)","DOI":"10.18653\/v1\/2023.acl-long.20"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Searching for an effective defender: benchmarking defense against adversarial word substitution. arXiv preprint arXiv:2108.12777 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.251"},{"key":"6_CR20","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"6_CR21","unstructured":"Maas, A., et\u00a0al.: Learning word vectors for sentiment analysis. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 142\u2013150 (2011)"},{"key":"6_CR22","unstructured":"Madry, A., Makelov, A., Schmidt, L., Tsipras, D., Vladu, A.: Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083 (2017)"},{"key":"6_CR23","unstructured":"Miyato, T., Dai, A.M., Goodfellow, I.: Adversarial training methods for semi-supervised text classification. arXiv preprint arXiv:1605.07725 (2016)"},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Morris, J.X., Lifland, E., Yoo, J.Y., Grigsby, J., Jin, D., Qi, Y.: TextAttack: a framework for adversarial attacks, data augmentation, and adversarial training in NLP. arXiv preprint arXiv:2005.05909 (2020)","DOI":"10.18653\/v1\/2020.emnlp-demos.16"},{"key":"6_CR25","unstructured":"Nie, W., Guo, B., Huang, Y., Xiao, C., Vahdat, A., Anandkumar, A.: Diffusion models for adversarial purification. arXiv preprint arXiv:2205.07460 (2022)"},{"key":"6_CR26","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR27","unstructured":"Peng, C., et\u00a0al.: A study of generative large language model for medical research and healthcare. arXiv preprint arXiv:2305.13523 (2023)"},{"issue":"8","key":"6_CR28","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Ren, S., Deng, Y., He, K., Che, W.: Generating natural language adversarial examples through probability weighted word saliency. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 1085\u20131097 (2019)","DOI":"10.18653\/v1\/P19-1103"},{"key":"6_CR30","unstructured":"Samangouei, P., Kabkab, M., Chellappa, R.: Defense-GAN: protecting classifiers against adversarial attacks using generative models. arXiv preprint arXiv:1805.06605 (2018)"},{"key":"6_CR31","unstructured":"Shi, C., Holtz, C., Mishne, G.: Online adversarial purification based on self-supervision. arXiv preprint arXiv:2101.09387 (2021)"},{"key":"6_CR32","unstructured":"Shreya, G., Khapra, M.M.: A survey in adversarial defences and robustness in NLP. arXiv preprint arXiv:2203.06414 (2022)"},{"key":"6_CR33","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"6_CR34","unstructured":"Xu, B., et al.: Expertprompting: instructing large language models to be distinguished experts. arXiv preprint arXiv:2305.14688 (2023)"},{"key":"6_CR35","doi-asserted-by":"crossref","unstructured":"Ye, M., Miao, C., Wang, T., Ma, F.: TextHoaxer: budgeted hard-label adversarial attacks on text. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 3877\u20133884 (2022)","DOI":"10.1609\/aaai.v36i4.20303"},{"key":"6_CR36","unstructured":"Yoon, J., Hwang, S.J., Lee, J.: Adversarial purification with score-based generative models. In: International Conference on Machine Learning. pp. 12062\u201312072. PMLR (2021)"},{"issue":"2","key":"6_CR37","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1162\/coli_a_00476","volume":"49","author":"J Zeng","year":"2023","unstructured":"Zeng, J., Xu, J., Zheng, X., Huang, X.: Certified robustness to text adversarial attacks by randomized [mask]. Comput. Linguist. 49(2), 395\u2013427 (2023)","journal-title":"Comput. Linguist."},{"key":"6_CR38","unstructured":"Zhu, C., Cheng, Y., Gan, Z., Sun, S., Goldstein, T., Liu, J.: Freelb: Enhanced adversarial training for natural language understanding. arXiv preprint arXiv:1909.11764 (2019)"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-2262-4_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T09:17:31Z","timestamp":1713950251000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-2262-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819722648","9789819722624"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-2262-4_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"25 April 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taipei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 May 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 May 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}