{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T20:47:53Z","timestamp":1757623673733,"version":"3.44.0"},"reference-count":288,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2025,8,14]],"date-time":"2025-08-14T00:00:00Z","timestamp":1755129600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,14]],"date-time":"2025-08-14T00:00:00Z","timestamp":1755129600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s11432-024-4351-3","type":"journal-article","created":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T01:52:38Z","timestamp":1755568358000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Backdoor threats in large language models\u2014a survey"],"prefix":"10.1007","volume":"68","author":[{"given":"Shuai","family":"Liu","sequence":"first","affiliation":[]},{"given":"Yiheng","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Kun","family":"Hong","sequence":"additional","affiliation":[]},{"given":"Ruite","family":"Fei","sequence":"additional","affiliation":[]},{"given":"Chenhao","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Qian","family":"Li","sequence":"additional","affiliation":[]},{"given":"Chao","family":"Shen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,14]]},"reference":[{"key":"4351_CR1","volume-title":"OpenAI: introducing ChatGPT","author":"OpenAI 
O","year":"2022","unstructured":"OpenAI O. OpenAI: introducing ChatGPT. 2022. https:\/\/openai.com\/blog\/chatgpt"},{"key":"4351_CR2","unstructured":"Reid M, Savinov N, Teplyashin D, et al. Gemini 1.5: unlocking multimodal understanding across millions of tokens of context. 2024. ArXiv:2403.05530"},{"key":"4351_CR3","unstructured":"GitHub. Github Copilot. 2023. https:\/\/github.com\/features\/copilot"},{"key":"4351_CR4","first-page":"1007","volume-title":"Proceedings of the Association for Information Science and Technology","author":"D Kelly","year":"2023","unstructured":"Kelly D, Chen Y, Cornwell S E, et al. Bing chat: the future of search engines? In: Proceedings of the Association for Information Science and Technology, 2023. 1007\u20131009"},{"key":"4351_CR5","unstructured":"Mahmood A, Wang J, Yao B, et al. LLM-powered conversational voice assistants: interaction patterns, opportunities, challenges, and design guidelines. 2023. ArXiv:2309.13879"},{"key":"4351_CR6","unstructured":"King E, Yu H, Lee S, et al. \u201cGet ready for a party\u201d: exploring smarter smart spaces with help from large language models. 2023. ArXiv:2303.14143"},{"key":"4351_CR7","doi-asserted-by":"publisher","first-page":"e0000205","DOI":"10.1371\/journal.pdig.0000205","volume":"2","author":"A B Mbakwe","year":"2023","unstructured":"Mbakwe A B, Lourentzou I, Celi L A, et al. ChatGPT passing USMLE shines a spotlight on the flaws of medical education. PLOS Digit Health, 2023, 2: e0000205","journal-title":"PLOS Digit Health"},{"key":"4351_CR8","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.1038\/s41591-023-02448-8","volume":"29","author":"A J Thirunavukarasu","year":"2023","unstructured":"Thirunavukarasu A J, Ting D S J, Elangovan K, et al. Large language models in medicine. Nat Med, 2023, 29: 1930\u20131940","journal-title":"Nat Med"},{"key":"4351_CR9","doi-asserted-by":"crossref","unstructured":"Zhao S, Jia M, Tuan L A, et al. 
Universal vulnerabilities in large language models: in-context learning backdoor attacks. 2024. ArXiv:2401.05949","DOI":"10.18653\/v1\/2024.emnlp-main.642"},{"key":"4351_CR10","doi-asserted-by":"publisher","first-page":"107166","DOI":"10.1016\/j.engappai.2023.107166","volume":"127","author":"T D Nguyen","year":"2024","unstructured":"Nguyen T D, Nguyen T, Nguyen P L, et al. Backdoor attacks and defenses in federated learning: survey, challenges and future research directions. Eng Appl Artif Intell, 2024, 127: 107166","journal-title":"Eng Appl Artif Intell"},{"key":"4351_CR11","unstructured":"Cheng P, Wu Z, Du W, et al. Backdoor attacks and countermeasures in natural language processing models: a comprehensive security review. 2023. ArXiv:2309.06055"},{"key":"4351_CR12","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1109\/TNNLS.2022.3182979","volume":"35","author":"Y Li","year":"2022","unstructured":"Li Y, Jiang Y, Li Z, et al. Backdoor learning: a survey. IEEE Trans Neural Netw Learn Syst, 2022, 35: 5\u201322","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"4351_CR13","first-page":"10546","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"B Wu","year":"2022","unstructured":"Wu B, Chen H, Zhang M, et al. Backdoorbench: a comprehensive benchmark of backdoor learning. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 10546\u201310559"},{"key":"4351_CR14","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1109\/MNET.2024.3367788","volume":"38","author":"H Yang","year":"2024","unstructured":"Yang H, Xiang K, Ge M, et al. A comprehensive overview of backdoor attacks in large language models within communication networks. IEEE Netw, 2024, 38: 211\u2013218","journal-title":"IEEE Netw"},{"key":"4351_CR15","doi-asserted-by":"crossref","unstructured":"Zhao S, Jia M, Guo Z, et al. 
A survey of backdoor attacks and defenses on large language models: implications for security measures. 2024. ArXiv:2406.06852","DOI":"10.36227\/techrxiv.172832726.62863760\/v1"},{"key":"4351_CR16","unstructured":"Gao Y, Doan B G, Zhang Z, et al. Backdoor attacks and countermeasures on deep learning: a comprehensive review. 2020. ArXiv:2007.10760"},{"key":"4351_CR17","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1109\/OJSP.2022.3190213","volume":"3","author":"W Guo","year":"2022","unstructured":"Guo W, Tondi B, Barni M. An overview of backdoor attacks against deep neural networks and possible defences. IEEE Open J Signal Process, 2022, 3: 261\u2013287","journal-title":"IEEE Open J Signal Process"},{"key":"4351_CR18","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1109\/OJCS.2023.3267221","volume":"4","author":"Y Li","year":"2023","unstructured":"Li Y, Zhang S, Wang W, et al. Backdoor attacks to deep learning models and countermeasures: a survey. IEEE Open J Comput Soc, 2023, 4: 134\u2013146","journal-title":"IEEE Open J Comput Soc"},{"key":"4351_CR19","series-title":"Technical Report AREA-202309-You","volume-title":"Backdoor Attacks and Defenses in Natural Language Processing","author":"W You","year":"2023","unstructured":"You W. Backdoor Attacks and Defenses in Natural Language Processing. Technical Report AREA-202309-You. 2023."},{"key":"4351_CR20","doi-asserted-by":"crossref","unstructured":"Koshkin R, Sudoh K, Nakamura S. TransLLaMa: LLM-based simultaneous translation system. 2024. ArXiv:2402.04636","DOI":"10.18653\/v1\/2024.findings-emnlp.27"},{"key":"4351_CR21","unstructured":"Sun X, Li X, Zhang S, et al. Sentiment analysis through LLM negotiations. 2023. ArXiv:2311.01876"},{"key":"4351_CR22","unstructured":"Jin H, Zhang Y, Meng D, et al. A comprehensive survey on process-oriented automatic text summarization with exploration of LLM-based methods. 2024. 
ArXiv:2403.02901"},{"key":"4351_CR23","first-page":"1877","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, et al. Language models are few-shot learners. In: Proceedings of the 34th International Conference on Neural Information Processing Systems, 2020. 1877\u20131901"},{"key":"4351_CR24","volume-title":"Proceedings of International Conference on Learning Representations (ICLR)","author":"T B Brown","year":"2022","unstructured":"Brown T B, Mann T, Janecek D, et al. Bridging the gap between rehearsal and inference in continual learning. In: Proceedings of International Conference on Learning Representations (ICLR), 2022"},{"key":"4351_CR25","unstructured":"Wu S, Irsoy O, Lu S, et al. BloombergGPT: a large language model for finance. 2023. ArXiv:2303.17564"},{"key":"4351_CR26","series-title":"Harvard Business School Marketing Unit Working Paper","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.4395751","volume-title":"Using GPT for Market Research","author":"J Brand","year":"2023","unstructured":"Brand J, Israeli A, Ngwe D. Using GPT for Market Research. Harvard Business School Marketing Unit Working Paper, 2023"},{"key":"4351_CR27","first-page":"2454","volume-title":"Proceedings of ACM Conference on Fairness, Accountability, and Transparency","author":"I Cheong","year":"2024","unstructured":"Cheong I, Xia K, Feng K K, et al. (a) I am not a lawyer, but\u2026: engaging legal experts towards responsible LLM policies for legal advice. In: Proceedings of ACM Conference on Fairness, Accountability, and Transparency, 2024. 2454\u20132469"},{"key":"4351_CR28","series-title":"Dissertation for Ph.D. Degree","volume-title":"Statistical language models based on neural networks","author":"T Mikolov","year":"2012","unstructured":"Mikolov T. Statistical language models based on neural networks. Dissertation for Ph.D. Degree. 
Brno: Brno University of Technology, 2012"},{"key":"4351_CR29","doi-asserted-by":"publisher","first-page":"132306","DOI":"10.1016\/j.physd.2019.132306","volume":"404","author":"A Sherstinsky","year":"2020","unstructured":"Sherstinsky A. Fundamentals of recurrent neural network (RNN) and long short-term memory (LSTM) network. Phys D-Nonlinear Phenomena, 2020, 404: 132306","journal-title":"Phys D-Nonlinear Phenomena"},{"key":"4351_CR30","first-page":"1597","volume-title":"Proceedings of the 60th International Midwest Symposium on Circuits and Systems (MWSCAS)","author":"R Dey","year":"2017","unstructured":"Dey R, Salem F M. Gate-variants of gated recurrent unit (GRU) neural networks. In: Proceedings of the 60th International Midwest Symposium on Circuits and Systems (MWSCAS), 2017. 1597\u20131600"},{"key":"4351_CR31","first-page":"1","volume-title":"Proceedings of IEEE Security and Privacy Workshops (SPW)","author":"N Carlini","year":"2018","unstructured":"Carlini N, Wagner D. Audio adversarial examples: targeted attacks on speech-to-text. In: Proceedings of IEEE Security and Privacy Workshops (SPW), 2018. 1\u20137"},{"key":"4351_CR32","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/s11633-019-1211-x","volume":"17","author":"H Xu","year":"2020","unstructured":"Xu H, Ma Y, Liu H C, et al. Adversarial attacks and defenses in images, graphs and text: a review. Int J Autom Comput, 2020, 17: 151\u2013178","journal-title":"Int J Autom Comput"},{"key":"4351_CR33","first-page":"7766","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"L Zhu","year":"2024","unstructured":"Zhu L, Ning R, Li J, et al. SEER: backdoor detection for vision-language models through searching target text and image trigger jointly. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2024. 7766\u20137774"},{"key":"4351_CR34","unstructured":"Chan S H, Dong Y, Zhu J, et al. BadDet: backdoor attacks on object detection. 2022. 
ArXiv:2205.14497"},{"key":"4351_CR35","unstructured":"Wang J, Liu Z, Park K H, et al. Adversarial demonstration attacks on large language models. 2023. ArXiv:2305.14950"},{"key":"4351_CR36","unstructured":"Yang W, Bi X, Lin Y, et al. Watch out for your agents! Investigating backdoor threats to LLM-based agents. 2024. ArXiv:2402.11208"},{"key":"4351_CR37","unstructured":"Sun Z, Kairouz P, Suresh A T, et al. Can you really backdoor federated learning? 2019. ArXiv:1911.07963"},{"key":"4351_CR38","first-page":"2938","volume-title":"Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics","author":"E Bagdasaryan","year":"2020","unstructured":"Bagdasaryan E, Veit A, Hua Y, et al. How to backdoor federated learning. In: Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics, 2020. 2938\u20132948"},{"key":"4351_CR39","doi-asserted-by":"publisher","first-page":"684","DOI":"10.1145\/3564625.3567999","volume-title":"Proceedings of the 38th Annual Computer Security Applications Conference","author":"J Xu","year":"2022","unstructured":"Xu J, Wang R, Koffas S, et al. More is better (mostly): on the backdoor attacks in federated graph neural networks. In: Proceedings of the 38th Annual Computer Security Applications Conference, 2022. 684\u2013698"},{"key":"4351_CR40","first-page":"16070","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","author":"H Wang","year":"2020","unstructured":"Wang H, Sreenivasan K, Rajput S, et al. Attack of the tails: yes, you really can backdoor federated learning. In: Proceedings of the Advances in Neural Information Processing Systems, 2020. 16070\u201316084"},{"key":"4351_CR41","first-page":"6206","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"E Wenger","year":"2021","unstructured":"Wenger E, Passananti J, Bhagoji A N, et al. 
Backdoor attacks against deep learning systems in the physical world. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021. 6206\u20136215"},{"key":"4351_CR42","doi-asserted-by":"publisher","first-page":"1526","DOI":"10.1109\/TSC.2020.3000900","volume":"15","author":"S Wang","year":"2022","unstructured":"Wang S, Nepal S, Rudolph C, et al. Backdoor attacks against transfer learning with pre-trained deep learning models. IEEE Trans Serv Comput, 2022, 15: 1526\u20131539","journal-title":"IEEE Trans Serv Comput"},{"key":"4351_CR43","unstructured":"Chen K, Meng Y, Sun X, et al. BadPre: task-agnostic backdoor attacks to pre-trained NLP foundation models. 2021. ArXiv:2110.02467"},{"key":"4351_CR44","first-page":"8821","volume-title":"Proceedings of International Conference on Machine Learning","author":"A Ramesh","year":"2021","unstructured":"Ramesh A, Pavlov M, Goh G, et al. Zero-shot text-to-image generation. In: Proceedings of International Conference on Machine Learning, 2021. 8821\u20138831"},{"key":"4351_CR45","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"J Lu","year":"2019","unstructured":"Lu J, Batra D, Parikh D, et al. ViLBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Proceedings of Advances in Neural Information Processing Systems, 2019"},{"key":"4351_CR46","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, et al. CodeBERT: a pre-trained model for programming and natural languages. 2020. ArXiv:2002.08155","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"4351_CR47","unstructured":"Huang K, Altosaar J, Ranganath R. ClinicalBERT: modeling clinical notes and predicting hospital readmission. 2019. 
ArXiv:1904.05342"},{"key":"4351_CR48","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1038\/s41746-021-00455-y","volume":"4","author":"L Rasmy","year":"2021","unstructured":"Rasmy L, Xiang Y, Xie Z, et al. Med-BERT: pretrained contextualized embeddings on large-scale structured electronic health records for disease prediction. npj Digit Med, 2021, 4: 86","journal-title":"npj Digit Med"},{"key":"4351_CR49","doi-asserted-by":"crossref","unstructured":"Conneau A, Khandelwal K, Goyal N, et al. Unsupervised cross-lingual representation learning at scale. 2019. ArXiv:1911.02116","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"4351_CR50","unstructured":"Yang Y, Uy M C S, Huang A. FinBERT: a pretrained language model for financial communications. 2020. ArXiv:2006.08097"},{"key":"4351_CR51","first-page":"13041","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"L Zhou","year":"2020","unstructured":"Zhou L, Palangi H, Zhang L, et al. Unified vision-language pre-training for image captioning and VQA. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2020. 13041\u201313049"},{"key":"4351_CR52","unstructured":"Su W, Zhu X, Cao Y, et al. VL-BERT: pre-training of generic visual-linguistic representations. 2019. ArXiv:1908.08530"},{"key":"4351_CR53","first-page":"1631","volume-title":"Proceedings of the Conference on Empirical Methods in Natural Language Processing","author":"R Socher","year":"2013","unstructured":"Socher R, Perelygin A, Wu J, et al. Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, 2013. 1631\u20131642"},{"key":"4351_CR54","first-page":"115","volume-title":"Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL\u201905)","author":"B Pang","year":"2005","unstructured":"Pang B, Lee L. 
Seeing stars: exploiting class relationships for sentiment categorization with respect to rating scales. In: Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL\u201905), 2005. 115\u2013124"},{"key":"4351_CR55","first-page":"168","volume-title":"Proceedings of the 10th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"M Hu","year":"2004","unstructured":"Hu M, Liu B. Mining and summarizing customer reviews. In: Proceedings of the 10th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2004. 168\u2013177"},{"key":"4351_CR56","unstructured":"Yelp, Inc. Yelp dataset. 2018. https:\/\/www.yelp.com\/dataset"},{"key":"4351_CR57","unstructured":"Julian McAuley A. Amazon product data. 2013. http:\/\/jmcauley.ucsd.edu\/data\/amazon\/"},{"key":"4351_CR58","unstructured":"CrowdFlower. Emotion in text dataset. 2016. https:\/\/data.world\/crowdflower\/sentiment-analysis-in-text"},{"key":"4351_CR59","first-page":"271","volume-title":"Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL\u201904)","author":"B Pang","year":"2004","unstructured":"Pang B, Lee L. A sentimental education: sentiment analysis using subjectivity summarization based on minimum cuts. In: Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL\u201904), 2004. 271\u2013278"},{"key":"4351_CR60","unstructured":"LAION. Laion aesthetics v2 6.5+ dataset. 2023. https:\/\/laion.ai\/"},{"key":"4351_CR61","unstructured":"Mohammadi S, Ghorbani R G, Saeed M H S. Emotion analysis of tweets using machine learning: a review. In: Proceedings of the 6th International Conference on Data Mining and Applications, 2021"},{"key":"4351_CR62","unstructured":"Jigsaw. Toxic comment classification challenge. 2018. 
https:\/\/www.kaggle.com\/c\/jigsaw-toxic-comment-classification-challenge"},{"key":"4351_CR63","first-page":"1205","volume-title":"Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)","author":"Z Gao","year":"2020","unstructured":"Gao Z, He Y, Xie J, et al. AbuseEval: a benchmark dataset for abusive language detection. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), 2020. 1205\u20131215"},{"key":"4351_CR64","first-page":"256","volume-title":"Proceedings of the IEEE International Conference on Data Mining Workshops (ICDMW)","author":"P Badjatiya","year":"2017","unstructured":"Badjatiya P, Gupta S, Kumar M, et al. Deep learning for detecting hate speech on Twitter. In: Proceedings of the IEEE International Conference on Data Mining Workshops (ICDMW), 2017. 256\u2013263"},{"key":"4351_CR65","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1145\/2034691.2034742","volume-title":"Proceedings of the ACM Symposium on Document Engineering (DocEng)","author":"M Almeida","year":"2011","unstructured":"Almeida M, Silva M M, Silva J S. Contributions to the study of SMS spam filtering: new datasets and methods. In: Proceedings of the ACM Symposium on Document Engineering (DocEng), 2011. 259\u2013262"},{"key":"4351_CR66","volume-title":"Spamassassin","author":"Apache SpamAssassin","year":"2001","unstructured":"Apache SpamAssassin. Spamassassin. 2001. https:\/\/spamassassin.apache.org\/"},{"key":"4351_CR67","volume-title":"HSOL: a dataset for hate speech detection on social media","author":"HSOL Dataset","year":"2020","unstructured":"HSOL Dataset. HSOL: a dataset for hate speech detection on social media. 2020. https:\/\/github.com\/HSOL-dataset"},{"key":"4351_CR68","volume-title":"HateSpeechC: a benchmark dataset for hate speech detection in text","author":"HateSpeechC","year":"2018","unstructured":"HateSpeechC. HateSpeechC: a benchmark dataset for hate speech detection in text. 2018. 
https:\/\/www.kaggle.com\/datasets\/hatespeechc"},{"key":"4351_CR69","volume-title":"Alexa massive: a large-scale dataset for multi-domain intent classification","author":"Amazon","year":"2021","unstructured":"Amazon. Alexa massive: a large-scale dataset for multi-domain intent classification. 2021. https:\/\/www.amazon.science\/publications\/alexa-massive-dataset"},{"key":"4351_CR70","volume-title":"Ultrachat_200k2: a large-scale dataset for conversational AI and chatbots","author":"UltraChat","year":"2022","unstructured":"UltraChat. Ultrachat_200k2: a large-scale dataset for conversational AI and chatbots. 2022. https:\/\/github.com\/ultrachat\/ultrachat_200k2"},{"key":"4351_CR71","volume-title":"Pushshift Reddit dataset: a comprehensive dataset of Reddit data","author":"Pushshift","year":"2021","unstructured":"Pushshift. Pushshift Reddit dataset: a comprehensive dataset of Reddit data. 2021. https:\/\/pushshift.io\/"},{"key":"4351_CR72","volume-title":"Alpaca: instruction-following language model training data","author":"Tatsu Lab","year":"2023","unstructured":"Tatsu Lab. Alpaca: instruction-following language model training data. 2023. https:\/\/github.com\/tatsu-lab\/alpaca"},{"key":"4351_CR73","volume-title":"Multirc: multi-sentence reading comprehension dataset","author":"The MultiRC Team","year":"2018","unstructured":"The MultiRC Team. Multirc: multi-sentence reading comprehension dataset. 2018. https:\/\/github.com\/stanfordnlp\/multirc"},{"key":"4351_CR74","volume-title":"Boolq: a dataset for boolean question answering","author":"Google AI","year":"2019","unstructured":"Google AI. Boolq: a dataset for boolean question answering. 2019. https:\/\/github.com\/google-research-datasets\/boolq"},{"key":"4351_CR75","volume-title":"WiC: word-in-context dataset","author":"The WiC Team","year":"2020","unstructured":"The WiC Team. WiC: word-in-context dataset. 2020. 
https:\/\/github.com\/UKPLab\/wic"},{"key":"4351_CR76","volume-title":"SuperGLUE: a benchmark for general-purpose language understanding systems","author":"The SuperGLUE Team","year":"2019","unstructured":"The SuperGLUE Team. SuperGLUE: a benchmark for general-purpose language understanding systems. 2019. https:\/\/super.gluebenchmark.com\/"},{"key":"4351_CR77","volume-title":"AG News dataset for text classification","author":"The AG News Team","year":"2015","unstructured":"The AG News Team. AG News dataset for text classification. 2015. https:\/\/www.kaggle.com\/amananandrai\/ag-news-classification-dataset"},{"key":"4351_CR78","volume-title":"MMLU: a benchmark for evaluating general-purpose language understanding systems","author":"The MMLU Team","year":"2022","unstructured":"The MMLU Team. MMLU: a benchmark for evaluating general-purpose language understanding systems. 2022. https:\/\/github.com\/hendrycks\/test"},{"key":"4351_CR79","volume-title":"MedMCQA: a benchmark dataset for medical multiple-choice question answering","author":"The MedMCQA Team","year":"2022","unstructured":"The MedMCQA Team. MedMCQA: a benchmark dataset for medical multiple-choice question answering. 2022. https:\/\/github.com\/MMLU\/MedMCQA"},{"key":"4351_CR80","volume-title":"Wikitext: a dataset for language modeling and text generation","author":"PyTorch Team","year":"2017","unstructured":"PyTorch Team. Wikitext: a dataset for language modeling and text generation. 2017. https:\/\/github.com\/pytorch\/examples\/tree\/main\/word_language_model"},{"key":"4351_CR81","volume-title":"Enron email dataset","author":"The Enron Team","year":"2009","unstructured":"The Enron Team. Enron email dataset. 2009. https:\/\/www.cs.cmu.edu\/~.\/enron\/"},{"key":"4351_CR82","volume-title":"Natural questions dataset for question answering","author":"Google Research","year":"2019","unstructured":"Google Research. Natural questions dataset for question answering. 2019. 
https:\/\/ai.google.com\/research\/NaturalQuestions"},{"key":"4351_CR83","volume-title":"Stanford Alpaca dataset","author":"Stanford University","year":"2023","unstructured":"Stanford University. Stanford Alpaca dataset. 2023. https:\/\/stanford.edu\/~alimpc\/alpaca\/"},{"key":"4351_CR84","volume-title":"DBpedia dataset","author":"DBpedia","year":"2024","unstructured":"DBpedia. DBpedia dataset. 2024. https:\/\/wiki.dbpedia.org\/"},{"key":"4351_CR85","volume-title":"MNLI: multi-genre natural language inference dataset","author":"The MNLI Team","year":"2018","unstructured":"The MNLI Team. MNLI: multi-genre natural language inference dataset. 2018. https:\/\/cims.nyu.edu\/~sbowman\/multinli\/"},{"key":"4351_CR86","volume-title":"QNLI: question natural language inference dataset","author":"The QNLI Team","year":"2018","unstructured":"The QNLI Team. QNLI: question natural language inference dataset. 2018. https:\/\/gluebenchmark.com\/tasks"},{"key":"4351_CR87","volume-title":"Record: reading comprehension with commonsense reasoning dataset","author":"The ReCoRD Team","year":"2019","unstructured":"The ReCoRD Team. Record: reading comprehension with commonsense reasoning dataset. 2019. https:\/\/github.com\/microsoft\/record"},{"key":"4351_CR88","volume-title":"QQP: Quora question pairs dataset","author":"The QQP Team","year":"2018","unstructured":"The QQP Team. QQP: Quora question pairs dataset. 2018. https:\/\/data.quora.com\/First-Quora-Dataset-Release-Question-Pairs"},{"key":"4351_CR89","volume-title":"PKU-saferlhf: a dataset for safe reinforcement learning with human feedback","author":"PKU","year":"2024","unstructured":"PKU. PKU-saferlhf: a dataset for safe reinforcement learning with human feedback. 2024. https:\/\/github.com\/pku-saferlhf"},{"key":"4351_CR90","volume-title":"Language models are unsupervised multitask learners","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, et al. Language models are unsupervised multitask learners. 
2019. https:\/\/api.semanticscholar.org\/CorpusID:160025533"},{"key":"4351_CR91","first-page":"311","volume-title":"Proceedings of the 40th Annual Meeting on Association for Computational Linguistics","author":"K Papineni","year":"2002","unstructured":"Papineni K, Roukos S, Ward T, et al. BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, 2002. 311\u2013318"},{"key":"4351_CR92","first-page":"74","volume-title":"Text Summarization Branches Out. Barcelona: Association for Computational Linguistics","author":"C Y Lin","year":"2004","unstructured":"Lin C Y. Rouge: a package for automatic evaluation of summaries. In: Text Summarization Branches Out. Barcelona: Association for Computational Linguistics, 2004. 74\u201381"},{"key":"4351_CR93","first-page":"65","volume-title":"Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization","author":"S Banerjee","year":"2005","unstructured":"Banerjee S, Lavie A. Meteor: an automatic metric for mt evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, 2005. 65\u201372"},{"key":"4351_CR94","first-page":"4566","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"R Vedantam","year":"2015","unstructured":"Vedantam R, Lawrence Zitnick C, Parikh D. Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2015. 4566\u20134575"},{"key":"4351_CR95","unstructured":"Zhang T, Kishore V, Wu F, et al. BERTScore: evaluating text generation with BERT. 2019. ArXiv:1904.09675"},{"key":"4351_CR96","doi-asserted-by":"crossref","unstructured":"Zhao W, Peyrard M, Liu F, et al. 
Moverscore: text generation evaluating with contextualized embeddings and earth mover distance. 2019. ArXiv:1909.02622","DOI":"10.18653\/v1\/D19-1053"},{"key":"4351_CR97","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1145\/2647868.2654889","volume-title":"Proceedings of the 22nd ACM International Conference on Multimedia","author":"Y Jia","year":"2014","unstructured":"Jia Y, Shelhamer E, Donahue J, et al. Caffe: convolutional architecture for fast feature embedding. In: Proceedings of the 22nd ACM International Conference on Multimedia, 2014. 675\u2013678"},{"key":"4351_CR98","first-page":"265","volume-title":"Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"M Abadi","year":"2016","unstructured":"Abadi M, Barham P, Chen J, et al. TensorFlow: a system for large-scale machine learning. In: Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16), 2016. 265\u2013283"},{"key":"4351_CR99","volume-title":"Torch: a modular machine learning software library","author":"R Collobert","year":"2002","unstructured":"Collobert R, Bengio S, Mari\u00e9thoz J. Torch: a modular machine learning software library. 2002. https:\/\/publications.idiap.ch\/downloads\/reports\/2002\/rr02-46.pdf"},{"key":"4351_CR100","first-page":"123","volume-title":"Proceedings of IEEE Security and Privacy Workshops (SPW)","author":"Q Xiao","year":"2018","unstructured":"Xiao Q, Li K, Zhang D, et al. Security risks in deep learning implementations. In: Proceedings of IEEE Security and Privacy Workshops (SPW), 2018. 123\u2013128"},{"key":"4351_CR101","first-page":"1505","volume-title":"Proceedings of the 30th USENIX Security Symposium (USENIX Security 21)","author":"E Bagdasaryan","year":"2021","unstructured":"Bagdasaryan E, Shmatikov V. Blind backdoors in deep learning models. In: Proceedings of the 30th USENIX Security Symposium (USENIX Security 21), 2021. 
1505\u20131521"},{"key":"4351_CR102","first-page":"248","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"J Deng","year":"2009","unstructured":"Deng J, Dong W, Socher R, et al. ImageNet: a large-scale hierarchical image database. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, 2009. 248\u2013255"},{"key":"4351_CR103","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"A Shafahi","year":"2018","unstructured":"Shafahi A, Huang W R, Najibi M, et al. Poison frogs! Targeted clean-label poisoning attacks on neural networks. In: Proceedings of Advances in Neural Information Processing Systems, 2018"},{"key":"4351_CR104","first-page":"7614","volume-title":"Proceedings of International Conference on Machine Learning","author":"C Zhu","year":"2019","unstructured":"Zhu C, Huang W R, Li H, et al. Transferable clean-label poisoning attacks on deep neural nets. In: Proceedings of International Conference on Machine Learning, 2019. 7614\u20137623"},{"key":"4351_CR105","first-page":"443","volume-title":"Proceedings of the 28th USENIX Security Symposium (USENIX Security 19)","author":"Q Xiao","year":"2019","unstructured":"Xiao Q, Chen Y, Shen C, et al. Seeing is not believing: camouflage attacks on image scaling algorithms. In: Proceedings of the 28th USENIX Security Symposium (USENIX Security 19), 2019. 443\u2013460"},{"key":"4351_CR106","first-page":"41","volume-title":"Proceedings of IEEE Security and Privacy Workshops (SPW)","author":"E Quiring","year":"2020","unstructured":"Quiring E, Rieck K. Backdooring and poisoning neural networks with image-scaling attacks. In: Proceedings of IEEE Security and Privacy Workshops (SPW), 2020. 41\u201347"},{"key":"4351_CR107","first-page":"349","volume-title":"Proceedings of the ACM SIGSAC Conference on Computer and Communications Security","author":"Y Ji","year":"2018","unstructured":"Ji Y, Zhang X, Ji S, et al. 
Model-reuse attacks on deep learning systems. In: Proceedings of the ACM SIGSAC Conference on Computer and Communications Security, 2018. 349\u2013363"},{"key":"4351_CR108","unstructured":"Ji Y, Liu Z, Hu X, et al. Programmable neural network Trojan for pre-trained feature extractor. 2019. ArXiv:1901.07766"},{"key":"4351_CR109","first-page":"1295","volume-title":"Proceedings of IEEE Symposium on Security and Privacy (SP)","author":"R Schuster","year":"2020","unstructured":"Schuster R, Schuster T, Meri Y, et al. Humpty Dumpty: controlling word meanings via corpus poisoning. In: Proceedings of IEEE Symposium on Security and Privacy (SP), 2020. 1295\u20131313"},{"key":"4351_CR110","volume-title":"Proceedings of the 25th Annual Network and Distributed System Security Symposium (NDSS 2018)","author":"Y Liu","year":"2018","unstructured":"Liu Y, Ma S, Aafer Y, et al. Trojaning attack on neural networks. In: Proceedings of the 25th Annual Network and Distributed System Security Symposium (NDSS 2018), 2018"},{"key":"4351_CR111","first-page":"2041","volume-title":"Proceedings of the ACM SIGSAC Conference on Computer and Communications Security","author":"Y Yao","year":"2019","unstructured":"Yao Y, Li H, Zheng H, et al. Latent backdoor attacks on deep neural networks. In: Proceedings of the ACM SIGSAC Conference on Computer and Communications Security, 2019. 2041\u20132055"},{"key":"4351_CR112","first-page":"13198","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"A S Rakin","year":"2020","unstructured":"Rakin A S, He Z, Fan D. TBT: targeted neural network attack with bit Trojan. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020. 
13198\u201313207"},{"key":"4351_CR113","first-page":"796","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","author":"R Costales","year":"2020","unstructured":"Costales R, Mao C, Norwitz R, et al. Live Trojan attacks on deep neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, 2020. 796\u2013797"},{"key":"4351_CR114","first-page":"1","volume-title":"Proceedings of IEEE International Joint Conference on Biometrics (IJCB)","author":"J Dumford","year":"2020","unstructured":"Dumford J, Scheirer W. Backdooring convolutional neural networks via targeted weight perturbations. In: Proceedings of IEEE International Joint Conference on Biometrics (IJCB), 2020. 1\u20139"},{"key":"4351_CR115","first-page":"2204","volume-title":"Proceedings of the ACM SIGSAC Conference on Computer and Communications Security","author":"J Breier","year":"2018","unstructured":"Breier J, Hou X, Jap D, et al. Practical fault attack on deep neural networks. In: Proceedings of the ACM SIGSAC Conference on Computer and Communications Security, 2018. 2204\u20132206"},{"key":"4351_CR116","first-page":"497","volume-title":"Proceedings of the 28th USENIX Security Symposium (USENIX Security 19)","author":"S Hong","year":"2019","unstructured":"Hong S, Frigo P, Kaya Y, et al. Terminal brain damage: exposing the graceless degradation in deep neural networks under hardware fault attacks. In: Proceedings of the 28th USENIX Security Symposium (USENIX Security 19), 2019. 497\u2013514"},{"key":"4351_CR117","unstructured":"Chen X, Liu C, Li B, et al. Targeted backdoor attacks on deep learning systems using data poisoning. 2017. 
ArXiv:1712.05526"},{"key":"4351_CR118","doi-asserted-by":"publisher","first-page":"554","DOI":"10.1145\/3485832.3485837","volume-title":"Proceedings of the 37th Annual Computer Security Applications Conference","author":"X Chen","year":"2021","unstructured":"Chen X, Salem A, Chen D, et al. BadNL: backdoor attacks against NLP models with semantic-preserving improvements. In: Proceedings of the 37th Annual Computer Security Applications Conference, 2021. 554\u2013569"},{"key":"4351_CR119","unstructured":"Shin J, Tang C, Mohati T, et al. Prompt engineering or fine tuning: an empirical assessment of large language models in automated software engineering tasks. 2023. ArXiv:2310.10508"},{"key":"4351_CR120","first-page":"9459","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"P Lewis","year":"2020","unstructured":"Lewis P, Perez E, Piktus A, et al. Retrieval-augmented generation for knowledge-intensive NLP tasks. In: Proceedings of Advances in Neural Information Processing Systems, 2020. 9459\u20139474"},{"key":"4351_CR121","first-page":"24824","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"J Wei","year":"2022","unstructured":"Wei J, Wang X, Schuurmans D, et al. Chain-of-thought prompting elicits reasoning in large language models. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 24824\u201324837"},{"key":"4351_CR122","unstructured":"Nguyen C V, Shen X, Aponte R, et al. A survey of small language models. 2024. ArXiv:2410.20011"},{"key":"4351_CR123","doi-asserted-by":"crossref","unstructured":"Schick T, Sch\u00fctze H. It\u2019s not just size that matters: small language models are also few-shot learners. 2020. ArXiv:2009.07118","DOI":"10.18653\/v1\/2021.naacl-main.185"},{"key":"4351_CR124","unstructured":"Sutskever I, Vinyals O, Le Q V. Sequence to sequence learning with neural networks. 2014. 
ArXiv:1409.3215"},{"key":"4351_CR125","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster M, Paliwal K K. Bidirectional recurrent neural networks. IEEE Trans Signal Process, 1997, 45: 2673\u20132681","journal-title":"IEEE Trans Signal Process"},{"key":"4351_CR126","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"F A Gers","year":"2000","unstructured":"Gers F A, Schmidhuber J, Cummins F. Learning to forget: continual prediction with LSTM. Neural Comput, 2000, 12: 2451\u20132471","journal-title":"Neural Comput"},{"key":"4351_CR127","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"A Wei","year":"2024","unstructured":"Wei A, Haghtalab N, Steinhardt J. Jailbroken: how does LLM safety training fail? In: Proceedings of Advances in Neural Information Processing Systems, 2024"},{"key":"4351_CR128","doi-asserted-by":"publisher","first-page":"823","DOI":"10.1080\/01431160600746456","volume":"28","author":"D Lu","year":"2007","unstructured":"Lu D, Weng Q. A survey of image classification methods and techniques for improving classification performance. Int J Remote Sens, 2007, 28: 823\u2013870","journal-title":"Int J Remote Sens"},{"key":"4351_CR129","volume-title":"Text Generation","author":"K McKeown","year":"1992","unstructured":"McKeown K. Text Generation. Cambridge: Cambridge University Press, 1992"},{"key":"4351_CR130","first-page":"2578","volume":"31","author":"J Zhang","year":"2020","unstructured":"Zhang J, Li C. Adversarial examples: opportunities and challenges. 
IEEE Trans Neural Netw Learn Syst, 2020, 31: 2578\u20132593","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"4351_CR131","doi-asserted-by":"publisher","first-page":"58443","DOI":"10.1109\/ACCESS.2020.2983149","volume":"8","author":"E Yurtsever","year":"2020","unstructured":"Yurtsever E, Lambert J, Carballo A, et al. A survey of autonomous driving: common practices and emerging technologies. IEEE Access, 2020, 8: 58443\u201358469","journal-title":"IEEE Access"},{"key":"4351_CR132","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1049\/ip-vis:20041147","volume":"152","author":"M Valera","year":"2005","unstructured":"Valera M, Velastin S A. Intelligent distributed surveillance systems: a review. IEE Proc Vis Image Process, 2005, 152: 192\u2013204","journal-title":"IEE Proc Vis Image Process"},{"key":"4351_CR133","doi-asserted-by":"publisher","DOI":"10.1017\/9781316671849","volume-title":"Fundamentals of Medical Imaging","author":"P Suetens","year":"2017","unstructured":"Suetens P. Fundamentals of Medical Imaging. Cambridge: Cambridge University Press, 2017"},{"key":"4351_CR134","volume-title":"Proceedings of Workshop on Robustness of Few-shot and Zero-shot Learning in Foundation Models at NeurIPS","author":"S Ranjan","year":"2023","unstructured":"Ranjan S, Sun C E, Liu L, et al. Fooling GPT with adversarial in-context examples for text classification. In: Proceedings of Workshop on Robustness of Few-shot and Zero-shot Learning in Foundation Models at NeurIPS, 2023"},{"key":"4351_CR135","volume-title":"Proceedings of Workshop on Backdoors in Deep Learning\u2014the Good, the Bad, and the Ugly","author":"J Yan","year":"2023","unstructured":"Yan J, Yadav V, Li S, et al. Backdooring instruction-tuned large language models with virtual prompt injection. In: Proceedings of Workshop on Backdoors in Deep Learning\u2014the Good, the Bad, and the Ugly, 2023"},{"key":"4351_CR136","unstructured":"Zou W, Geng R, Wang B, et al. 
Poisonedrag: knowledge poisoning attacks to retrieval-augmented generation of large language models. 2024. ArXiv:2402.07867"},{"key":"4351_CR137","unstructured":"Wang J, Wu J, Chen M, et al. On the exploitability of reinforcement learning with human feedback for large language models. 2023. ArXiv:2311.09641"},{"key":"4351_CR138","unstructured":"Xu J, Ma M D, Wang F, et al. Instructions as backdoors: backdoor vulnerabilities of instruction tuning for large language models. 2023. ArXiv:2305.14710"},{"key":"4351_CR139","unstructured":"He J, Jiang W, Hou G, et al. Talk too much: poisoning large language models under token limit. 2024. ArXiv:2404.14795"},{"key":"4351_CR140","unstructured":"Ni Z, Ye R, Wei Y, et al. Physical backdoor attack can jeopardize driving with vision-large-language models. 2024. ArXiv:2404.12916"},{"key":"4351_CR141","first-page":"61836","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"M Shu","year":"2023","unstructured":"Shu M, Wang J, Zhu C, et al. On the exploitability of instruction tuning. In: Proceedings of Advances in Neural Information Processing Systems, 2023. 61836\u201361856"},{"key":"4351_CR142","unstructured":"Wang J, Xu Q, He X, et al. Backdoor attack on multilingual machine translation. 2024. ArXiv:2404.02393"},{"key":"4351_CR143","doi-asserted-by":"crossref","unstructured":"Lamparth M, Reuel A. Analyzing and editing inner mechanisms of backdoored language models. 2023. ArXiv:2302.12461","DOI":"10.1145\/3630106.3659042"},{"key":"4351_CR144","volume-title":"Proceedings of Workshop on Secure and Trustworthy Large Language Models","author":"E Salimbeni","year":"2024","unstructured":"Salimbeni E, Craighero F, Khasanova R, et al. Beyond fine-tuning: LoRA modules boost near-OOD detection and LLM security. In: Proceedings of Workshop on Secure and Trustworthy Large Language Models, 2024"},{"key":"4351_CR145","unstructured":"Wen R, Wang T, Backes M, et al. 
Last one standing: a comparative analysis of security and privacy of soft prompt tuning, LoRA, and in-context learning. 2023. ArXiv:2310.11397"},{"key":"4351_CR146","unstructured":"Liu H, Liu Z, Tang R, et al. LoRA-as-an-attack! Piercing LLM safety under the share-and-play scenario. 2024. ArXiv:2403.00108"},{"key":"4351_CR147","unstructured":"Xue J, Zheng M, Hua T, et al. TrojLLM: a black-box Trojan prompt attack on large language models. 2023. ArXiv:2306.06815"},{"key":"4351_CR148","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1145\/3627106.3627122","volume-title":"Proceedings of the 39th Annual Computer Security Applications Conference","author":"C Weeks","year":"2023","unstructured":"Weeks C, Cheruvu A, Abdullah S M, et al. A first look at toxicity injection attacks on open-domain chatbots. In: Proceedings of the 39th Annual Computer Security Applications Conference, 2023. 521\u2013534"},{"key":"4351_CR149","unstructured":"Qiang Y, Zhou X, Zade S Z, et al. Learning to poison large language models during instruction tuning. 2024. ArXiv:2402.13459"},{"key":"4351_CR150","unstructured":"Li Y, Li T, Chen K, et al. Badedit: backdooring large language models by model editing. 2024. ArXiv:2403.13355"},{"key":"4351_CR151","doi-asserted-by":"crossref","unstructured":"Gu N, Fu P, Liu X, et al. Light-peft: lightening parameter-efficient fine-tuning via early pruning. 2024. ArXiv:2406.03792","DOI":"10.18653\/v1\/2024.findings-acl.447"},{"key":"4351_CR152","unstructured":"Shi J, Liu Y, Zhou P, et al. BadGPT: exploring security vulnerabilities of ChatGPT via backdoor attacks to InstructGPT. 2023. ArXiv:2304.12298"},{"key":"4351_CR153","unstructured":"Carlini N. A LLM assisted exploitation of AI-guardian. 2023. ArXiv:2307.15008"},{"key":"4351_CR154","doi-asserted-by":"crossref","unstructured":"Tan Z, Chen Q, Huang Y, et al. Target: template-transferable backdoor attack against prompt-based NLP models via GPT4. 2023. 
ArXiv:2311.17429","DOI":"10.1007\/978-981-97-9434-8_31"},{"key":"4351_CR155","unstructured":"Liu T, Deng Z, Meng G, et al. Demystifying RCE vulnerabilities in LLM-integrated apps. 2023. ArXiv:2309.02926"},{"key":"4351_CR156","first-page":"7745","volume-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"H Yao","year":"2024","unstructured":"Yao H, Lou J, Qin Z. Poisonprompt: backdoor attack on prompt-based large language models. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024. 7745\u20137749"},{"key":"4351_CR157","first-page":"1304","volume-title":"Proceedings of ASU International Conference in Emerging Technologies for Sustainability and Intelligent Systems (ICETSIS)","author":"L Alotaibi","year":"2024","unstructured":"Alotaibi L, Seher S, Mohammad N. Cyberattacks using ChatGPT: exploring malicious content generation through prompt engineering. In: Proceedings of ASU International Conference in Emerging Technologies for Sustainability and Intelligent Systems (ICETSIS), 2024. 1304\u20131311"},{"key":"4351_CR158","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1145\/3605764.3623985","volume-title":"Proceedings of the 16th ACM Workshop on Artificial Intelligence and Security","author":"K Greshake","year":"2023","unstructured":"Greshake K, Abdelnabi S, Mishra S, et al. Not what you\u2019ve signed up for: compromising real-world LLM-integrated applications with indirect prompt injection. In: Proceedings of the 16th ACM Workshop on Artificial Intelligence and Security, 2023. 79\u201390"},{"key":"4351_CR159","unstructured":"Hao Y, Yang W, Lin Y. Exploring backdoor vulnerabilities of chat models. 2024. ArXiv:2404.02406"},{"key":"4351_CR160","doi-asserted-by":"crossref","unstructured":"Zhao S, Wen J, Tuan L A, et al. Prompt as triggers for backdoor attack: examining the vulnerability in language models. 2023. 
ArXiv:2305.01219","DOI":"10.18653\/v1\/2023.emnlp-main.757"},{"key":"4351_CR161","doi-asserted-by":"crossref","unstructured":"Huang H, Zhao Z, Backes M, et al. Composite backdoor attacks against large language models. 2024. ArXiv:2310.07676","DOI":"10.18653\/v1\/2024.findings-naacl.94"},{"key":"4351_CR162","doi-asserted-by":"crossref","unstructured":"You W, Hammoudeh Z, Lowd D. Large language models are better adversaries: exploring generative clean-label backdoor attacks against text classifiers. 2023. ArXiv:2310.18603","DOI":"10.18653\/v1\/2023.findings-emnlp.833"},{"key":"4351_CR163","unstructured":"Panda A, Choquette-Choo C A, Zhang Z, et al. Teach LLMs to phish: stealing private information from language models. 2024. ArXiv:2403.00871"},{"key":"4351_CR164","unstructured":"Wang H, Shen Q, Tong Y, et al. The stronger the diffusion model, the easier the backdoor: data poisoning to induce copyright breaches without adjusting finetuning pipeline. 2024. ArXiv:2401.04136"},{"key":"4351_CR165","unstructured":"Gao Y, Xiong Y, Gao X, et al. Retrieval-augmented generation for large language models: a survey. 2024. ArXiv:2312.10997"},{"key":"4351_CR166","doi-asserted-by":"publisher","first-page":"1132","DOI":"10.1016\/j.respol.2012.03.008","volume":"41","author":"J Fagerberg","year":"2012","unstructured":"Fagerberg J, Fosaas M, Sapprasert K. Innovation: exploring the knowledge base. Res Policy, 2012, 41: 1132\u20131153","journal-title":"Res Policy"},{"key":"4351_CR167","first-page":"1","volume":"53","author":"Y Wang","year":"2021","unstructured":"Wang Y, Yao Q, Kwok J T, et al. Generalizing from a few examples: a survey on few-shot learning. ACM Comput Surv, 2021, 53: 1\u201334","journal-title":"ACM Comput Surv"},{"key":"4351_CR168","unstructured":"Mengara O. Trading devil final: backdoor attack via stock market and bayesian optimization. 2024. ArXiv:2407.14573"},{"key":"4351_CR169","unstructured":"Turner A, Tsipras D, Madry A. Clean-label backdoor attacks. 2018. 
https:\/\/people.csail.mit.edu\/madry\/lab\/cleanlabel.pdf"},{"key":"4351_CR170","unstructured":"Turner A, Tsipras D, Madry A. Label-consistent backdoor attacks. 2019. ArXiv:1912.02771"},{"key":"4351_CR171","first-page":"1541","volume-title":"Proceedings of the 30th USENIX Security Symposium (USENIX Security 21)","author":"D Tang","year":"2021","unstructured":"Tang D, Wang X, Tang H, et al. Demon in the variant: statistical analysis of DNNs for robust backdoor contamination detection. In: Proceedings of the 30th USENIX Security Symposium (USENIX Security 21), 2021. 1541\u20131558"},{"key":"4351_CR172","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1145\/1101908.1101941","volume-title":"Proceedings of the 20th IEEE\/ACM International Conference on Automated Software Engineering","author":"G Langelier","year":"2005","unstructured":"Langelier G, Sahraoui H, Poulin P. Visualization-based analysis of quality for large-scale software systems. In: Proceedings of the 20th IEEE\/ACM International Conference on Automated Software Engineering, 2005. 214\u2013223"},{"key":"4351_CR173","doi-asserted-by":"crossref","unstructured":"Gan L, Li J, Zhang T, et al. Triggerless backdoor attack for NLP tasks with clean labels. 2021. ArXiv:2111.07970","DOI":"10.18653\/v1\/2022.naacl-main.214"},{"key":"4351_CR174","doi-asserted-by":"publisher","first-page":"40612","DOI":"10.1109\/ACCESS.2020.2976687","volume":"8","author":"A K Pandey","year":"2020","unstructured":"Pandey A K, Khan A I, Abushark Y B, et al. Key issues in healthcare data integrity: analysis and recommendations. IEEE Access, 2020, 8: 40612\u201340628","journal-title":"IEEE Access"},{"key":"4351_CR175","unstructured":"Bai Y, Jones A, Ndousse K, et al. Training a helpful and harmless assistant with reinforcement learning from human feedback. 2022. 
ArXiv:2204.05862"},{"key":"4351_CR176","doi-asserted-by":"publisher","first-page":"4551","DOI":"10.1038\/s41467-019-12397-x","volume":"10","author":"P Godec","year":"2019","unstructured":"Godec P, Pan\u010dur M, Ileni\u010d N, et al. Democratized image analytics by visual programming through integration of deep models and small-scale machine learning. Nat Commun, 2019, 10: 4551","journal-title":"Nat Commun"},{"key":"4351_CR177","unstructured":"Ilyas A, Santurkar S, Tsipras D, et al. Adversarial examples are not bugs, they are features. In: Proceedings of Advances in Neural Information Processing Systems, 2019"},{"key":"4351_CR178","unstructured":"Kurakin A, Goodfellow I, Bengio S. Adversarial machine learning at scale. 2016. ArXiv:1611.01236"},{"key":"4351_CR179","first-page":"217","volume-title":"Proceedings of the European Conference on Computer Vision (ECCV)","author":"C Xiao","year":"2018","unstructured":"Xiao C, Deng R, Li B, et al. Characterizing adversarial examples based on spatial consistency information for semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), 2018. 217\u2013234"},{"key":"4351_CR180","unstructured":"Cao Y, Cao B, Chen J. Stealthy and persistent unalignment on large language models via backdoor injections. 2023. ArXiv:2312.00027"},{"key":"4351_CR181","first-page":"36479","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"C Saharia","year":"2022","unstructured":"Saharia C, Chan W, Saxena S, et al. Photorealistic text-to-image diffusion models with deep language understanding. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 36479\u201336494"},{"key":"4351_CR182","unstructured":"Rando J, Tram\u00e8r F. Universal jailbreak backdoors from poisoned human feedback. 2023. ArXiv:2311.14455"},{"key":"4351_CR183","unstructured":"Liang S, Liang J, Pang T, et al. Revisiting backdoor attacks against large vision-language models. 2024. 
ArXiv:2406.18844"},{"key":"4351_CR184","doi-asserted-by":"publisher","first-page":"1877","DOI":"10.1109\/TPAMI.2024.3507000","volume":"47","author":"P Xu","year":"2025","unstructured":"Xu P, Shao W, Zhang K, et al. LVLM-EHub: a comprehensive evaluation benchmark for large vision-language models. IEEE Trans Pattern Anal Mach Intell, 2025, 47: 1877\u20131893","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4351_CR185","doi-asserted-by":"publisher","first-page":"608","DOI":"10.1109\/JSTSP.2016.2539100","volume":"10","author":"M A Davenport","year":"2016","unstructured":"Davenport M A, Romberg J. An overview of low-rank matrix recovery from incomplete observations. IEEE J Sel Top Signal Process, 2016, 10: 608\u2013622","journal-title":"IEEE J Sel Top Signal Process"},{"key":"4351_CR186","unstructured":"Schwinn L, Dobre D, Xhonneux S, et al. Soft prompt threats: attacking safety alignment and unlearning in open-source LLMs through the embedding space. 2024. ArXiv:2402.09063"},{"key":"4351_CR187","unstructured":"Ellers M, Cochez M, Schumacher T, et al. Privacy attacks on network embeddings. 2019. ArXiv:1912.10979"},{"key":"4351_CR188","doi-asserted-by":"publisher","first-page":"1466","DOI":"10.3390\/s16091466","volume":"16","author":"A Augustin","year":"2016","unstructured":"Augustin A, Yi J, Clausen T, et al. A study of LoRa: long range & low power networks for the Internet of Things. Sensors, 2016, 16: 1466","journal-title":"Sensors"},{"key":"4351_CR189","first-page":"2438","volume-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics","author":"H Wu","year":"2022","unstructured":"Wu H, Shi X. Adversarial soft prompt tuning for cross-domain sentiment analysis. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics, 2022. 2438\u20132447"},{"key":"4351_CR190","unstructured":"Liang J, Liang S, Luo M, et al. 
VL-Trojan: multimodal instruction backdoor attacks against autoregressive visual language models. 2024. ArXiv:2402.13851"},{"key":"4351_CR191","doi-asserted-by":"crossref","unstructured":"Cheng P, Ding Y, Ju T, et al. TrojanRAG: retrieval-augmented generation can be backdoor driver in large language models. 2024. ArXiv:2405.13401","DOI":"10.2139\/ssrn.5327517"},{"key":"4351_CR192","unstructured":"Jiao R, Xie S, Yue J, et al. Exploring backdoor attacks against large language model-based decision making. 2024. ArXiv:2405.20774"},{"key":"4351_CR193","unstructured":"Qi X, Zeng Y, Xie T, et al. Fine-tuning aligned language models compromises safety, even when users do not intend to! 2023. ArXiv:2310.03693"},{"key":"4351_CR194","unstructured":"Heibel J, Lowd D. Mapping your model: assessing the impact of adversarial attacks on LLM-based programming assistants. 2024. ArXiv:2407.11072"},{"key":"4351_CR195","doi-asserted-by":"publisher","first-page":"2858","DOI":"10.3390\/electronics13142858","volume":"13","author":"J He","year":"2024","unstructured":"He J, Hou G, Jia X, et al. Data stealing attacks against large language models via backdooring. Electronics, 2024, 13: 2858","journal-title":"Electronics"},{"key":"4351_CR196","unstructured":"Qiang Y, Zhou X, Zhu D. Hijacking large language models via adversarial in-context learning. 2023. ArXiv:2311.09948"},{"key":"4351_CR197","unstructured":"Halawi D, Wei A, Wallace E, et al. Covert malicious finetuning: challenges in safeguarding LLM adaptation. 2024. ArXiv:2406.20053"},{"key":"4351_CR198","unstructured":"Wang H, Shu K. Backdoor activation attack: attack large language models using activation steering for safety-alignment. 2023. ArXiv:2311.09433"},{"key":"4351_CR199","unstructured":"Xu L, Xie H, Qin S Z J, et al. Parameter-efficient fine-tuning methods for pretrained language models: a critical review and assessment. 2023. ArXiv:2312.12148"},{"key":"4351_CR200","unstructured":"Li Y. Deep reinforcement learning: an overview. 
2017. ArXiv:1701.07274"},{"key":"4351_CR201","first-page":"27730","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X, et al. Training language models to follow instructions with human feedback. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 27730\u201327744"},{"key":"4351_CR202","unstructured":"Achiam J, Adler S, Agarwal S, et al. GPT-4 technical report. 2023. ArXiv:2303.08774"},{"key":"4351_CR203","unstructured":"Xiang Z, Jiang F, Xiong Z, et al. Badchain: backdoor chain-of-thought prompting for large language models. 2024. ArXiv:2401.12242"},{"key":"4351_CR204","doi-asserted-by":"crossref","unstructured":"Zhao S, Jia M, Tuan L A, et al. Universal vulnerabilities in large language models: backdoor attacks for in-context learning. 2024. ArXiv:2401.05949","DOI":"10.18653\/v1\/2024.emnlp-main.642"},{"key":"4351_CR205","unstructured":"Li S, Yao L, Gao J, et al. Double-I watermark: protecting model copyright for LLM fine-tuning. 2024. ArXiv:2402.14883"},{"key":"4351_CR206","unstructured":"Zhang R, Li H, Wen R, et al. Instruction backdoor attacks against customized LLMs. 2024. ArXiv:2402.09179"},{"key":"4351_CR207","doi-asserted-by":"crossref","unstructured":"Crawford K, Joler V. Anatomy of an AI System. 2018. https:\/\/anatomyof.ai\/","DOI":"10.1386\/vcr_00008_7"},{"key":"4351_CR208","unstructured":"Hubinger E, Denison C, Mu J, et al. Sleeper agents: training deceptive LLMs that persist through safety training. 2024. ArXiv:2401.05566"},{"key":"4351_CR209","unstructured":"Price S, Panickssery A, Bowman S, et al. Future events as backdoor triggers: investigating temporal vulnerabilities in LLMs. 2024. 
ArXiv:2407.04108"},{"key":"4351_CR210","doi-asserted-by":"publisher","first-page":"1316","DOI":"10.1145\/3634737.3656289","volume-title":"Proceedings of the 19th ACM Asia Conference on Computer and Communications Security","author":"B Chen","year":"2024","unstructured":"Chen B, Ivanov N, Wang G, et al. Multi-turn hidden backdoor in large language model-powered chatbot models. In: Proceedings of the 19th ACM Asia Conference on Computer and Communications Security, 2024. 1316\u20131330"},{"key":"4351_CR211","unstructured":"Chen Z, Xiang Z, Xiao C, et al. Agentpoison: red-teaming LLM agents via poisoning memory or knowledge bases. 2024. ArXiv:2407.12784"},{"key":"4351_CR212","unstructured":"Wang T, Yao Y, Xu F, et al. Confidence matters: inspecting backdoors in deep neural networks via distribution transfer. 2022. ArXiv:2208.06592"},{"key":"4351_CR213","doi-asserted-by":"crossref","unstructured":"Feng S, Tao G, Cheng S, et al. Detecting backdoors in pre-trained encoders. 2023. ArXiv:2303.15180","DOI":"10.1109\/CVPR52729.2023.01569"},{"key":"4351_CR214","doi-asserted-by":"crossref","unstructured":"Zhao S, Gan L, Tuan L A, et al. Defending against weight-poisoning backdoor attacks for parameter-efficient fine-tuning. 2024. ArXiv:2402.12168","DOI":"10.18653\/v1\/2024.findings-naacl.217"},{"key":"4351_CR215","unstructured":"Guo W, Wang L, Xing X, et al. TABOR: a highly accurate approach to inspecting and restoring Trojan backdoors in AI systems. 2019. ArXiv:1908.01763"},{"key":"4351_CR216","doi-asserted-by":"publisher","first-page":"120283","DOI":"10.1016\/j.ins.2024.120283","volume":"662","author":"N Zhong","year":"2024","unstructured":"Zhong N, Qian Z, Zhang X. Backdoor attack detection via prediction trustworthiness assessment. 
Inf Sci, 2024, 662: 120283","journal-title":"Inf Sci"},{"key":"4351_CR217","doi-asserted-by":"publisher","first-page":"4032","DOI":"10.1109\/TNNLS.2022.3201586","volume":"35","author":"W Jiang","year":"2024","unstructured":"Jiang W, Wen X, Zhan J, et al. Critical path-based backdoor detection for deep neural networks. IEEE Trans Neural Netw Learn Syst, 2024, 35: 4032\u20134046","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"4351_CR218","volume-title":"Proceedings of IEEE International Conference on Communications","author":"K M Hossain","year":"2024","unstructured":"Hossain K M, Oates T. Advancing security in AI systems: a novel approach to detecting backdoors in deep neural networks. In: Proceedings of IEEE International Conference on Communications, 2024"},{"key":"4351_CR219","doi-asserted-by":"publisher","first-page":"102730","DOI":"10.1016\/j.cose.2022.102730","volume":"118","author":"K Shao","year":"2022","unstructured":"Shao K, Zhang Y, Yang J, et al. The triggers that open the NLP model backdoors are hidden in the adversarial samples. Comput Secur, 2022, 118: 102730","journal-title":"Comput Secur"},{"key":"4351_CR220","doi-asserted-by":"publisher","first-page":"4117","DOI":"10.1109\/TIFS.2021.3103064","volume":"16","author":"M Fan","year":"2021","unstructured":"Fan M, Si Z, Xie X, et al. Text backdoor detection using an interpretable RNN abstract model. IEEE Trans Inform Forensic Secur, 2021, 16: 4117\u20134132","journal-title":"IEEE Trans Inform Forensic Secur"},{"key":"4351_CR221","first-page":"297","volume-title":"Singapore: Springer","author":"J Chen","year":"2024","unstructured":"Chen J, Zhang X, Zheng H. Using Adversarial Examples to against Backdoor Attack in Federated Learning. Singapore: Springer, 2024. 297\u2013311"},{"key":"4351_CR222","first-page":"363","volume-title":"Proceedings of the ACM Asia Conference on Computer and Communications Security","author":"H Qiu","year":"2021","unstructured":"Qiu H, Zeng Y, Guo S, et al. 
Deepsweep: an evaluation framework for mitigating dnn backdoor attacks using data augmentation. In: Proceedings of the ACM Asia Conference on Computer and Communications Security, 2021. 363\u2013377"},{"key":"4351_CR223","first-page":"14900","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Y Li","year":"2021","unstructured":"Li Y, Lyu X, Koren N, et al. Anti-backdoor learning: training clean models on poisoned data. In: Proceedings of Advances in Neural Information Processing Systems, 2021. 14900\u201314912"},{"key":"4351_CR224","unstructured":"Chan A, Ong Y. Poison as a cure: detecting & neutralizing variable-sized backdoor attacks in deep neural networks. 2019. ArXiv:1911.08040"},{"key":"4351_CR225","unstructured":"Wei S, Zha H, Wu B. Mitigating backdoor attack by injecting proactive defensive backdoor. 2024. ArXiv:2405.16112"},{"key":"4351_CR226","unstructured":"Pu Y, Chen J, Zhou C, et al. How to train a backdoor-robust model on a poisoned dataset without auxiliary data? 2024. ArXiv:2405.12719"},{"key":"4351_CR227","unstructured":"Pan M, Zeng Y, Lyu L, et al. ASSET: robust backdoor data detection across a multiplicity of deep learning paradigms. 2023. ArXiv:2302.11408"},{"key":"4351_CR228","doi-asserted-by":"crossref","unstructured":"Liu K, Dolan-Gavitt B, Garg S. Fine-pruning: defending against backdooring attacks on deep neural networks. 2018. ArXiv:1805.12185","DOI":"10.1007\/978-3-030-00470-5_13"},{"key":"4351_CR229","doi-asserted-by":"crossref","unstructured":"Zhai S, Shen Q, Chen X, et al. NCL: textual backdoor defense using noise-augmented contrastive learning. 2023. ArXiv:2303.01742","DOI":"10.1109\/ICASSP49357.2023.10095007"},{"key":"4351_CR230","doi-asserted-by":"crossref","unstructured":"Pei H, Jia J, Guo W, et al. TextGuard: provable defense against backdoor attacks on text classification. 2023. 
ArXiv:2311.11225","DOI":"10.14722\/ndss.2024.24090"},{"key":"4351_CR231","doi-asserted-by":"crossref","unstructured":"Qi F, Chen Y, Li M, et al. ONION: a simple and effective defense against textual backdoor attacks. 2021. ArXiv:2011.10369","DOI":"10.18653\/v1\/2021.emnlp-main.752"},{"key":"4351_CR232","first-page":"442","volume-title":"Proceedings of IEEE Military Communications Conference (MILCOM)","author":"Y Gao","year":"2022","unstructured":"Gao Y, Stokes J W, Prasad M A, et al. I know your triggers: defending against textual backdoor attacks with benign backdoor augmentation. In: Proceedings of IEEE Military Communications Conference (MILCOM), 2022. 442\u2013449"},{"key":"4351_CR233","unstructured":"Sagar S, Bhatt A, Bidaralli A S. Defending against stealthy backdoor attacks. 2022. ArXiv:2205.14246"},{"key":"4351_CR234","first-page":"1","volume-title":"Proceedings of the 29th International Workshop on Machine Learning for Signal Processing (MLSP)","author":"Z Xiang","year":"2019","unstructured":"Xiang Z, Miller D J, Kesidis G. A benchmark study of backdoor data poisoning defenses for deep neural network classifiers and a novel defense. In: Proceedings of the 29th International Workshop on Machine Learning for Signal Processing (MLSP), 2019. 1\u20136"},{"key":"4351_CR235","doi-asserted-by":"publisher","first-page":"880","DOI":"10.1109\/TR.2022.3159784","volume":"71","author":"S Udeshi","year":"2022","unstructured":"Udeshi S, Peng S, Woo G, et al. Model agnostic defence against backdoor attacks in machine learning. IEEE Trans Rel, 2022, 71: 880\u2013895","journal-title":"IEEE Trans Rel"},{"key":"4351_CR236","unstructured":"Yang W, Gao J, Mirzasoleiman B. Robust contrastive language-image pre-training against data poisoning and backdoor attacks. 2023. ArXiv:2303.06854"},{"key":"4351_CR237","unstructured":"Yang W, Gao J, Mirzasoleiman B. Better safe than sorry: pre-training clip against targeted data poisoning and backdoor attacks. 2023. 
ArXiv:2310.05862"},{"key":"4351_CR238","unstructured":"Shen G, Liu Y, Tao G, et al. Constrained optimization with dynamic bound-scaling for effective NLP backdoor defense. 2022. ArXiv:2202.05749"},{"key":"4351_CR239","first-page":"1086","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"B Zhu","year":"2022","unstructured":"Zhu B, Qin Y, Cui G, et al. Moderate-fitting as a natural backdoor defender for pre-trained language models. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 1086\u20131099"},{"key":"4351_CR240","unstructured":"Li H, Chen Y, Zheng Z, et al. Backdoor removal for generative large language models. 2024. ArXiv:2405.07667"},{"key":"4351_CR241","doi-asserted-by":"crossref","unstructured":"Rieger P, Nguyen T D, Miettinen M, et al. DeepSight: mitigating backdoor attacks in federated learning through deep model inspection. 2022. ArXiv:2201.00763","DOI":"10.14722\/ndss.2022.23156"},{"key":"4351_CR242","unstructured":"Chen B, Carvalho W, Baracaldo N, et al. Detecting backdoor attacks on deep neural networks by activation clustering. 2018. ArXiv:1811.03728"},{"key":"4351_CR243","doi-asserted-by":"publisher","first-page":"220","DOI":"10.3390\/e25020220","volume":"25","author":"K Shao","year":"2023","unstructured":"Shao K, Yang J, Hu P, et al. A textual backdoor defense method based on deep feature classification. Entropy, 2023, 25: 220","journal-title":"Entropy"},{"key":"4351_CR244","unstructured":"Xi Z, Du T, Li C, et al. Defending pre-trained language models as few-shot learners against backdoor attacks. 2023. ArXiv:2309.13256"},{"key":"4351_CR245","first-page":"1","volume-title":"Proceedings of IEEE Colombian Conference on Applications of Computational Intelligence (ColCACI)","author":"H K Surendrababu","year":"2023","unstructured":"Surendrababu H K. Model agnostic approach for NLP backdoor detection. 
In: Proceedings of IEEE Colombian Conference on Applications of Computational Intelligence (ColCACI), 2023. 1\u20136"},{"key":"4351_CR246","unstructured":"Geiping J, Fowl L, Somepalli G, et al. What doesn\u2019t kill you makes you robust(er): how to adversarially train against data poisoning. 2022. ArXiv:2102.13624"},{"key":"4351_CR247","first-page":"3855","volume-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"E Borgnia","year":"2021","unstructured":"Borgnia E, Cherepanova V, Fowl L, et al. Strong data augmentation sanitizes poisoning and backdoor attacks without an accuracy tradeoff. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021. 3855\u20133859"},{"key":"4351_CR248","first-page":"21850","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"J Zhou","year":"2024","unstructured":"Zhou J, Lv P, Lan Y, et al. DataElixir: purifying poisoned dataset to mitigate backdoor attacks via diffusion models. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2024. 21850\u201321858"},{"key":"4351_CR249","first-page":"13358","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"J Guan","year":"2022","unstructured":"Guan J, Tu Z, He R, et al. Few-shot backdoor defense using Shapley estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2022. 13358\u201313367"},{"key":"4351_CR250","unstructured":"Li N, Yu H, Yi P. Rethinking pruning for backdoor mitigation: an optimization perspective. 2024. ArXiv:2405.17746"},{"key":"4351_CR251","doi-asserted-by":"crossref","unstructured":"Zhang Z, Lyu L, Ma X, et al. Fine-mixing: mitigating backdoors in fine-tuned language models. 2022. 
ArXiv:2210.09545","DOI":"10.18653\/v1\/2022.findings-emnlp.26"},{"key":"4351_CR252","doi-asserted-by":"crossref","unstructured":"Zhang Z, Chen D, Zhou H, et al. Diffusion theory as a scalpel: detecting and purifying poisonous dimensions in pre-trained language models caused by backdoor or bias, 2023. ArXiv:2305.04547","DOI":"10.18653\/v1\/2023.findings-acl.157"},{"key":"4351_CR253","first-page":"36396","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Z Wang","year":"2022","unstructured":"Wang Z, Ding H, Zhai J, et al. Training with more confidence: mitigating injected and natural backdoors during training. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 36396\u201336410"},{"key":"4351_CR254","first-page":"20485","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Q Xu","year":"2023","unstructured":"Xu Q, Tao G, Honorio J, et al. MEDIC: remove model backdoors via importance driven cloning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2023. 20485\u201320494"},{"key":"4351_CR255","unstructured":"Yang G, Zhou Y, Chen X, et al. DeCE: deceptive cross-entropy loss designed for defending backdoor attacks. 2024. ArXiv:2407.08956"},{"key":"4351_CR256","unstructured":"Zhang J C, Xiong Y J, Qiu H X, et al. LoRA2: multi-scale low-rank approximations for fine-tuning large language models. 2024. ArXiv:2408.06854"},{"key":"4351_CR257","unstructured":"Li X, Wang H, Miller D J, et al. Universal post-training reverse-engineering defense against backdoors in deep neural networks. 2024. ArXiv:2402.02034"},{"key":"4351_CR258","first-page":"22285","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"S Chai","year":"2022","unstructured":"Chai S, Chen J. One-shot neural backdoor erasing via adversarial weight masking. 
In: Proceedings of Advances in Neural Information Processing Systems, 2022. 22285\u201322299"},{"key":"4351_CR259","unstructured":"Qiao X, Yang Y, Li H. Defending neural backdoors via generative distribution modeling. 2019. ArXiv:1910.04749"},{"key":"4351_CR260","unstructured":"Chen T, Zhou H, Mingrui H, et al. Gradient broadcast adaptation: defending against the backdoor attack in pre-trained models. 2022. https:\/\/openreview.net\/forum?id=aKZeBGUJXlH"},{"key":"4351_CR261","unstructured":"Liu Q, Wang F, Xiao C, et al. From shortcuts to triggers: backdoor defense with denoised PoE. 2024. ArXiv:2305.14910"},{"key":"4351_CR262","doi-asserted-by":"crossref","unstructured":"Graf V, Liu Q, Chen M. Two heads are better than one: nested PoE for robust defense against multi-backdoors. 2024. ArXiv:2404.02356","DOI":"10.18653\/v1\/2024.naacl-long.40"},{"key":"4351_CR263","unstructured":"Liang S, Liu K, Gong J, et al. Unlearning backdoor threats: enhancing backdoor defense in multimodal contrastive learning via local token unlearning. 2024. ArXiv:2403.16257"},{"key":"4351_CR264","doi-asserted-by":"crossref","unstructured":"Arora A, He X, Mozes M, et al. Here\u2019s a free lunch: sanitizing backdoored models with model merge. 2024. ArXiv:2402.19334","DOI":"10.18653\/v1\/2024.findings-acl.894"},{"key":"4351_CR265","doi-asserted-by":"crossref","unstructured":"Weber M, Xu X, Karla B, et al. RAB: provable robustness against backdoor attacks. 2023. ArXiv:2003.08904","DOI":"10.1109\/SP46215.2023.10179451"},{"key":"4351_CR266","unstructured":"Wang B, Cao X, Jia J, et al. On certifying robustness against backdoor attacks via randomized smoothing. 2020. ArXiv:2002.11750"},{"key":"4351_CR267","unstructured":"Xie C, Chen M, Chen P Y, et al. CRFL: certifiably robust federated learning against backdoor attacks. 2021. 
ArXiv:2106.08283"},{"key":"4351_CR268","first-page":"707","volume-title":"Proceedings of IEEE Symposium on Security and Privacy (SP)","author":"B Wang","year":"2019","unstructured":"Wang B, Yao Y, Shan S, et al. Neural cleanse: identifying and mitigating backdoor attacks in neural networks. In: Proceedings of IEEE Symposium on Security and Privacy (SP), 2019. 707\u2013723"},{"key":"4351_CR269","unstructured":"Azizi A, Tahmid I A, Waheed A, et al. T-Miner: a generative approach to defend against Trojan attacks on DNN-based text classification. 2021. ArXiv:2103.04264"},{"key":"4351_CR270","first-page":"2025","volume-title":"Proceedings of IEEE Symposium on Security and Privacy (SP)","author":"Y Liu","year":"2022","unstructured":"Liu Y, Shen G, Tao G, et al. Piccolo: exposing complex backdoors in NLP transformer models. In: Proceedings of IEEE Symposium on Security and Privacy (SP), 2022. 2025\u20132042"},{"key":"4351_CR271","unstructured":"Guo J, Li A, Liu C. AEVA: black-box backdoor detection using adversarial extreme value analysis. 2021. ArXiv:2110.14880"},{"key":"4351_CR272","first-page":"16482","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Y Dong","year":"2021","unstructured":"Dong Y, Yang X, Deng Z, et al. Black-box detection of backdoor attacks with limited information and data. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), 2021. 16482\u201316491"},{"key":"4351_CR273","unstructured":"Li X, Zhang Y, Lou R, et al. Chain-of-scrutiny: detecting backdoor attacks for large language models. 2024. ArXiv:2406.05948"},{"key":"4351_CR274","unstructured":"Gao Y, Xu C, Wang D, et al. STRIP: a defence against Trojan attacks on deep neural networks. 2020. ArXiv:1902.06531"},{"key":"4351_CR275","unstructured":"Gao Y, Kim Y, Doan B G, et al. Design and evaluation of a multi-domain Trojan detection method on deep neural networks. 2019. 
ArXiv:1911.10312"},{"key":"4351_CR276","first-page":"5257","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"X Sun","year":"2023","unstructured":"Sun X, Li X, Meng Y, et al. Defending against backdoor attacks in natural language generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2023. 5257\u20135265"},{"key":"4351_CR277","doi-asserted-by":"publisher","first-page":"1027","DOI":"10.1145\/3447548.3467213","volume-title":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining","author":"Y S Lin","year":"2021","unstructured":"Lin Y S, Lee W C, Celik Z B. What do you see? Evaluation of explainable artificial intelligence (XAI) interpretability through neural backdoors. In: Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining, 2021. 1027\u20131035"},{"key":"4351_CR278","unstructured":"Li Y, Zhang Z, Bai J, et al. Open-sourced dataset protection via backdoor watermarking. 2020. ArXiv:2010.05821"},{"key":"4351_CR279","unstructured":"Touvron H, Martin L, Stone K, et al. LLAMA 2: open foundation and fine-tuned chat models. 2023. ArXiv:2307.09288"},{"key":"4351_CR280","unstructured":"Mann B, Ryder N, Subbiah M, et al. Language models are few-shot learners. 2020. ArXiv:2005.14165"},{"key":"4351_CR281","unstructured":"Wei J, Bosma M, Zhao V Y, et al. Finetuned language models are zero-shot learners. 2021. ArXiv:2109.01652"},{"key":"4351_CR282","doi-asserted-by":"crossref","unstructured":"Peng W, Yi J, Wu F, et al. Are you copying my model? Protecting the copyright of large language models for EAAS via backdoor watermark. 2023. ArXiv:2305.10036","DOI":"10.18653\/v1\/2023.acl-long.423"},{"key":"4351_CR283","first-page":"17061","volume-title":"Proceedings of International Conference on Machine Learning","author":"J Kirchenbauer","year":"2023","unstructured":"Kirchenbauer J, Geiping J, Wen Y, et al. A watermark for large language models. 
In: Proceedings of International Conference on Machine Learning, 2023. 17061\u201317084"},{"key":"4351_CR284","doi-asserted-by":"publisher","first-page":"4417","DOI":"10.1145\/3474085.3475591","volume-title":"Proceedings of the 29th ACM International Conference on Multimedia","author":"S Szyller","year":"2021","unstructured":"Szyller S, Atli B G, Marchal S, et al. DAWN: dynamic adversarial watermarking of neural networks. In: Proceedings of the 29th ACM International Conference on Multimedia, 2021. 4417\u20134425"},{"key":"4351_CR285","first-page":"1615","volume-title":"Proceedings of the 27th USENIX Security Symposium (USENIX Security 18)","author":"Y Adi","year":"2018","unstructured":"Adi Y, Baum C, Cisse M, et al. Turning your weakness into a strength: watermarking deep neural networks by backdooring. In: Proceedings of the 27th USENIX Security Symposium (USENIX Security 18), 2018. 1615\u20131631"},{"key":"4351_CR286","doi-asserted-by":"publisher","first-page":"138872","DOI":"10.1109\/ACCESS.2019.2941376","volume":"7","author":"J Dai","year":"2019","unstructured":"Dai J, Chen C, Li Y. A backdoor attack against LSTM-based text classification systems. IEEE Access, 2019, 7: 138872","journal-title":"IEEE Access"},{"key":"4351_CR287","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3585385","volume":"55","author":"A E Cin\u00e0","year":"2022","unstructured":"Cin\u00e0 A E, Grosse K, Demontis A, et al. Wild patterns reloaded: a survey of machine learning security against training data poisoning. ACM Comput Surv, 2022, 55: 1\u201339","journal-title":"ACM Comput Surv"},{"key":"4351_CR288","doi-asserted-by":"crossref","unstructured":"Chow K H, Wei W, Yu L. Imperio: language-guided backdoor attacks for arbitrary model control. 2024. 
ArXiv:2401.01085","DOI":"10.24963\/ijcai.2024\/78"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4351-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-024-4351-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4351-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T07:59:45Z","timestamp":1757404785000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-024-4351-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,14]]},"references-count":288,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["4351"],"URL":"https:\/\/doi.org\/10.1007\/s11432-024-4351-3","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"type":"print","value":"1674-733X"},{"type":"electronic","value":"1869-1919"}],"subject":[],"published":{"date-parts":[[2025,8,14]]},"assertion":[{"value":"15 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 August 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"191101"}}