{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T02:23:45Z","timestamp":1771467825301,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T00:00:00Z","timestamp":1718755200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nd\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,19]]},"DOI":"10.1145\/3714393.3726501","type":"proceedings-article","created":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T18:38:47Z","timestamp":1749062327000},"page":"341-352","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["PromptShield: Deployable Detection for Prompt Injection Attacks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-9086-8684","authenticated-orcid":false,"given":"Dennis","family":"Jacob","sequence":"first","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5766-8556","authenticated-orcid":false,"given":"Hend","family":"Alzahrani","sequence":"additional","affiliation":[{"name":"King Abdulaziz City for Science and Technology, Riyadh, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3746-1447","authenticated-orcid":false,"given":"Zhanhao","family":"Hu","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0494-2586","authenticated-orcid":false,"given":"Basel","family":"Alomair","sequence":"additional","affiliation":[{"name":"King Abdulaziz City for Science and Technology, Riyadh, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9944-9232","authenticated-orcid":false,"given":"David","family":"Wagner","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,6,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. Synthetic Python Problems(SPP) Dataset. https:\/\/huggingface.co\/datasets\/wuyetao\/spp."},{"key":"e_1_3_2_1_2_1","unstructured":"Blueteam AI. 2024. Fmops\/Distilbert-Prompt-Injection. https:\/\/huggingface.co\/fmops\/distilbert-prompt-injection."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2107.03374"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2402.06363"},{"key":"e_1_3_2_1_5_1","volume-title":"Free Dolly: Introducing the World's First Truly Open Instruction-Tuned LLM. https:\/\/www.databricks.com\/blog\/2023\/04\/12\/dolly-first-open-commercially-viable-instruction-tuned-llm.","author":"Conover Mike","year":"2023","unstructured":"Mike Conover, Matt Hayes, Ankit Mathur, Jianwei Xie, Jun Wan, Sam Shah, Ali Ghodsi, Patrick Wendell, Matei Zaharia, and Reynold Xin. 2023. Free Dolly: Introducing the World's First Truly Open Instruction-Tuned LLM. https:\/\/www.databricks.com\/blog\/2023\/04\/12\/dolly-first-open-commercially-viable-instruction-tuned-llm."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.183"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et al. 2024. The Llama 3 Herd of Models. doi:10.48550\/arXiv.2407.21783 arXiv:2407.21783 [cs]","DOI":"10.48550\/arXiv.2407.21783"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605764.3623985"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2111.09543"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2106.09685"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2411.00348"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","unstructured":"Jean Kaddour Joshua Harris Maximilian Mozes Herbie Bradley Roberta Raileanu and Robert McHardy. 2023. Challenges and Applications of Large Language Models. doi:10.48550\/arXiv.2307.10169 arXiv:2307.10169 [cs]","DOI":"10.48550\/arXiv.2307.10169"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Hao Li and Xiaogeng Liu. 2024. InjecGuard: Benchmarking and Mitigating Over-defense in Prompt Injection Guardrail Models. doi:10.48550\/arXiv.2410.22770 arXiv:2410.22770 [cs]","DOI":"10.48550\/arXiv.2410.22770"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2310.12815"},{"key":"e_1_3_2_1_15_1","unstructured":"OpenAI. 2023. Text-Davinci-003. https:\/\/platform.openai.com\/docs\/deprecations."},{"key":"e_1_3_2_1_16_1","unstructured":"OpenAI. 2024. Omni-Moderation-Latest. https:\/\/platform.openai.com\/docs\/api-reference\/moderations."},{"key":"e_1_3_2_1_17_1","unstructured":"OpenAI Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman et al. 2024. GPT-4 Technical Report. doi:10.48550\/arXiv.2303.08774 arXiv:2303.08774 [cs]"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","unstructured":"Long Ouyang Jeff Wu Xu Jiang Diogo Almeida Carroll L. Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et al. 2022. Training Language Models to Follow Instructions with Human Feedback. doi:10.48550\/arXiv.2203.02155 arXiv:2203.02155 [cs]","DOI":"10.48550\/arXiv.2203.02155"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2211.09527"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-70879-4_6"},{"key":"e_1_3_2_1_21_1","unstructured":"ProtectAI.com. 2023. Fine-Tuned DeBERTa-v3-base for Prompt Injection Detection. https:\/\/huggingface.co\/protectai\/deberta-v3-base-prompt-injection-v2."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"ProtectAI.com. 2023. Fine-Tuned DeBERTa-v3 for Prompt Injection Detection. https:\/\/huggingface.co\/protectai\/deberta-v3-base-prompt-injection. doi:10.57967\/hf\/2739","DOI":"10.57967\/hf\/2739"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2305.14965"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2311.16119"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2308.03825"},{"key":"e_1_3_2_1_26_1","volume-title":"Stanford Alpaca: An Instruction-following LLaMA Model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca.","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori Hashimoto. 2023. Stanford Alpaca: An Instruction-following LLaMA Model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2312.11805"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et al. 2023. LLaMA: Open and Efficient Foundation Language Models. doi:10.48550\/arXiv.2302.13971 arXiv:2302.13971 [cs]","DOI":"10.48550\/arXiv.2302.13971"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","unstructured":"Eric Wallace Kai Xiao Reimar Leike Lilian Weng Johannes Heidecke and Alex Beutel. 2024. The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions. doi:10.48550\/arXiv.2404.13208 arXiv:2404.13208 [cs]","DOI":"10.48550\/arXiv.2404.13208"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","unstructured":"Shengye Wan Cyrus Nikolaidis Daniel Song David Molnar James Crnkovich Jayson Grace Manish Bhatt Sahana Chennabasappa Spencer Whitman Stephanie Ding et al. 2024. CYBERSECEVAL 3: Advancing the Evaluation of Cybersecurity Risks and Capabilities in Large Language Models. doi:10.48550\/arXiv.2408.01605 arXiv:2408.01605 [cs]","DOI":"10.48550\/arXiv.2408.01605"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2212.10560"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2204.07705"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2109.01652"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2310.06387"},{"key":"e_1_3_2_1_35_1","volume-title":"OWASP Top 10 for LLM Applications","author":"Wilson Steve","year":"2025","unstructured":"Steve Wilson and Ads Dawson. 2024. OWASP Top 10 for LLM Applications 2025."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","unstructured":"Jingwei Yi Yueqi Xie Bin Zhu Emre Kiciman Guangzhong Sun Xing Xie and Fangzhao Wu. 2024. Benchmarking and Defending Against Indirect Prompt Injection Attacks on Large Language Models. doi:10.48550\/arXiv.2312.14197 arXiv:2312.14197 [cs]","DOI":"10.48550\/arXiv.2312.14197"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2309.11998"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","unstructured":"Jeffrey Zhou Tianjian Lu Swaroop Mishra Siddhartha Brahma Sujoy Basu Yi Luan Denny Zhou and Le Hou. 2023. Instruction-Following Evaluation for Large Language Models. doi:10.48550\/arXiv.2311.07911 arXiv:2311.07911 [cs]","DOI":"10.48550\/arXiv.2311.07911"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","unstructured":"Andy Zou Zifan Wang Nicholas Carlini Milad Nasr J. Zico Kolter and Matt Fredrikson. 2023. Universal and Transferable Adversarial Attacks on Aligned Language Models. doi:10.48550\/arXiv.2307.15043 arXiv:2307.15043 [cs]","DOI":"10.48550\/arXiv.2307.15043"}],"event":{"name":"CODASPY '25: Fifteenth ACM Conference on Data and Application Security and Privacy","location":"Pittsburgh PA USA","acronym":"CODASPY '25","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the Fifteenth ACM Conference on Data and Application Security and Privacy"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3714393.3726501","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3714393.3726501","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:54:41Z","timestamp":1755892481000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3714393.3726501"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,19]]},"references-count":39,"alternative-id":["10.1145\/3714393.3726501","10.1145\/3714393"],"URL":"https:\/\/doi.org\/10.1145\/3714393.3726501","relation":{},"subject":[],"published":{"date-parts":[[2024,6,19]]},"assertion":[{"value":"2025-06-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}