{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T21:26:27Z","timestamp":1742937987900,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819772315"},{"type":"electronic","value":"9789819772322"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-7232-2_20","type":"book-chapter","created":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T16:02:47Z","timestamp":1724774567000},"page":"295-309","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Watch Your Words: Successfully Jailbreak LLM by\u00a0Mitigating the\u00a0\u201cPrompt Malice\u201d"],"prefix":"10.1007","author":[{"given":"Xiaowei","family":"Xu","sequence":"first","affiliation":[]},{"given":"Yixiao","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Xiong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Peng","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Mohan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yanbin","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,28]]},"reference":[{"key":"20_CR1","unstructured":"Achiam, J., et al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"20_CR2","unstructured":"Bai, J., et al.: Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)"},{"key":"20_CR3","unstructured":"Bai, Y., Kadavath, S., Kundu, S., et\u00a0al.: Constitutional AI: harmlessness from AI feedback. arXiv preprint arXiv:2212.08073 (2022)"},{"key":"20_CR4","unstructured":"Baichuan: Baichuan 2: open large-scale language models. arXiv preprint arXiv:2309.10305 (2023). https:\/\/arxiv.org\/abs\/2309.10305"},{"key":"20_CR5","unstructured":"Brown, T.B., et\u00a0al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems (NeurIPS) (2020)"},{"key":"20_CR6","unstructured":"Chu, Z., et\u00a0al.: A survey of chain of thought reasoning: advances, frontiers and future. arXiv preprint arXiv:2309.15402 (2023)"},{"key":"20_CR7","unstructured":"Dong, L., Yang, N., Wang, W., et\u00a0al.: Unified language model pre-training for natural language understanding and generation. In: Advances in Neural Information Processing Systems, vol.\u00a032 (2019)"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Greshake, K., Abdelnabi, S., Mishra, S., et\u00a0al.: Not what you\u2019ve signed up for: compromising real-world LLM-integrated applications with indirect prompt injection. In: Proceedings of the 16th ACM Workshop on Artificial Intelligence and Security, pp. 79\u201390 (2023)","DOI":"10.1145\/3605764.3623985"},{"key":"20_CR9","unstructured":"von Hagen, M.: Copilot chat\u2019s confidential rules (2023). https:\/\/twitter.com\/marvinvonhagen\/status\/1657060506371346432. Accessed 13 May 2023"},{"key":"20_CR10","unstructured":"Hanu, L.: Unitary team: Detoxify. Github (2020). https:\/\/github.com\/unitaryai\/detoxify"},{"issue":"1","key":"20_CR11","first-page":"3","volume":"160","author":"SB Kotsiantis","year":"2007","unstructured":"Kotsiantis, S.B., Zaharakis, I., Pintelas, P., et al.: Supervised machine learning: a review of classification techniques. Emerg. Artif. Intell. Appl. Comput. Eng. 160(1), 3\u201324 (2007)","journal-title":"Emerg. Artif. Intell. Appl. Comput. Eng."},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Lapid, R., Langberg, R., Sipper, M.: Open sesame! universal black box jailbreaking of large language models. arXiv preprint (2023)","DOI":"10.3390\/app14167150"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Li, H., Guo, D., Fan, W., et\u00a0al.: Multi-step jailbreaking privacy attacks on chatgpt. arXiv preprint arXiv:2304.05197 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.272"},{"key":"20_CR14","unstructured":"Liu, K.: The entire prompt of Microsoft Bing chat?! (hi, Sydney) (2023). https:\/\/twitter.com\/kliu128\/status\/1623472922374574080. Accessed 9 Feb 2023"},{"key":"20_CR15","unstructured":"Radford, A., et\u00a0al.: Improving language understanding by generative pre-training (2018)"},{"issue":"8","key":"20_CR16","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"20_CR17","unstructured":"R\u00f6ttger, P., Pernisi, F., Vidgen, B., et\u00a0al.: Safetyprompts: a systematic review of open datasets for evaluating and improving large language model safety. arXiv preprint arXiv:2404.05399 (2024). https:\/\/arxiv.org\/abs\/2404.05399"},{"key":"20_CR18","unstructured":"Team, G., Anil, R., et\u00a0al.: Gemini: a family of highly capable multimodal models (2024)"},{"key":"20_CR19","unstructured":"Touvron, H., et\u00a0al.: Llama: open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"20_CR20","unstructured":"Wei, A., Haghtalab, N., Steinhardt, J.: Jailbroken: how does LLM safety training fail? In: Advances in Neural Information Processing Systems, vol.\u00a036 (2024)"},{"key":"20_CR21","unstructured":"Wei, J., et\u00a0al.: Chain of thought prompting elicits reasoning in large language models. arXiv preprint arXiv:2201.11903 (2022)"},{"key":"20_CR22","unstructured":"Yang, C., et\u00a0al.: Large language models as optimizers. arXiv preprint arXiv:2309.03409 (2023). https:\/\/arxiv.org\/abs\/2309.03409"},{"key":"20_CR23","unstructured":"Yuan, Y., Jiao, W., Wang, W., et\u00a0al.: GPT-4 is too smart to be safe: stealthy chat with LLMS via cipher. arXiv preprint arXiv:2308.06463 (2023)"},{"key":"20_CR24","unstructured":"Zeng, A., et\u00a0al.: GLM-130B: an open bilingual pre-trained model. arXiv preprint arXiv:2210.02414 (2022)"},{"key":"20_CR25","unstructured":"Zhang, M., Pan, X., Yang, M.: Jade: a linguistic-based safety evaluation platform for LLM (2023)"},{"key":"20_CR26","unstructured":"Zou, A., Wang, Z., Kolter, J.Z., et\u00a0al.: Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)"}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-7232-2_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T16:08:12Z","timestamp":1724774892000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-7232-2_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819772315","9789819772322"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-7232-2_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"28 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jinhua","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/apweb2024.zjnu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}