{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T06:42:49Z","timestamp":1760078569086,"version":"build-2065373602"},"publisher-location":"Singapore","reference-count":15,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819530717","type":"print"},{"value":"9789819530724","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T00:00:00Z","timestamp":1760140800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T00:00:00Z","timestamp":1760140800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3072-4_7","type":"book-chapter","created":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T06:21:12Z","timestamp":1760077272000},"page":"74-86","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Comprehensive Survey on White-Box Security Threats for Large Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-3213-4205","authenticated-orcid":false,"given":"Wenbiao","family":"Du","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5478-4346","authenticated-orcid":false,"given":"Tengfei","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8017-6733","authenticated-orcid":false,"given":"Zhihan","family":"Sun","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5729-7753","authenticated-orcid":false,"given":"Xiuqi","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1909-1643","authenticated-orcid":false,"given":"Zeyang","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3087-9701","authenticated-orcid":false,"given":"Jingfeng","family":"Xue","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,11]]},"reference":[{"key":"7_CR1","unstructured":"Andriushchenko, M., Croce, F., Flammarion, N.: Jailbreaking leading safety-aligned LLMs with simple adaptive attacks. arXiv preprint arXiv:2404.02151 (2024)"},{"key":"7_CR2","unstructured":"Du, Y., Zhao, S., Ma, M., Chen, Y., Qin, B.: Analyzing the inherent response tendency of llms: Real-world instructions-driven jailbreak. arXiv preprint arXiv:2312.04127 (2023)"},{"key":"7_CR3","unstructured":"Guo, X., Yu, F., Zhang, H., Qin, L., Hu, B.: Cold-attack: jailbreaking LLMs with stealthiness and controllability. arXiv preprint arXiv:2402.08679 (2024)"},{"key":"7_CR4","unstructured":"Jones, E., Dragan, A., Raghunathan, A., Steinhardt, J.: Automatically auditing large language models via discrete optimization. In: International Conference on Machine Learning, pp. 15307\u201315329. PMLR (2023)"},{"key":"7_CR5","unstructured":"Lermen, S., Rogers-Smith, C., Ladish, J.: Lora fine-tuning efficiently undoes safety training in llama 2-chat 70b. arXiv preprint arXiv:2310.20624 (2023)"},{"key":"7_CR6","unstructured":"Qi, X., et al.: Fine-tuning aligned language models compromises safety, even when users do not intend to! arXiv preprint arXiv:2310.03693 (2023)"},{"key":"7_CR7","unstructured":"Sitawarin, C., Mu, N., Wagner, D., Araujo, A.: Pal: proxy-guided black-box attack on large language models. arXiv preprint arXiv:2402.09674 (2024)"},{"key":"7_CR8","unstructured":"Wang, H., Li, H., Huang, M., Sha, L.: From noise to clarity: unraveling the adversarial suffix of large language model attacks via translation of text embeddings. arXiv preprint arXiv:2402.16006 (2024)"},{"key":"7_CR9","unstructured":"Wang, Z., et al.: Stop reasoning! when multimodal LLMs with chain-of-thought reasoning meets adversarial images. arXiv preprint arXiv:2402.14899 (2024)"},{"key":"7_CR10","unstructured":"Yang, X., et al.: Shadow alignment: the ease of subverting safely-aligned language models. arXiv preprint arXiv:2310.02949 (2023)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Zhan, Q., et al.: Removing RLHF protections in GPT-4 via fine-tuning. arXiv preprint arXiv:2311.05553 (2023)","DOI":"10.18653\/v1\/2024.naacl-short.59"},{"key":"7_CR12","unstructured":"Zhang, Z., Shen, G., Tao, G., Cheng, S., Zhang, X.: Make them spill the beans! coercive knowledge extraction from (production) LLMs. arXiv preprint arXiv:2312.04782 (2023)"},{"key":"7_CR13","unstructured":"Zhao, X., et al.: Weak-to-strong jailbreaking on large language models. arXiv preprint arXiv:2401.17256 (2024)"},{"key":"7_CR14","unstructured":"Zhu, S., et al.: Autodan: interpretable gradient-based adversarial attacks on large language models. In: First Conference on Language Modeling (2024)"},{"key":"7_CR15","unstructured":"Zou, A., et al.: Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)"}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3072-4_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T06:21:16Z","timestamp":1760077276000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3072-4_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,11]]},"ISBN":["9789819530717","9789819530724"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3072-4_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,11]]},"assertion":[{"value":"11 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Macao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ksem2025.scimeeting.cn\/en\/web\/index\/27434","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}