{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T16:26:43Z","timestamp":1764433603249,"version":"3.41.0"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031976254","type":"print"},{"value":"9783031976261","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-97626-1_4","type":"book-chapter","created":{"date-parts":[[2025,6,29]],"date-time":"2025-06-29T11:52:32Z","timestamp":1751197952000},"page":"49-60","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Precise Language Deception: XAI Driven Targeted Adversarial Examples with\u00a0Restricted Knowledge"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0620-8123","authenticated-orcid":false,"given":"Mateusz","family":"Gniewkowski","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0381-9202","authenticated-orcid":false,"given":"Pawe\u0142","family":"Walkowiak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3141-8712","authenticated-orcid":false,"given":"Marek","family":"Klonowski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7749-4251","authenticated-orcid":false,"given":"Tomasz","family":"Walkowiak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"4_CR1","unstructured":"Albrecht, J., Kitanidis, E., Fetterman, A.J.: Despite \u201csuper-human\u201d performance, current LLMs are unsuited for decisions about ethics and safety (2022). https:\/\/arxiv.org\/abs\/2212.06295"},{"key":"4_CR2","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1007\/978-3-642-40994-3_25","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"B Biggio","year":"2013","unstructured":"Biggio, B., et al.: Evasion attacks against machine learning at test time. In: Blockeel, H., Kersting, K., Nijssen, S., \u017delezn\u00fd, F. (eds.) ECML PKDD 2013. LNCS (LNAI), vol. 8190, pp. 387\u2013402. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40994-3_25"},{"key":"4_CR3","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Trans. Assoc. Comput. Linguist. 5, 135\u2013146 (2017)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Burger, C., Chen, L., Le, T.: Are your explanations reliable? Investigating the stability of lime in explaining text classifiers by marrying XAI and adversarial attack (2023). https:\/\/arxiv.org\/abs\/2305.12351","DOI":"10.18653\/v1\/2023.emnlp-main.792"},{"key":"4_CR5","unstructured":"Carlini, N.: A complete list of all (arxiv) adversarial example papers (2019\u20132025). https:\/\/nicholas.carlini.com\/writing\/2019\/all-adversarial-example-papers.html"},{"key":"4_CR6","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Fidel, G., Bitton, R., Shabtai, A.: When explainability meets adversarial learning: detecting adversarial examples using SHAP signatures. In: 2020 International Joint Conference on Neural Networks, IJCNN 2020, Glasgow, United Kingdom, 19\u201324 July 2020, pp.\u00a01\u20138. IEEE (2020). https:\/\/doi.org\/10.1109\/IJCNN48605.2020.9207637","DOI":"10.1109\/IJCNN48605.2020.9207637"},{"key":"4_CR8","doi-asserted-by":"publisher","unstructured":"Gniewkowski, M., et\u00a0al.: Do not trust me: explainability against text classification. In: ECAI 2023 - 26th European Conference on Artificial Intelligence, 30 September\u20134 October 2023, Krak\u00f3w, Poland - Including 12th Conference on Prestigious Applications of Intelligent Systems (PAIS 2023). Frontiers in Artificial Intelligence and Applications, vol.\u00a0372, pp. 875\u2013882. IOS Press (2023). https:\/\/doi.org\/10.3233\/FAIA230356","DOI":"10.3233\/FAIA230356"},{"issue":"10","key":"4_CR9","doi-asserted-by":"publisher","first-page":"4381","DOI":"10.1109\/TIV.2023.3296227","volume":"8","author":"T Hickling","year":"2023","unstructured":"Hickling, T., Aouf, N., Spencer, P.: Robust adversarial attacks detection based on explainable deep reinforcement learning for UAV guidance and planning. IEEE Trans. Intell. Veh. 8(10), 4381\u20134394 (2023)","journal-title":"IEEE Trans. Intell. Veh."},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Jia, R., Liang, P.: Adversarial examples for evaluating reading comprehension systems. CoRR abs\/1707.07328 (2017). http:\/\/arxiv.org\/abs\/1707.07328","DOI":"10.18653\/v1\/D17-1215"},{"key":"4_CR11","unstructured":"Jia, X., et\u00a0al.: Global challenge for safe and secure LLMs track 1. arXiv preprint arXiv:2411.14502 (2024)"},{"key":"4_CR12","doi-asserted-by":"publisher","unstructured":"Lukas, N., et\u00a0al.: Analyzing leakage of personally identifiable information in language models. In: 44th IEEE Symposium on Security and Privacy, SP 2023, San Francisco, CA, USA, 21\u201325 May 2023, pp. 346\u2013363. IEEE (2023). https:\/\/doi.org\/10.1109\/SP46215.2023.10179300","DOI":"10.1109\/SP46215.2023.10179300"},{"key":"4_CR13","unstructured":"Lundberg, S.M., Lee, S.I.: A unified approach to interpreting model predictions. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using Siamese BERT-networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics (2019). http:\/\/arxiv.org\/abs\/1908.10084","DOI":"10.18653\/v1\/D19-1410"},{"key":"4_CR15","doi-asserted-by":"publisher","unstructured":"Ribeiro, M.T., Singh, S., Guestrin, C.: \u201cWhy should I trust you?\u201d: explaining the predictions of any classifier. In: Krishnapuram, B., Shah, M., Smola, A.J., Aggarwal, C.C., Shen, D., Rastogi, R. (eds.) Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Francisco, CA, USA, 13\u201317 August 2016, pp. 1135\u20131144. ACM (2016). https:\/\/doi.org\/10.1145\/2939672.2939778","DOI":"10.1145\/2939672.2939778"},{"key":"4_CR16","unstructured":"Szegedy, C., et al.: Intriguing properties of neural networks. arXiv preprint arXiv:1312.6199 (2013)"},{"key":"4_CR17","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1162\/TACL_A_00279","volume":"7","author":"E Wallace","year":"2019","unstructured":"Wallace, E., Rodriguez, P., Feng, S., Yamada, I., Boyd-Graber, J.L.: Trick me if you can: human-in-the-loop generation of adversarial question answering examples. Trans. Assoc. Comput. Linguist. 7, 387\u2013401 (2019). https:\/\/doi.org\/10.1162\/TACL_A_00279","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"4_CR18","unstructured":"Wang, B., et\u00a0al.: Exploring the limits of domain-adaptive training for detoxifying large-scale language models. In: Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, 28 November\u20139 December 2022 (2022). http:\/\/papers.nips.cc\/paper_files\/paper\/2022\/hash\/e8c20cafe841cba3e31a17488dc9c3f1-Abstract-Conference.html"},{"key":"4_CR19","unstructured":"Wang, G., Cheng, S., Zhan, X., Li, X., Song, S., Liu, Y.: OpenChat: advancing open-source language models with mixed-quality data. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=AOJyfhWYHf"},{"key":"4_CR20","unstructured":"Wei, A., Haghtalab, N., Steinhardt, J.: Jailbroken: how does LLM safety training fail? In: Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, 10\u201316 December 2023 (2023). http:\/\/papers.nips.cc\/paper_files\/paper\/2023\/hash\/fd6613131889a4b656206c50a8bd7790-Abstract-Conference.html"},{"key":"4_CR21","unstructured":"Wei, A., Haghtalab, N., Steinhardt, J.: Jailbroken: how does LLM safety training fail? (2023). https:\/\/arxiv.org\/abs\/2307.02483"},{"key":"4_CR22","unstructured":"Yeghiazaryan, M., et\u00a0al.: Texture- and shape-based adversarial attacks for vehicle detection in synthetic overhead imagery (2024). https:\/\/arxiv.org\/abs\/2412.16358"},{"key":"4_CR23","unstructured":"Zou, A., Wang, Z., Carlini, N., Nasr, M., Kolter, J.Z., Fredrikson, M.: Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computational Science \u2013 ICCS 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-97626-1_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,29]],"date-time":"2025-06-29T11:52:36Z","timestamp":1751197956000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-97626-1_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031976254","9783031976261"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-97626-1_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"30 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Science","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccs-computsci2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iccs-meeting.org\/iccs2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}