{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T16:05:13Z","timestamp":1763913913798,"version":"3.45.0"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032114013","type":"print"},{"value":"9783032114020","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T00:00:00Z","timestamp":1763942400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T00:00:00Z","timestamp":1763942400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-11402-0_5","type":"book-chapter","created":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T16:03:12Z","timestamp":1763913792000},"page":"65-78","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mass-Scale Analysis of\u00a0In-the-Wild Conversations Reveals Complexity Bounds on\u00a0LLM Jailbreaking"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7401-5198","authenticated-orcid":false,"given":"Aldan","family":"Creo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7675-6080","authenticated-orcid":false,"given":"Raul Castro","family":"Fernandez","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3681-7982","authenticated-orcid":false,"given":"Manuel","family":"Cebrian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,24]]},"reference":[{"key":"5_CR1","unstructured":"AI village contributors: AI Village DEFCON 31 red teaming competition dataset. https:\/\/www.kaggle.com\/datasets\/pyotam\/ai-village-defcon-red-teaming-competition-dataset (2023), Accessed 05 Jun 2025"},{"key":"5_CR2","unstructured":"AI village contributors: AI Village DEFCON 32 generative red teaming 2 (grt2) dataset. https:\/\/www.kaggle.com\/datasets\/dreadnode\/aivillage-grt2 (2024), Accessed 05 Jun 2025"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Altmann, E.G., Gerlach, M.: Statistical laws in linguistics. arXiv preprint arXiv:1502.03296 (2015)","DOI":"10.1007\/978-3-319-24403-7_2"},{"key":"5_CR4","unstructured":"Bai, Y., et al.: Constitutional AI: harmlessness from AI feedback (2022)"},{"issue":"2","key":"5_CR5","doi-asserted-by":"publisher","first-page":"026103","DOI":"10.1103\/PhysRevE.73.026103","volume":"73","author":"A Baronchelli","year":"2006","unstructured":"Baronchelli, A., Caglioti, E., Loreto, V.: Artificial sequences and complexity measures. Phys. Rev. E 73(2), 026103 (2006)","journal-title":"Phys. Rev. E"},{"key":"5_CR6","unstructured":"Bereska, L., Gavves, E.: Mechanistic interpretability for AI safety \u2013 a review. arXiv preprint arXiv:2404.14082 (2024)"},{"key":"5_CR7","unstructured":"Bergey, C.A., DeDeo, S.: From \u201cum\u201d to \u201cyeah\u201d: Producing, predicting, and regulating information flow in human conversation (2024), https:\/\/arxiv.org\/abs\/2403.08890"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Bestgen, Y.: Measuring lexical diversity in texts: the twofold length problem. arXiv preprint arXiv:2307.04626 (2023)","DOI":"10.1111\/lang.12630"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Burden, J., Cebrian, M., Hernandez-Orallo, J.: Conversational complexity for assessing risk in large language models (2024), https:\/\/arxiv.org\/abs\/2409.01247","DOI":"10.1140\/epjds\/s13688-025-00592-4"},{"key":"5_CR10","unstructured":"Cebrian, M., Gomez, E., Llorca, D.F.: Supervision policies can shape long-term risk management in general-purpose AI models (2025), https:\/\/arxiv.org\/abs\/2501.06137"},{"issue":"2","key":"5_CR11","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1037\/h0076540","volume":"60","author":"M Coleman","year":"1975","unstructured":"Coleman, M., Liau, T.L.: A computer readability formula designed for machine scoring. J. Appl. Psychol. 60(2), 283 (1975)","journal-title":"J. Appl. Psychol."},{"issue":"4","key":"5_CR12","first-page":"51","volume":"12","author":"S Ellis","year":"2003","unstructured":"Ellis, S., Steyn, H.S.: Practical significance (effect sizes) versus or in combination with statistical significance (p-values): research note. Manage. Dyn. J. Southern African Inst. Manage. Sci. 12(4), 51\u201353 (2003)","journal-title":"Manage. Dyn. J. Southern African Inst. Manage. Sci."},{"key":"5_CR13","unstructured":"Grey, M., et\u00a0al.: Safety by measurement: a systematic literature review of AI safety evaluation methods. arXiv preprint arXiv:2505.05541 (2025)"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Han, P., Qian, C., Chen, X., Zhang, Y., Zhang, D., Ji, H.: SafeSwitch: steering unsafe LLM behavior via internal activation signals (2025)","DOI":"10.18653\/v1\/2025.findings-emnlp.366"},{"key":"5_CR15","unstructured":"Hanu, L., Unitary team: detoxify. Github. https:\/\/github.com\/unitaryai\/detoxify (2020)"},{"key":"5_CR16","unstructured":"Hendrycks, D., Carlini, N., Schulman, J., Steinhardt, J.: Unsolved problems in ML safety. arXiv preprint arXiv:2109.13916 (2021)"},{"key":"5_CR17","unstructured":"Hendrycks, D., et\u00a0al.: X-risk analysis for AI research. arXiv preprint arXiv:2206.05862 (2022)"},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Kincaid, P., Fishburne, R.P., Rogers, R.L., Chissom, B.S.: Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel (1975), https:\/\/api.semanticscholar.org\/CorpusID:61131325","DOI":"10.21236\/ADA006655"},{"key":"5_CR19","first-page":"47669","volume":"36","author":"A K\u00f6pf","year":"2023","unstructured":"K\u00f6pf, A., et al.: OpenAssistant conversations - democratizing large language model alignment. Adv. Neural. Inf. Process. Syst. 36, 47669\u201347681 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Kuribayashi, T., Arase, Y., Takase, S., Inui, K.: Context limitations make neural language models more human-like. arXiv preprint arXiv:2205.11463 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.712"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Lai, A., Tetreault, J.: Discourse coherence in the wild: a dataset, evaluation and methods. arXiv preprint arXiv:1805.04993 (2018)","DOI":"10.18653\/v1\/W18-5023"},{"key":"5_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1613\/jair.1.17654","volume":"78","author":"L Lin","year":"2025","unstructured":"Lin, L., et al.: Against the achilles\u2019 heel: a survey on red teaming for generative models. J. Artif. Intell. Res. 78, 1\u201354 (2025). https:\/\/doi.org\/10.1613\/jair.1.17654","journal-title":"J. Artif. Intell. Res."},{"key":"5_CR23","doi-asserted-by":"publisher","unstructured":"Lindstr\u00f6m, A.D., et al.: Helpful, harmless, honest? Sociotechnical limits of AI alignment and safety through reinforcement learning from human feedback. Ethics Inf. Technol. (2025). https:\/\/doi.org\/10.1007\/s10676-024-09780-5","DOI":"10.1007\/s10676-024-09780-5"},{"key":"5_CR24","unstructured":"liyucheng: ShareGPT90K dataset. https:\/\/huggingface.co\/datasets\/liyucheng\/ShareGPT90K (2023), Accessed 05 Jun 2025"},{"key":"5_CR25","unstructured":"Meister, C., Cotterell, R.: Language model evaluation beyond perplexity. arXiv preprint arXiv:2106.00085 (2021)"},{"key":"5_CR26","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. In: Advances in Neural Information Processing Systems, vol.\u00a035, pp. 27730\u201327744 (2022)"},{"key":"5_CR27","unstructured":"Pan, W., Liu, Z., Chen, Q., Zhou, X., Yu, H., Jia, X.: The hidden dimensions of LLM alignment: a multi-dimensional safety analysis (2025)"},{"key":"5_CR28","unstructured":"Panfilov, A., et\u00a0al.: Capability-based scaling laws for LLM red-teaming. arXiv preprint arXiv:2505.20162 (2025)"},{"key":"5_CR29","unstructured":"Pathade, C., et\u00a0al.: Red teaming the mind of the machine: a systematic evaluation of prompt injection and jailbreak vulnerabilities in LLMs. arXiv preprint arXiv:2505.04806 (2025)"},{"key":"5_CR30","unstructured":"Raheja, T., Pochhi, N.: Recent advancements in LLM red-teaming: techniques, defenses, and ethical considerations. arXiv preprint arXiv:2410.09097 (2024)"},{"issue":"7753","key":"5_CR31","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1038\/s41586-019-1138-y","volume":"568","author":"I Rahwan","year":"2019","unstructured":"Rahwan, I., et al.: Mach. Behav. Nature 568(7753), 477\u2013486 (2019)","journal-title":"Nature"},{"key":"5_CR32","unstructured":"Shang, Z., Wei, W.: Evolving security in LLMs: a study of jailbreak attacks and defenses (2025), preprint"},{"key":"5_CR33","unstructured":"Shin, C., et\u00a0al.: Evaluating language model context windows: a \u201cworking memory\u201d test and inference-time correction. arXiv preprint arXiv:2407.03651 (2024)"},{"issue":"1","key":"5_CR34","first-page":"12","volume":"68","author":"B Thompson","year":"1999","unstructured":"Thompson, B.: Improving research clarity and usefulness with effect size indices as supplements to statistical significance tests. J. Exp. Educ. 68(1), 12\u201321 (1999)","journal-title":"J. Exp. Educ."},{"key":"5_CR35","unstructured":"Wang, Z., et\u00a0al.: A red teaming roadmap towards system-level safety. arXiv preprint arXiv:2506.05376 (2025)"},{"key":"5_CR36","doi-asserted-by":"crossref","unstructured":"Wichers, N., Denison, C., Beirami, A.: Gradient-based language model red teaming. In: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2783\u20132797 (2024)","DOI":"10.18653\/v1\/2024.eacl-long.175"},{"key":"5_CR37","unstructured":"Xiong, C., et\u00a0al.: CoP: agentic red-teaming for large language models using composition of principles. arXiv preprint arXiv:2506.00781 (2025)"},{"key":"5_CR38","unstructured":"Yu, M., et al.: LLM-Virus: evolutionary jailbreak attack on large language models. arXiv preprint arXiv:2501.00055 (2024)"},{"key":"5_CR39","unstructured":"Zhao, W., Ren, X., Hessel, J., Cardie, C., Choi, Y., Deng, Y.: WildChat: 1M ChatGPT interaction logs in the wild. In: The Twelfth International Conference on Learning Representations (2024), https:\/\/openreview.net\/forum?id=Bl8u7ZRlbM"},{"key":"5_CR40","unstructured":"Zheng, L., et al.: LMSYS-Chat-1M: a large-scale real-world LLM conversation dataset (2023)"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Yu, H., Zhang, X., Xu, R., Huang, F., Li, Y.: How alignment and jailbreak work: explain LLM safety through intermediate hidden states. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 4063\u20134076 (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.139"},{"key":"5_CR42","unstructured":"Zhu, S., et al.: AutoDAN: interpretable gradient-based adversarial attacks on large language models (2023)"},{"key":"5_CR43","unstructured":"Zou, A., Wang, Z., Kolter, J.Z., Fredrikson, M.: Universal and transferable adversarial attacks on aligned language models (2023)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence XLII"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-11402-0_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T16:03:17Z","timestamp":1763913797000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-11402-0_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,24]]},"ISBN":["9783032114013","9783032114020"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-11402-0_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,24]]},"assertion":[{"value":"24 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have\u00a0no competing interests to declare that are relevant to the content\u00a0of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}},{"value":"SGAI-AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Innovative Techniques and Applications of Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Cambridge","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 December 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 December 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"45","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"sgai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/bcs-sgai.org\/ai2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}