{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T19:19:47Z","timestamp":1775503187829,"version":"3.50.1"},"reference-count":49,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100014890","name":"Qilu University of Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100014890","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002338","name":"Ministry of Education of the People's Republic of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002338","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008118","name":"Shandong Academy of Sciences","doi-asserted-by":"publisher","award":["2023ZD025"],"award-info":[{"award-number":["2023ZD025"]}],"id":[{"id":"10.13039\/501100008118","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neunet.2026.108574","type":"journal-article","created":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T00:11:08Z","timestamp":1768435868000},"page":"108574","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["CoT defender: Preemptive chain-of-thought occupation for jailbreak attack mitigation"],"prefix":"10.1016","volume":"198","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-4535-1552","authenticated-orcid":false,"given":"Xiaokang","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0359-0248","authenticated-orcid":false,"given":"Jin","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9333-8200","authenticated-orcid":false,"given":"Yongqiang","family":"Tang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0837-3285","authenticated-orcid":false,"given":"Zhiwen","family":"Xie","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5256-0014","authenticated-orcid":false,"given":"Yihe","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4473-3068","authenticated-orcid":false,"given":"Xiao","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7284-0776","authenticated-orcid":false,"given":"Long","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5476-620X","authenticated-orcid":false,"given":"Bo","family":"Huang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.108574_bib0001","series-title":"Phi-3 technical report: A highly capable language model locally on your phone","author":"Abdin","year":"2024"},{"issue":"1","key":"10.1016\/j.neunet.2026.108574_bib0002","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1016\/S0306-4573(02)00021-3","article-title":"An information-theoretic perspective of tf-idf measures","volume":"39","author":"Aizawa","year":"2003","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.neunet.2026.108574_bib0003","doi-asserted-by":"crossref","first-page":"81","DOI":"10.70470\/SHIFRA\/2025\/005","article-title":"Beyond detection: Large language models and next-generation cybersecurity","volume":"2025","author":"Ali","year":"2025","journal-title":"SHIFRA"},{"key":"10.1016\/j.neunet.2026.108574_bib0004","series-title":"Detecting language model attacks with perplexity","author":"Alon","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_sbref0005","series-title":"Advances in neural information processing systems","first-page":"136037","article-title":"Refusal in language models is mediated by a single direction","volume":"vol. 37","author":"Arditi","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0006","series-title":"Training a helpful and harmless assistant with reinforcement learning from human feedback","author":"Bai","year":"2022"},{"key":"10.1016\/j.neunet.2026.108574_sbref0007","series-title":"Proceedings of the 62nd annual meeting of the association for computational linguistics (volume 1: Long papers), ACL 2024, Bangkok, Thailand, August 11\u201316, 2024","first-page":"10542","article-title":"Defending against alignment-breaking attacks via robustly aligned LLM","author":"Cao","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0008","series-title":"Reasoned safety alignment: Ensuring jailbreak defense via answer-then-check","author":"Cao","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_bib0009","series-title":"NeurIPS datasets and benchmarks track","article-title":"Jailbreakbench: An open robustness benchmark for jailbreaking large language models","author":"Chao","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0010","series-title":"2025\u202fIEEE Conference on secure and trustworthy machine learning (saTML)","first-page":"23","article-title":"Jailbreaking black box large language models in twenty queries","author":"Chao","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_bib0011","series-title":"Proceedings of the 2024 conference of the North American chapter of the association for computational linguistics: Human language technologies (volume 1: Long papers)","first-page":"2136","article-title":"A wolf in sheep\u2019s clothing: Generalized nested jailbreak prompts can fool large language models easily","author":"Ding","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_sbref0012","series-title":"Advances in neural information processing systems","first-page":"70757","article-title":"Towards revealing the mystery behind chain of thought: A theoretical perspective","volume":"vol. 36","author":"Feng","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_bib0013","doi-asserted-by":"crossref","first-page":"8","DOI":"10.70470\/SHIFRA\/2024\/002","article-title":"Securing cloud computing environments: An analysis of multi-tenancy vulnerabilities and countermeasures","volume":"2024","author":"Hashim","year":"2024","journal-title":"SHIFRA"},{"issue":"2","key":"10.1016\/j.neunet.2026.108574_bib0014","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu","year":"2022","journal-title":"ICLR"},{"key":"10.1016\/j.neunet.2026.108574_sbref0015","series-title":"Neurips safe generative AI workshop 2024","article-title":"Token highlighter: Inspecting and mitigating jailbreak prompts for large language models","author":"Hu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0016","series-title":"Baseline defenses for adversarial attacks against aligned language models","author":"Jain","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_bib0017","series-title":"Mistral 7b","author":"Jiang","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_bib0018","series-title":"Deepinception: Hypnotize large language model to be jailbreaker","author":"Li","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0019","series-title":"The twelfth international conference on learning representations","article-title":"Let\u2019s verify step by step","author":"Lightman","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_bib0020","series-title":"The twelfth international conference on learning representations","article-title":"The unlocking spell on base LLMs: Rethinking alignment via in-context learning","author":"Lin","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_sbref0021","series-title":"The thirteenth international conference on learning representations","article-title":"Understanding and enhancing the transferability of jailbreaking attacks","author":"Lin","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_bib0022","series-title":"Adversarial tuning: Defending against jailbreak attacks for LLMs","author":"Liu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_sbref0023","series-title":"International conference on representation learning","first-page":"56174","article-title":"AutoDAN: Generating stealthy jailbreak prompts on aligned large language models","volume":"vol. 2024","author":"Liu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_sbref0024","article-title":"Jailbreaking chatGPT via prompt engineering: An empirical study","author":"Liu","year":"2023","journal-title":"CoRR"},{"key":"10.1016\/j.neunet.2026.108574_bib0025","series-title":"Gpt-4 technical report","author":"OpenAI","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_sbref0026","series-title":"Advances in neural information processing systems","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"vol. 35","author":"Ouyang","year":"2022"},{"key":"10.1016\/j.neunet.2026.108574_sbref0027","series-title":"Advances in neural information processing systems","first-page":"116617","article-title":"Iterative reasoning preference optimization","volume":"vol. 37","author":"Pang","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_sbref0028","series-title":"The second tiny papers track at ICLR 2024","article-title":"LLM self defense: By self examination, LLMs know they are being tricked","author":"Phute","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_sbref0029","series-title":"Advances in neural information processing systems","first-page":"53728","article-title":"Direct preference optimization: Your language model is secretly a reward model","volume":"vol. 36","author":"Rafailov","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_bib0030","series-title":"Proceedings of the 2024 on ACM SIGSAC conference on computer and communications security","first-page":"1671-1685","article-title":"\u201dDo anything now\u201d: Characterizing and evaluating in-the-wild jailbreak prompts on large language models","author":"Shen","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0031","series-title":"Ease: Practical and efficient safety alignment for small language models","author":"Shi","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_bib0032","series-title":"Gemma 2: Improving open language models at a practical size","author":"Riviere","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0033","series-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_bib0034","series-title":"Solving math word problems with process- and outcome-based feedback","author":"Uesato","year":"2022"},{"key":"10.1016\/j.neunet.2026.108574_bib0035","series-title":"Findings of the association for computational linguistics ACL 2024","first-page":"16031","article-title":"Defending LLMs against jailbreaking attacks via backtranslation","author":"Wang","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0036","series-title":"Reasoningguard: Safeguarding large reasoning models with inference-time safety aha moments","author":"Wang","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_sbref0037","series-title":"Advances in neural information processing systems","first-page":"80079","article-title":"Jailbroken: How does LLM safety training fail?","volume":"vol. 36","author":"Wei","year":"2023"},{"key":"10.1016\/j.neunet.2026.108574_sbref0038","series-title":"Advances in neural information processing systems","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"vol. 35","author":"Wei","year":"2022"},{"key":"10.1016\/j.neunet.2026.108574_bib0039","series-title":"Jailbreak and guard aligned language models with only few in-context demonstrations","author":"Wei","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0040","series-title":"Ethical and social risks of harm from language models","author":"Weidinger","year":"2021"},{"issue":"12","key":"10.1016\/j.neunet.2026.108574_bib0041","doi-asserted-by":"crossref","first-page":"1486","DOI":"10.1038\/s42256-023-00765-8","article-title":"Defending chatGPT against jailbreak attack via self-reminders","volume":"5","author":"Xie","year":"2023","journal-title":"Nature Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.108574_sbref0042","series-title":"Proceedings of the 62nd annual meeting of the association for computational linguistics (volume 1: Long papers)","first-page":"5587","article-title":"Safedecoding: Defending against jailbreak attacks via safety-aware decoding","author":"Xu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_sbref0043","series-title":"Findings of the association for computational linguistics: ACL 2024","first-page":"7432","article-title":"A comprehensive study of jailbreak attack versus defense for large language models","author":"Xu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0044","series-title":"Enhancing model defense against jailbreaks with proactive safety reasoning","author":"Yang","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_bib0045","series-title":"33rd USENIX security symposium (USENIX security 24)","first-page":"4657","article-title":"{LLM-Fuzzer}: Scaling assessment of large language model jailbreaks","author":"Yu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0046","series-title":"Defending against jailbreak through early exit generation of large language models","author":"Zhao","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_bib0047","series-title":"Prefix guidance: A steering wheel for large language models to defend against jailbreak attacks","author":"Zhao","year":"2024"},{"key":"10.1016\/j.neunet.2026.108574_bib0048","series-title":"Siren: A learning-based multi-turn attack framework for simulating real-world human jailbreak behaviors","author":"Zhao","year":"2025"},{"key":"10.1016\/j.neunet.2026.108574_sbref0049","article-title":"Universal and transferable adversarial attacks on aligned language models","author":"Zou","year":"2023","journal-title":"CoRR"}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000377?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000377?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T18:33:10Z","timestamp":1775500390000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026000377"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":49,"alternative-id":["S0893608026000377"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108574","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"CoT defender: Preemptive chain-of-thought occupation for jailbreak attack mitigation","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108574","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"108574"}}