{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T22:25:28Z","timestamp":1775082328656,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819584048","type":"print"},{"value":"9789819584055","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-8405-5_19","type":"book-chapter","created":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T20:15:33Z","timestamp":1775074533000},"page":"349-363","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SCoT Guard: A Safety Chain of\u00a0Thought Guardrail Model"],"prefix":"10.1007","author":[{"given":"Yuxuan","family":"Lin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shi","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongqin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Mi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuehai","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,2]]},"reference":[{"key":"19_CR1","unstructured":"Achiam, J., et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"19_CR2","unstructured":"Team, G., et\u00a0al.: Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)"},{"key":"19_CR3","unstructured":"Dubey, A., et\u00a0al.: The llama 3 herd of models. arXiv e-prints pp. arXiv\u20132407 (2024)"},{"key":"19_CR4","unstructured":"Jiang, A.Q., et\u00a0al.: Mixtral of experts. arXiv preprint arXiv:2401.04088 (2024)"},{"key":"19_CR5","unstructured":"Liu, A., et\u00a0al.: Deepseek-v3 technical report. arXiv preprint arXiv:2412.19437 (2024)"},{"key":"19_CR6","unstructured":"Guo, D., et\u00a0al.: Deepseek-r1: Incentivizing reasoning capability in LLMs via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)"},{"key":"19_CR7","unstructured":"Bai, J., et\u00a0al.: Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)"},{"key":"19_CR8","unstructured":"Team, Q.: Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)"},{"key":"19_CR9","unstructured":"Qwq: Reflect deeply on the boundaries of the unknown. https:\/\/qwenlm.github.io\/blog\/qwq-32b-preview\/, last accessed: 2025\/07\/15"},{"key":"19_CR10","unstructured":"Yang, A., et\u00a0al.: Qwen3 technical report. arXiv preprint arXiv:2505.09388 (2025)"},{"key":"19_CR11","unstructured":"Liu, Y., He, X., Xiong, M., Fu, J., Deng, S., Hooi, B.: Flipattack: Jailbreak LLMs via flipping. arXiv preprint arXiv:2410.02832 (2024)"},{"key":"19_CR12","unstructured":"Zou, A., Wang, Z., Carlini, N., Nasr, M., Kolter, J.Z., Fredrikson, M.: Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Formento, B., Foo, C.S., Tuan, L.A., Ng, S.K.: Using punctuation as an adversarial attack on deep learning-based NLP systems: An empirical study. In: Findings of the association for computational linguistics: EACL 2023, pp. 1\u201334 (2023)","DOI":"10.18653\/v1\/2023.findings-eacl.1"},{"key":"19_CR14","unstructured":"Guo, X., Yu, F., Zhang, H., Qin, L., Hu, B.: Cold-attack: Jailbreaking LLMs with stealthiness and controllability. arXiv preprint arXiv:2402.08679 (2024)"},{"key":"19_CR15","unstructured":"Liu, X., Xu, N., Chen, M., Xiao, C.: Autodan: Generating stealthy jailbreak prompts on aligned large language models. arXiv preprint arXiv:2310.04451 (2023)"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Pasquini, D., Strohmeier, M., Troncoso, C.: Neural exec: Learning (and learning from) execution triggers for prompt injection attacks. In: Proceedings of the 2024 Workshop on Artificial Intelligence and Security, pp. 89\u2013100 (2024)","DOI":"10.1145\/3689932.3694764"},{"key":"19_CR17","unstructured":"Inan, H., et\u00a0al.: Llama guard: LLM-based input-output safeguard for human-ai conversations. arXiv preprint arXiv:2312.06674 (2023)"},{"key":"19_CR18","unstructured":"Zeng, W., et\u00a0al.: Shieldgemma: Generative ai content moderation based on gemma. arXiv preprint arXiv:2407.21772 (2024)"},{"key":"19_CR19","unstructured":"Ghosh, S., Varshney, P., Galinkin, E., Parisien, C.: Aegis: Online adaptive ai content safety moderation with ensemble of llm experts. arXiv preprint arXiv:2404.05993 (2024)"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Ghosh, S., et al.: Aegis2. 0: A diverse ai safety dataset and risks taxonomy for alignment of LLM guardrails. arXiv preprint arXiv:2501.09004 (2025)","DOI":"10.18653\/v1\/2025.naacl-long.306"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, Z., et\u00a0al.: Shieldlm: Empowering LLMs as aligned, customizable and explainable safety detectors. arXiv preprint arXiv:2402.16444 (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.610"},{"key":"19_CR22","unstructured":"Ji, J., et\u00a0al.: Ai alignment: a comprehensive survey. arXiv preprint arXiv:2310.19852 (2023)"},{"key":"19_CR23","first-page":"35811","volume":"35","author":"B Wang","year":"2022","unstructured":"Wang, B., et al.: Exploring the limits of domain-adaptive training for detoxifying large-scale language models. Adv. Neural. Inf. Process. Syst. 35, 35811\u201335824 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"19_CR24","unstructured":"Wu, J., et al.: Recursively summarizing books with human feedback. arXiv preprint arXiv:2109.10862 (2021)"},{"key":"19_CR25","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"19_CR26","first-page":"53728","volume":"36","author":"R Rafailov","year":"2023","unstructured":"Rafailov, R., Sharma, A., Mitchell, E., Manning, C.D., Ermon, S., Finn, C.: Direct preference optimization: Your language model is secretly a reward model. Adv. Neural. Inf. Process. Syst. 36, 53728\u201353741 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"19_CR27","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"19_CR28","first-page":"22199","volume":"35","author":"T Kojima","year":"2022","unstructured":"Kojima, T., Gu, S.S., Reid, M., Matsuo, Y., Iwasawa, Y.: Large language models are zero-shot reasoners. Adv. Neural. Inf. Process. Syst. 35, 22199\u201322213 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"19_CR29","unstructured":"Kumar, A., et\u00a0al.: Training language models to self-correct via reinforcement learning. arXiv preprint arXiv:2409.12917 (2024)"},{"key":"19_CR30","unstructured":"Ke, P., et\u00a0al.: Critiquellm: Scaling LLM-as-critic for effective and explainable evaluation of large language model generation 6. arXiv:2311.18702 (2023)"},{"key":"19_CR31","doi-asserted-by":"crossref","unstructured":"Liang, T., et al.: Encouraging divergent thinking in large language models through multi-agent debate. arXiv preprint arXiv:2305.19118 (2023)","DOI":"10.18653\/v1\/2024.emnlp-main.992"},{"key":"19_CR32","unstructured":"Du, Y., Li, S., Torralba, A., Tenenbaum, J.B., Mordatch, I.: Improving factuality and reasoning in language models through multiagent debate. In: Forty-first International Conference on Machine Learning (2023)"},{"key":"19_CR33","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Plan-and-solve prompting: Improving zero-shot chain-of-thought reasoning by large language models. arXiv preprint arXiv:2305.04091 (2023)","DOI":"10.18653\/v1\/2023.acl-long.147"},{"key":"19_CR34","unstructured":"Yao, S., et al.: React: Synergizing reasoning and acting in language models. In: International Conference on Learning Representations (ICLR) (2023)"},{"key":"19_CR35","unstructured":"Jaech, A., et\u00a0al.: Openai o1 system card. arXiv preprint arXiv:2412.16720 (2024)"},{"key":"19_CR36","unstructured":"Team, K., et\u00a0al.: Kimi k1. 5: Scaling reinforcement learning with LLMs. arXiv preprint arXiv:2501.12599 (2025)"},{"key":"19_CR37","first-page":"24678","volume":"36","author":"J Ji","year":"2023","unstructured":"Ji, J., et al.: Beavertails: towards improved safety alignment of LLM via a human-preference dataset. Adv. Neural. Inf. Process. Syst. 36, 24678\u201324704 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"2","key":"19_CR38","first-page":"3","volume":"1","author":"EJ Hu","year":"2022","unstructured":"Hu, E.J., et al.: Lora: Low-rank adaptation of large language models. ICLR 1(2), 3 (2022)","journal-title":"ICLR"},{"key":"19_CR39","doi-asserted-by":"crossref","unstructured":"Markov, T., et al.: A holistic approach to undesired content detection in the real world. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 15009\u201315018 (2023)","DOI":"10.1609\/aaai.v37i12.26752"},{"key":"19_CR40","doi-asserted-by":"crossref","unstructured":"Lin, Z., et al.: Toxicchat: Unveiling hidden challenges of toxicity detection in real-world user-ai conversation. arXiv preprint arXiv:2310.17389 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.311"},{"key":"19_CR41","unstructured":"Dai, J., et al.: Safe rlhf: Safe reinforcement learning from human feedback. arXiv preprint arXiv:2310.12773 (2023)"},{"key":"19_CR42","first-page":"8093","volume":"37","author":"S Han","year":"2024","unstructured":"Han, S., et al.: Wildguard: open one-stop moderation tools for safety risks, jailbreaks, and refusals of LLMs. Adv. Neural. Inf. Process. Syst. 37, 8093\u20138131 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"19_CR43","unstructured":"Mazeika, M., et\u00a0al.: Harmbench: a standardized evaluation framework for automated red teaming and robust refusal. arXiv preprint arXiv:2402.04249 (2024)"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-8405-5_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T20:15:38Z","timestamp":1775074538000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-8405-5_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819584048","9789819584055"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-8405-5_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ieee-cybermatics.org\/2025\/ica3pp\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}