{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:56:09Z","timestamp":1764399369579,"version":"3.46.0"},"reference-count":44,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249402","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"2564-2569","source":"Crossref","is-referenced-by-count":0,"title":["Foundation Models as Guardrails: LLM-and VLM-Based Approaches to Safety and Alignment"],"prefix":"10.1109","author":[{"given":"Huy H.","family":"Nguyen","sequence":"first","affiliation":[{"name":"SB Intuitions,Tokyo,Japan"}]},{"given":"Pride","family":"Kavumba","sequence":"additional","affiliation":[{"name":"SB Intuitions,Tokyo,Japan"}]},{"given":"Tomoya","family":"Kurosawa","sequence":"additional","affiliation":[{"name":"SB Intuitions,Tokyo,Japan"}]},{"given":"Koki","family":"Wataoka","sequence":"additional","affiliation":[{"name":"SB Intuitions,Tokyo,Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.hcc.2024.100211"},{"key":"ref2","first-page":"27 730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref3","article-title":"Constitutional AI: Harmlessness fromAI feedback","author":"Bai","year":"2022","journal-title":"arXiv preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-025-11389-2"},{"key":"ref5","first-page":"35 181","article-title":"HarmBench: A standardized evaluation framework for automated red teaming and robust refusal","volume-title":"International Conference on Machine Learning","author":"Mazeika"},{"article-title":"R2-Guard: Robust reasoning enabled LLM guardrail via knowledge-enhanced logical reasoning","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Kang","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.198"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.960"},{"volume-title":"The landscape of LLM guardrails: Intervention levels and techniques","year":"2024","author":"Iris","key":"ref9"},{"key":"ref10","article-title":"Llama Guard: LLMbased input-output safeguard for human-AI conversations","author":"Inan","year":"2023","journal-title":"arXiv preprint"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-demo.40"},{"key":"ref12","article-title":"Refining Input Guardrails: Enhancing LLM-as-a-judge efficiency through chain-of-thought fine-tuning and alignment","volume-title":"AAAI Workshop on Preventing and Detecting LLM Misinformation 
(PDLM)","author":"Rad","year":"2025"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.301"},{"key":"ref14","article-title":"Training a helpful and harmless assistant with reinforcement learning from human feedback","author":"Bai","year":"2022","journal-title":"arXiv preprint"},{"key":"ref15","article-title":"Red-teaming large language models using chain of utterances for safety-alignment","author":"Bhardwaj","year":"2023","journal-title":"arXiv preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.306"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.830"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3689217.3690621"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0261"},{"key":"ref20","article-title":"Llama Guard 3 Vision: Safeguarding human-AI image understanding conversations","author":"Chi","year":"2024","journal-title":"arXiv preprint"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01249"},{"article-title":"LlavaGuard: An open VLM-based framework for safeguarding vision datasets and models","volume-title":"International Conference on Machine Learning","author":"Helff","key":"ref22"},{"key":"ref23","article-title":"VLM-Guard: Safeguarding vision-language models via fulfilling safety alignment gap","author":"Liu","year":"2025","journal-title":"arXiv preprint"},{"key":"ref24","article-title":"VLMGuard-R1: Proactive safety alignment for VLMs via reasoning-driven prompt optimization","author":"Chen","year":"2025","journal-title":"arXiv preprint"},{"key":"ref25","article-title":"UniGuard: Towards universal safety guardrails for jailbreak attacks on multimodal large language models","volume-title":"ICML Workshop on Reliable and Responsible Foundation Models","author":"Oh","year":"2025"},{"volume-title":"Guardrails AI: Mitigate Gen AI risks with Guardrails","key":"ref26"},{"volume-title":"Amazon Bedrock Guardrails","key":"ref27"},{"volume-title":"Implementing LLM Guardrails for Safe and Responsible Generative AI Deployment on Databricks","key":"ref28"},{"volume-title":"What is Azure AI Content Safety?","year":"2025","key":"ref29"},{"volume-title":"Generative AI on Vertex AI - Responsible AI","year":"2025","key":"ref30"},{"volume-title":"Evaluate and track your LLM experiments: Introducing TruLens","year":"2023","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.311"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.158"},{"key":"ref34","article-title":"Video-SafetyBench: A benchmark for safety evaluation of video lvlms","author":"Liu","year":"2025","journal-title":"arXiv preprint"},{"article-title":"CASE-Bench: Context-aware safety benchmark for large language models","volume-title":"Forty-second International Conference on Machine Learning","author":"Sun","key":"ref35"},{"key":"ref36","article-title":"GPT-4 technical report","author":"Achiam","year":"2023","journal-title":"arXiv preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.397"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i12.26752"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01850"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.405"},{"key":"ref41","article-title":"ALERT: A comprehensive benchmark for assessing large language models\u2019 safety 
through red teaming","author":"Tedeschi","year":"2024","journal-title":"arXiv preprint"},{"volume-title":"CircleGuardBench: New standard for evaluating AI moderation models","year":"2025","key":"ref42"},{"volume-title":"How good are the LLM guardrails on the market? a comparative study on the effectiveness of LLM content filtering across major GenAI platforms","year":"2025","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2651"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249402.pdf?arnumber=11249402","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:51:09Z","timestamp":1764399069000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249402\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249402","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}
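
The record above is a Crossref "work" envelope: the useful bibliographic fields (title, author, page, reference, event) all live under the top-level "message" key. Below is a minimal Python sketch of how such a record can be fetched and unpacked. It assumes the public Crossref REST API endpoint https://api.crossref.org/works/{DOI} and the third-party requests package; the field accesses mirror the keys visible in the record, and the defensive .get() calls reflect the fact that most Crossref fields are optional.

import requests

# DOI taken from the "DOI" field of the record above.
DOI = "10.1109/apsipaasc65261.2025.11249402"

# Public Crossref REST API endpoint (assumed available; no API key needed).
# The response is the same envelope shown above:
# {"status": "ok", "message-type": "work", ..., "message": {...}}
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]

# "title" and "container-title" are lists; take the first entry if present.
title = work["title"][0] if work.get("title") else ""
venue = work["container-title"][0] if work.get("container-title") else ""

# Each author entry carries "given"/"family" name parts plus affiliations.
authors = ", ".join(
    f"{a.get('given', '')} {a.get('family', '')}".strip()
    for a in work.get("author", [])
)

print(f'{authors}. "{title}". {venue}, pp. {work.get("page", "n/a")}.')
print("Reference count:", work.get("reference-count"))

Run against this DOI, the sketch would print the four SB Intuitions authors, the paper title, the APSIPA ASC 2025 proceedings as the venue, the page range 2564-2569, and a reference count of 44, matching the fields in the record.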