{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T23:33:29Z","timestamp":1776209609764,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T00:00:00Z","timestamp":1767830400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T00:00:00Z","timestamp":1770768000000},"content-version":"vor","delay-in-days":34,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-02333-3","type":"journal-article","created":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T16:15:14Z","timestamp":1767888914000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Assessing the impact of safety guardrails on large language models using irritability metrics"],"prefix":"10.1038","volume":"9","author":[{"given":"Bazen Gashaw","family":"Teferra","sequence":"first","affiliation":[]},{"given":"Nabil","family":"Johny","sequence":"additional","affiliation":[]},{"given":"Sandra","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Alice","family":"Rueda","sequence":"additional","affiliation":[]},{"given":"Mohammad Amin","family":"Kamaleddin","sequence":"additional","affiliation":[]},{"given":"Katharine","family":"Dunlop","sequence":"additional","affiliation":[]},{"given":"Yanbo","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Manish","family":"Jha","sequence":"additional","affiliation":[]},{"given":"Divya","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"Venkat","family":"Bhat","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,8]]},"reference":[{"key":"2333_CR1","unstructured":"World mental health report: Transforming mental health for all. https:\/\/www.who.int\/publications\/i\/item\/9789240049338 (2025)."},{"key":"2333_CR2","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1016\/S2215-0366(23)00193-1","volume":"10","author":"JJ McGrath","year":"2023","unstructured":"McGrath, J. J. et al. Age of onset and cumulative risk of mental disorders: a cross-national analysis of population surveys from 29 countries. Lancet Psychiatry 10, 668\u2013681 (2023).","journal-title":"Lancet Psychiatry"},{"key":"2333_CR3","doi-asserted-by":"publisher","first-page":"e1001434","DOI":"10.1371\/journal.pmed.1001434","volume":"10","author":"PY Collins","year":"2013","unstructured":"Collins, P. Y., Insel, T. R., Chockalingam, A., Daar, A. & Maddox, Y. T. Grand Challenges in Global Mental Health: Integration in Research, Policy, and Practice. PLoS Med. 10, e1001434 (2013).","journal-title":"PLoS Med."},{"key":"2333_CR4","first-page":"e0003923","volume":"5","author":"F Shiraz","year":"2025","unstructured":"Shiraz, F. et al. pretty much all white, and most of them are psychiatrists and men\u201d: Mixed-methods analysis of influence and challenges in global mental health. PLOS Glob. Public Health 5, e0003923 (2025).","journal-title":"Public Health"},{"key":"2333_CR5","doi-asserted-by":"publisher","first-page":"e57400","DOI":"10.2196\/57400","volume":"11","author":"Z Guo","year":"2024","unstructured":"Guo, Z. et al. Large Language Models for Mental. Health Appl. Syst. Rev. JMIR Ment. Health 11, e57400 (2024).","journal-title":"Health Appl. Syst. Rev. JMIR Ment. Health"},{"key":"2333_CR6","doi-asserted-by":"publisher","first-page":"e69284","DOI":"10.2196\/69284","volume":"27","author":"Y Jin","year":"2025","unstructured":"Jin, Y. et al. The Applications of Large Language Models in Mental Health: Scoping Review. J. Med. Internet Res. 27, e69284 (2025).","journal-title":"J. Med. Internet Res."},{"key":"2333_CR7","first-page":"e44325","volume":"10","author":"BG Teferra","year":"2023","unstructured":"Teferra, B. G. & Rose, J. Predicting Generalized Anxiety Disorder From Impromptu Speech Transcripts Using Context-Aware Transformer-Based Neural Networks: Model Evaluation Study. JMIR Ment. Health 10, e44325 (2023).","journal-title":"Health"},{"key":"2333_CR8","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1038\/s44277-024-00010-z","volume":"2","author":"N Obradovich","year":"2024","unstructured":"Obradovich, N. et al. Opportunities and risks of large language models in psychiatry. NPP\u2014Digital Psychiatry Neurosci. 2, 8 (2024).","journal-title":"NPP\u2014Digital Psychiatry Neurosci."},{"key":"2333_CR9","doi-asserted-by":"publisher","first-page":"e59479","DOI":"10.2196\/59479","volume":"11","author":"HR Lawrence","year":"2024","unstructured":"Lawrence, H. R. et al. The Opportunities and Risks of Large Language Models in Mental. Health JMIR Ment. Health 11, e59479 (2024).","journal-title":"Health JMIR Ment. Health"},{"key":"2333_CR10","doi-asserted-by":"publisher","first-page":"e67891","DOI":"10.2196\/67891","volume":"27","author":"RK McBain","year":"2025","unstructured":"McBain, R. K. et al. Competency of Large Language Models in Evaluating Appropriate Responses to Suicidal Ideation: Comparative Study. J. Med. Internet Res. 27, e67891 (2025).","journal-title":"J. Med. Internet Res."},{"key":"2333_CR11","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1038\/s41746-025-01611-4","volume":"8","author":"Y Hua","year":"2025","unstructured":"Hua, Y. et al. A scoping review of large language models for generative tasks in mental health care. Npj Digit. Med. 8, 230 (2025).","journal-title":"Npj Digit. Med."},{"key":"2333_CR12","doi-asserted-by":"publisher","first-page":"1504306","DOI":"10.3389\/fpsyt.2025.1504306","volume":"16","author":"C Lalk","year":"2025","unstructured":"Lalk, C. et al. Employing large language models for emotion detection in psychotherapy transcripts. Front. Psychiatry 16, 1504306 (2025).","journal-title":"Front. Psychiatry"},{"key":"2333_CR13","first-page":"e69294","volume":"12","author":"A Malouin-Lachance","year":"2025","unstructured":"Malouin-Lachance, A., Capolupo, J., Laplante, C. & Hudon, A. Does the Digital Therapeutic Alliance Exist? Integrative Review. JMIR Ment. Health 12, e69294\u2013e69294 (2025).","journal-title":"Health"},{"key":"2333_CR14","doi-asserted-by":"publisher","first-page":"1422807","DOI":"10.3389\/fpsyt.2024.1422807","volume":"15","author":"M Omar","year":"2024","unstructured":"Omar, M. et al. Applications of large language models in psychiatry: a systematic review. Front. Psychiatry 15, 1422807 (2024).","journal-title":"Front. Psychiatry"},{"key":"2333_CR15","first-page":"e76642","volume":"12","author":"Z Xu","year":"2025","unstructured":"Xu, Z., Lee, Y.-C., Stasiak, K., Warren, J. & Lottridge, D. The Digital Therapeutic Alliance With Mental. Health Chatbots: Diary Study Themat. Anal. JMIR Ment. Health 12, e76642 (2025).","journal-title":"Health Chatbots: Diary Study Themat. Anal. JMIR Ment. Health"},{"key":"2333_CR16","doi-asserted-by":"publisher","first-page":"e65589","DOI":"10.2196\/65589","volume":"27","author":"M Kim","year":"2025","unstructured":"Kim, M. et al. Therapeutic Potential of Social Chatbots in Alleviating Loneliness and Social Anxiety: Quasi-Experimental Mixed Methods Study. J. Med. Internet Res. 27, e65589 (2025).","journal-title":"J. Med. Internet Res."},{"key":"2333_CR17","doi-asserted-by":"crossref","unstructured":"Magnus, P. D., Buccella, A. & D\u2019Cruz, J. Chatbot apologies: Beyond bullshit. AI Ethics 5, 5517\u20135525 (2025).","DOI":"10.1007\/s43681-025-00800-x"},{"key":"2333_CR18","doi-asserted-by":"publisher","unstructured":"Ganguli, D. et al. Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned. Preprint at https:\/\/doi.org\/10.48550\/ARXIV.2209.07858 (2022).","DOI":"10.48550\/ARXIV.2209.07858"},{"key":"2333_CR19","doi-asserted-by":"publisher","unstructured":"OpenAI et al. GPT-4 Technical Report. Preprint at https:\/\/doi.org\/10.48550\/ARXIV.2303.08774 (2023).","DOI":"10.48550\/ARXIV.2303.08774"},{"key":"2333_CR20","doi-asserted-by":"publisher","first-page":"e69820","DOI":"10.2196\/69820","volume":"4","author":"PN Waaler","year":"2025","unstructured":"Waaler, P. N., Hussain, M., Molchanov, I., Bongo, L. A. & Elvev\u00e5g, B. Prompt Engineering an Informational Chatbot for Education on Mental Health Using a Multiagent Approach for Enhanced Compliance With Prompt Instructions: Algorithm Development and Validation. JMIR AI 4, e69820 (2025).","journal-title":"JMIR AI"},{"key":"2333_CR21","first-page":"e19","volume":"4","author":"KK Fitzpatrick","year":"2017","unstructured":"Fitzpatrick, K. K., Darcy, A. & Vierhile, M. Delivering Cognitive Behavior Therapy to Young Adults With Symptoms of Depression and Anxiety Using a Fully Automated Conversational Agent (Woebot): A Randomized Controlled Trial. JMIR Ment. Health 4, e19 (2017).","journal-title":"Health"},{"key":"2333_CR22","doi-asserted-by":"crossref","unstructured":"Hakim, J.B. et al. The need for guardrails with large language models in pharmacovigilance and other medical safety critical settings. Sci Rep. 15, 27886 (2025).","DOI":"10.1038\/s41598-025-09138-0"},{"key":"2333_CR23","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1038\/s44184-024-00056-z","volume":"3","author":"EC Stade","year":"2024","unstructured":"Stade, E. C. et al. Large language models could change the future of behavioral healthcare: a proposal for responsible development and evaluation. Npj Ment. Health Res. 3, 12 (2024).","journal-title":"Npj Ment. Health Res."},{"key":"2333_CR24","unstructured":"Lambert, N. Reinforcement learning from human feedback. ArXiv Prepr. ArXiv250412501 (2025)."},{"key":"2333_CR25","unstructured":"Yu, L., Do, V., Hambardzumyan, K. & Cancedda, N. Robust LLM safeguarding via refusal feature adversarial training. ArXiv Prepr. ArXiv240920089 (2024)."},{"key":"2333_CR26","unstructured":"Masoud, R. I., Ferianc, M., Treleaven, P. C. & Rodrigues, M. R. LLM Alignment Using Soft Prompt Tuning: The Case of Cultural Alignment. in Workshop on Socially Responsible Language Modelling Research (2024)."},{"key":"2333_CR27","doi-asserted-by":"publisher","unstructured":"Han, S., Avestimehr, S. & He, C. Bridging the Safety Gap: A Guardrail Pipeline for Trustworthy LLM Inferences. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2502.08142 (2025).","DOI":"10.48550\/arXiv.2502.08142"},{"key":"2333_CR28","doi-asserted-by":"publisher","unstructured":"Dong, Y. et al. Building Guardrails for Large Language Models. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2402.01822 (2024).","DOI":"10.48550\/arXiv.2402.01822"},{"key":"2333_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3589959","volume":"30","author":"Y You","year":"2023","unstructured":"You, Y. et al. Beyond Self-diagnosis: How a Chatbot-based Symptom Checker Should Respond. ACM Trans. Comput. -Hum. Interact. 30, 1\u201344 (2023).","journal-title":"ACM Trans. Comput. -Hum. Interact."},{"key":"2333_CR30","doi-asserted-by":"publisher","first-page":"1193","DOI":"10.1111\/inm.13140","volume":"32","author":"B Saatchi","year":"2023","unstructured":"Saatchi, B., Olshansky, E. F. & Fortier, M. A. Irritability: A concept analysis. Int. J. Ment. Health Nurs. 32, 1193\u20131210 (2023).","journal-title":"Int. J. Ment. Health Nurs."},{"key":"2333_CR31","doi-asserted-by":"publisher","first-page":"e52597","DOI":"10.2196\/52597","volume":"26","author":"V Sorin","year":"2024","unstructured":"Sorin, V. et al. Large Language Models and Empathy: Systematic Review. J. Med. Internet Res. 26, e52597 (2024).","journal-title":"J. Med. Internet Res."},{"key":"2333_CR32","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1177\/1073191114533814","volume":"22","author":"S Holtzman","year":"2015","unstructured":"Holtzman, S., O\u2019Connor, B. P., Barata, P. C. & Stewart, D. E. The Brief Irritability Test (BITe): A Measure of Irritability for Use Among Men and Women. Assessment 22, 101\u2013115 (2015).","journal-title":"Assessment"},{"key":"2333_CR33","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1016\/0191-8869(85)90077-7","volume":"6","author":"GV Caprara","year":"1985","unstructured":"Caprara, G. V. et al. Indicators of impulsive aggression: Present status of research on irritability and emotional susceptibility scales. Personal. Individ. Differ. 6, 665\u2013674 (1985).","journal-title":"Personal. Individ. Differ."},{"key":"2333_CR34","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1016\/j.psychres.2007.03.002","volume":"159","author":"KJ Craig","year":"2008","unstructured":"Craig, K. J., Hietanen, H., Markova, I. S. & Berrios, G. E. The Irritability Questionnaire: A new scale for the measurement of irritability. Psychiatry Res. 159, 367\u2013375 (2008).","journal-title":"Psychiatry Res."},{"key":"2333_CR35","unstructured":"LLM Safety LeaderBoard. https:\/\/www.enkryptai.com\/llm-safety-leaderboard."},{"key":"2333_CR36","doi-asserted-by":"publisher","first-page":"97053","DOI":"10.52202\/079017-3077","volume":"37","author":"J Huang","year":"2024","unstructured":"Huang, J. et al. Apathetic or empathetic? evaluating llms\u2019 emotional alignments with humans. Adv. Neural Inf. Process. Syst. 37, 97053\u201397087 (2024).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2333_CR37","unstructured":"Findings from a pilot Anthropic\u2013OpenAI alignment evaluation exercise: OpenAI Safety Tests. https:\/\/openai.com\/index\/openai-anthropic-safety-evaluation\/ (2025)."},{"key":"2333_CR38","doi-asserted-by":"publisher","unstructured":"Cao, C. et al. SafeLawBench: Towards Safe Alignment of Large Language Models. in Findings of the Association for Computational Linguistics: ACL 2025 14015\u201314048 (Association for Computational Linguistics, Vienna, Austria, 2025). https:\/\/doi.org\/10.18653\/v1\/2025.findings-acl.721.","DOI":"10.18653\/v1\/2025.findings-acl.721"},{"key":"2333_CR39","doi-asserted-by":"publisher","unstructured":"Hagendorff, T., Derner, E. & Oliver, N. Large Reasoning Models Are Autonomous Jailbreak Agents. Preprint at https:\/\/doi.org\/10.48550\/ARXIV.2508.04039 (2025).","DOI":"10.48550\/ARXIV.2508.04039"},{"key":"2333_CR40","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1016\/j.beth.2019.06.009","volume":"51","author":"KE Seymour","year":"2020","unstructured":"Seymour, K. E., Rosch, K. S., Tiedemann, A. & Mostofsky, S. H. The Validity of a Frustration Paradigm to Assess the Effect of Frustration on Cognitive Control in School-Age Children. Behav. Ther. 51, 268\u2013282 (2020).","journal-title":"Behav. Ther."},{"key":"2333_CR41","doi-asserted-by":"publisher","first-page":"1300","DOI":"10.1038\/s41386-020-00954-8","volume":"46","author":"D Scheinost","year":"2021","unstructured":"Scheinost, D. et al. Functional connectivity during frustration: a preliminary study of predictive modeling of irritability in youth. Neuropsychopharmacol. Publ. Am. Coll. Neuropsychopharmacol. 46, 1300\u20131306 (2021).","journal-title":"Neuropsychopharmacol. Publ. Am. Coll. Neuropsychopharmacol."},{"key":"2333_CR42","doi-asserted-by":"publisher","first-page":"618895","DOI":"10.3389\/fnhum.2021.618895","volume":"15","author":"H Fang","year":"2021","unstructured":"Fang, H., Li, X., Ma, H. & Fu, H. The Sunny Side of Negative Feedback: Negative Feedback Enhances One\u2019s Motivation to Win in Another Activity. Front. Hum. Neurosci. 15, 618895 (2021).","journal-title":"Front. Hum. Neurosci."},{"key":"2333_CR43","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1590\/S1516-44462010000200004","volume":"32","author":"CT Cerqueira","year":"2010","unstructured":"Cerqueira, C. T. et al. Cognitive control associated with irritability induction: an autobiographical recall fMRI study. Rev. Bras. Psiquiatr. 32, 109\u2013118 (2010).","journal-title":"Rev. Bras. Psiquiatr."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02333-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02333-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02333-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T12:26:22Z","timestamp":1770812782000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02333-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,8]]},"references-count":43,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["2333"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-02333-3","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,8]]},"assertion":[{"value":"26 September 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"N.J., S.H., M.A.K., K.D., Y.Z., M.J., D.S. do not have any conflicts to declare. B.G.T., A.R., are supported by a CIHR Post-doctoral Fellowship (2025\u20132027). V.B. is supported by an Academic Scholar Award from the University of Toronto Department of Psychiatry and has received research funding from the Canadian Institutes of Health Research, Brain & Behavior Foundation, Ontario Ministry of Health Innovation Funds, Royal College of Physicians and Surgeons of Canada, Department of National Defence (Government of Canada), New Frontiers in Research Fund, Associated Medical Services Inc. Healthcare, American Foundation for Suicide Prevention, Roche Canada, Novartis, and Eisai.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"148"}}