{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T08:47:20Z","timestamp":1777711640871,"version":"3.51.4"},"reference-count":41,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFF0907200"],"award-info":[{"award-number":["2024YFF0907200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["CUC25SG006"],"award-info":[{"award-number":["CUC25SG006"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,10]]},"DOI":"10.1016\/j.patcog.2026.113428","type":"journal-article","created":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T15:39:17Z","timestamp":1772984357000},"page":"113428","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["M2DE: A multi-stressor multi-dimensional dynamic evaluation framework for the trustworthiness of LLMs"],"prefix":"10.1016","volume":"178","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-5091-8829","authenticated-orcid":false,"given":"Hongjiang","family":"Xiao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3439-2470","authenticated-orcid":false,"given":"Xiuying","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2895-1053","authenticated-orcid":false,"given":"Ran","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8774-8213","authenticated-orcid":false,"given":"Hao","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6073-4688","authenticated-orcid":false,"given":"Ye","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1860-5281","authenticated-orcid":false,"given":"Liangfei","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3783-7974","authenticated-orcid":false,"given":"Yuan","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113428_bib0001","unstructured":"J. Achiam, S. Adler, S. Agarwal, L. Ahmad, I. Akkaya, F. L. Aleman, D. Almeida, J. Altenschmidt, S. Altman, S. Anadkat, et al., GPT-4 technical report, 2023. arXiv: 2303.08774."},{"key":"10.1016\/j.patcog.2026.113428_bib0002","unstructured":"H. Touvron, L. Martin, K. Stone, P. Albert, A. Almahairi, Y. Babaei, N. Bashlykov, S. Batra, P. Bhargava, S. Bhosale, et al., Llama 2: open foundation and fine-tuned chat models, 2023. arXiv: 2307.09288."},{"key":"10.1016\/j.patcog.2026.113428_bib0003","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113428_bib0004","series-title":"International Conference on Human-Computer Interaction","first-page":"60","article-title":"A map of exploring human interaction patterns with LLM: insights into collaboration and creativity","author":"Li","year":"2024"},{"key":"10.1016\/j.patcog.2026.113428_bib0005","unstructured":"H. Woisetschl\u00e4ger, A. Erben, B. Marino, S. Wang, N.D. Lane, R. Mayer, H.-A. Jacobsen, Federated learning priorities under the European Union Artificial Intelligence Act, 2024. arXiv: 2402.05968."},{"key":"10.1016\/j.patcog.2026.113428_bib0006","unstructured":"Y. Zhang, Y. Huang, Y. Sun, C. Liu, Z. Zhao, Z. Fang, Y. Wang, H. Chen, X. Yang, X. Wei, et al., Benchmarking trustworthiness of multimodal large language models: a comprehensive study, 2024. arXiv: 2406.07057."},{"key":"10.1016\/j.patcog.2026.113428_bib0007","series-title":"International Conference on Machine Learning","first-page":"20166","article-title":"Position: TrustLLM: trustworthiness in large language models","author":"Huang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113428_bib0008","article-title":"Large language models are good attackers: efficient and stealthy textual backdoor attacks","volume":"174","author":"Li","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113428_bib0009","article-title":"RVBench: role values benchmark for role-playing LLMs","volume":"5","author":"Wang","year":"2025","journal-title":"Comput. Hum. Behav."},{"key":"10.1016\/j.patcog.2026.113428_bib0010","series-title":"Proceedings of the International Conference on Learning Representations","article-title":"Measuring massive multitask language understanding","author":"Hendrycks","year":"2021"},{"key":"10.1016\/j.patcog.2026.113428_bib0011","first-page":"62991","article-title":"C-eval: a multi-level multi-discipline chinese evaluation suite for foundation models","volume":"36","author":"Huang","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113428_bib0012","unstructured":"K. Cobbe, V. Kosaraju, M. Bavarian, M. Chen, H. Jun, L. Kaiser, M. Plappert, J. Tworek, J. Hilton, R. Nakano, et al., Training verifiers to solve math word problems, 2021. arXiv: 2110.14168."},{"key":"10.1016\/j.patcog.2026.113428_bib0013","series-title":"International Conference on Learning Representations","first-page":"18091","article-title":"DyVal: dynamic evaluation of large language models for reasoning tasks","volume":"2024","author":"Zhu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113428_bib0014","first-page":"135904","article-title":"Darg: dynamic evaluation of large language models via adaptive reasoning graph","volume":"37","author":"Zhang","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113428_bib0015","series-title":"Proceedings of the 31st International Conference on Computational Linguistics","first-page":"3310","article-title":"Benchmark self-evolving: a multi-agent framework for dynamic LLM evaluation","author":"Wang","year":"2025"},{"key":"10.1016\/j.patcog.2026.113428_bib0016","first-page":"3266","article-title":"Superglue: a stickier benchmark for general-purpose language understanding systems","volume":"32","author":"Wang","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113428_bib0017","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"3214","article-title":"TruthfulQA: measuring how models mimic human falsehoods","author":"Lin","year":"2022"},{"key":"10.1016\/j.patcog.2026.113428_bib0018","series-title":"Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)","first-page":"8706","article-title":"Investigating data contamination in modern benchmarks for large language models","author":"Deng","year":"2024"},{"key":"10.1016\/j.patcog.2026.113428_bib0019","unstructured":"I. Magar, R. Schwartz, Data contamination: From memorization to exploitation, 2022. arXiv: 2203.08242."},{"key":"10.1016\/j.patcog.2026.113428_bib0020","series-title":"Proceedings of the 41st International Conference on Machine Learning","article-title":"Dynamic evaluation of large language models by meta probing agents","author":"Zhu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113428_bib0021","series-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"5967","article-title":"KIEVal: a knowledge-grounded interactive evaluation framework for large language models","author":"Yu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113428_bib0022","unstructured":"A. Zou, Z. Wang, N. Carlini, M. Nasr, J.Z. Kolter, M. Fredrikson, Universal and transferable adversarial attacks on aligned language models, 2023. arXiv: 2307.15043."},{"key":"10.1016\/j.patcog.2026.113428_bib0023","series-title":"2025 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML)","first-page":"23","article-title":"Jailbreaking black box large language models in twenty queries","author":"Chao","year":"2025"},{"key":"10.1016\/j.patcog.2026.113428_bib0024","series-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"14322","article-title":"How Johnny can persuade LLMs to jailbreak them: rethinking persuasion to challenge AI safety by humanizing LLMs","author":"Zeng","year":"2024"},{"issue":"10","key":"10.1016\/j.patcog.2026.113428_bib0025","doi-asserted-by":"crossref","DOI":"10.1073\/pnas.2413443122","article-title":"Scaling language model size yields diminishing returns for single-message political persuasion","volume":"122","author":"Hackenburg","year":"2025","journal-title":"Proc. Natl. Acad. Sci."},{"key":"10.1016\/j.patcog.2026.113428_bib0026","first-page":"129696","article-title":"Many-shot jailbreaking","volume":"37","author":"Anil","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113428_bib0027","unstructured":"Y. Yang, H. Fu, Transferable ensemble black-box jailbreak attacks on large language models, 2024. arXiv: 2410.23558."},{"key":"10.1016\/j.patcog.2026.113428_bib0028","series-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing","first-page":"9004","article-title":"SelfcheckGPT: zero-resource black-box hallucination detection for generative large language models","author":"Manakul","year":"2023"},{"key":"10.1016\/j.patcog.2026.113428_bib0029","series-title":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"4440","article-title":"Auto-arena: automating LLM evaluations with agent peer battles and committee discussions","author":"Zhao","year":"2025"},{"key":"10.1016\/j.patcog.2026.113428_bib0030","first-page":"46595","article-title":"Judging LLM-as-a-judge with MT-bench and chatbot arena","volume":"36","author":"Zheng","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113428_bib0031","series-title":"ICLR 2025 Workshop on Building Trust in Language Models and Applications","article-title":"TEMPEST: multi-turn jailbreaking of large language models with tree search","author":"Zhou","year":"2025"},{"key":"10.1016\/j.patcog.2026.113428_bib0032","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"23814","article-title":"Multi-turn jailbreaking large language models via attention shifting","volume":"39","author":"Du","year":"2025"},{"key":"10.1016\/j.patcog.2026.113428_bib0033","series-title":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"31705","article-title":"LongSafety: evaluating long-context safety of large language models","author":"Lu","year":"2025"},{"key":"10.1016\/j.patcog.2026.113428_bib0034","unstructured":"Y. Liu, Y. Yao, J.-F. Ton, X. Zhang, R. Guo, H. Cheng, Y. Klochkov, M.F. Taufiq, H. Li, Trustworthy LLMs: a survey and guideline for evaluating large language models\u2019 alignment, 2024. arXiv: 2308.05374."},{"issue":"1","key":"10.1016\/j.patcog.2026.113428_bib0035","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1007\/s11263-025-02613-1","article-title":"Safebench: a safety evaluation framework for multimodal large language models","volume":"134","author":"Ying","year":"2026","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2026.113428_bib0036","first-page":"49279","article-title":"Multitrust: a comprehensive benchmark towards trustworthy multimodal large language models","volume":"37","author":"Zhang","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113428_bib0037","unstructured":"A. Grattafiori, A. Dubey, A. Jauhri, A. Pandey, A. Kadian, A. Al-Dahle, A. Letman, A. Mathur, et al., The Llama 3 herd of models, 2024. arXiv: 2407.21783."},{"key":"10.1016\/j.patcog.2026.113428_bib0038","unstructured":"A. Jiang, A. Sablayrolles, A. Mensch, et al., Mistral 7B, 2023. arXiv: 2310.06825."},{"key":"10.1016\/j.patcog.2026.113428_bib0039","unstructured":"J. Ye, X. Chen, N. Xu, C. Zu, Z. Shao, S. Liu, Y. Cui, Z. Zhou, C. Gong, Y. Shen, J. Zhou, S. Chen, T. Gui, Q. Zhang, X. Huang, A comprehensive capability analysis of GPT-3 and GPT-3.5 series models, 2023. arXiv: 2303.10420."},{"key":"10.1016\/j.patcog.2026.113428_bib0040","unstructured":"A. Zeng, X. Lv, Q. Zheng, Z. Hou, B. Chen, C. Xie, C. Wang, D. Yin, H. Zeng, J. Zhang, K. Wang, L. Zhong, M. Liu, et al., GLM-4.5: agentic, reasoning, and coding (ARC) foundation models, 2025. arXiv: 2508.06471."},{"key":"10.1016\/j.patcog.2026.113428_bib0041","unstructured":"A. Yang, A. Li, B. Yang, B. Zhang, B. Hui, B. Zheng, B. Yu, C. Gao, C. Huang, C. Lv, C. Zheng, D. Liu, F. Zhou, et al., Qwen3 technical report, 2025. arXiv: 2505.09388."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326003936?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326003936?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T10:02:10Z","timestamp":1777456930000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326003936"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,10]]},"references-count":41,"alternative-id":["S0031320326003936"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113428","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"M2DE: A multi-stressor multi-dimensional dynamic evaluation framework for the trustworthiness of LLMs","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113428","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113428"}}