{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T16:34:50Z","timestamp":1774542890300,"version":"3.50.1"},"reference-count":107,"publisher":"Tech Science Press","issue":"1","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.067750","type":"journal-article","created":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T08:21:37Z","timestamp":1754468497000},"page":"249-300","source":"Crossref","is-referenced-by-count":3,"title":["Beyond Intentions: A Critical Survey of Misalignment in LLMs"],"prefix":"10.32604","volume":"85","author":[{"given":"Yubin","family":"Qu","sequence":"first","affiliation":[]},{"given":"Song","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Long","family":"Li","sequence":"additional","affiliation":[]},{"given":"Peng","family":"Nie","sequence":"additional","affiliation":[]},{"given":"Yongming","family":"Yao","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","unstructured":"Shen T, Jin R, Huang Y, Liu C, Dong W, Guo Z, et al. Large language model alignment: a survey. arXiv:2309.15025. 2023."},{"key":"ref2","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1007\/s10515-024-00464-7","article-title":"A survey on robustness attacks for deep code models","volume":"31","author":"Qu","year":"2024","journal-title":"Autom Softw Eng"},{"key":"ref3","unstructured":"Shi D, Shen T, Huang Y, Li Z, Leng Y, Jin R, et al. Large language model safety: a holistic survey. arXiv:2412.17686. 2024."},{"key":"ref4","first-page":"1","article-title":"Security and privacy challenges of large language models: a survey","volume":"57","author":"Das","year":"2025","journal-title":"ACM Comput Surv"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"100211","DOI":"10.1016\/j.hcc.2024.100211","article-title":"A survey on large language model (llm) security and privacy: the good, the bad, and the ugly","volume":"4","author":"Yao","year":"2024","journal-title":"High-Confidence Comput"},{"key":"ref6","unstructured":"Wang Z, Bi B, Pentyala SK, Ramnath K, Chaudhuri S, Mehrotra S, et al. A comprehensive survey of LLM alignment techniques: RLHF, RLAIF, PPO, DPO and more. arXiv:2407.16216. 2024."},{"key":"ref7","first-page":"561","article-title":"Human compatible: artificial intelligence and the problem of control by Stuart Russell","volume":"40","author":"Hemphill","year":"2020","journal-title":"Cato J"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1002\/aaai.12064","article-title":"Advanced artificial agents intervene in the provision of reward","volume":"43","author":"Cohen","year":"2022","journal-title":"AI Magaz"},{"key":"ref9","unstructured":"Ji J, Qiu T, Chen B, Zhang B, Lou H, Wang K, et al. AI alignment: a comprehensive survey. arXiv:2310.19852. 2023."},{"key":"ref10","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1007\/s10462-024-10824-0","article-title":"A survey of safety and trustworthiness of large language models through the lens of verification and validation","volume":"57","author":"Huang","year":"2024","journal-title":"Artif Intell Rev"},{"key":"ref11","unstructured":"Cheng P, Du W, Wu Z, Zhang F, Chen L, Liu G. Syntactic ghost: an imperceptible general-purpose backdoor attacks on pre-trained language models. arXiv:2402.18945v1. 2024."},{"key":"ref12","unstructured":"Cao B, Lu K, Lu X, Chen J, Ren M, Xiang H, et al. Towards scalable automated alignment of llms: a survey. arXiv:2406.01252. 2024."},{"key":"ref13","unstructured":"Shen H, Knearem T, Ghosh R, Alkiek K, Krishna K, Liu Y, et al. Towards bidirectional human-AI alignment: a systematic review for clarifications, framework, and future directions. arXiv:2406.09264. 2024."},{"key":"ref14","unstructured":"Wang X, Duan S, Yi X, Yao J, Zhou S, Wei Z, et al. On the essence and prospect: an investigation of alignment approaches for big models. arXiv:2403.04204. 2024."},{"key":"ref15","doi-asserted-by":"crossref","unstructured":"Guan J, Wu J, Li JN, Cheng C, Wu W. A survey on personalized alignment\u2013The missing piece for large language models in real-world applications. arXiv:2503.17003. 2025.","DOI":"10.18653\/v1\/2025.findings-acl.277"},{"key":"ref16","unstructured":"Zhou D, Zhang J, Feng T, Sun Y. A survey on alignment for large language model agents. In:Submitted to CS598 LLM Agent 2025 Workshop; 2025 [Internet]. Under review. [cited 2025 Jul 20]. Available from: https:\/\/openreview.net\/forum?id=gkxt5kZS84."},{"key":"ref17","unstructured":"Carlsmith J. Is power-seeking AI an existential risk? arXiv:2206.13353. 2022."},{"key":"ref18","doi-asserted-by":"crossref","first-page":"3233","DOI":"10.1007\/s43681-024-00637-w","article-title":"Evaluating alignment in large language models: a review of methodologies","volume":"5","author":"Sarkar","year":"2025","journal-title":"AI Ethics"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"2983","DOI":"10.1007\/s00146-024-02063-2","article-title":"The state as a model for AI control and alignment","volume":"40","author":"Elsner","year":"2025","journal-title":"AI Soc"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"1439","DOI":"10.1007\/s00146-024-02039-2","article-title":"The problem of alignment","volume":"40","author":"Hristova","year":"2025","journal-title":"AI Soc"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1145\/3705294","article-title":"The AI alignment paradox","volume":"68","author":"West","year":"2025","journal-title":"Commun ACM"},{"key":"ref22","unstructured":"Zhang Y, Rando J, Evtimov I, Chi J, Smith EM, Carlini N, et al. Persistent pre-training poisoning of LLMs. arXiv:2410.13722. 2024."},{"key":"ref23","doi-asserted-by":"crossref","first-page":"107707","DOI":"10.1016\/j.infsof.2025.107707","article-title":"A review of backdoor attacks and defenses in code large language models: implications for security measures","volume":"182","author":"Qu","year":"2025","journal-title":"Inf Softw Tech"},{"key":"ref24","series-title":"Proceedings of the Network and Distributed System Security Symposium (NDSS)","article-title":"Safety misalignment against large language models","author":"Gong","year":"2025"},{"key":"ref25","first-page":"1","article-title":"A survey on hallucination in large language models: principles, taxonomy, challenges, and open questions","volume":"43","author":"Huang","year":"2025","journal-title":"ACM Trans Inf Syst"},{"key":"ref26","unstructured":"Li M, Chen H, Wang Y, Zhu T, Zhang W, Zhu K, et al. Understanding and mitigating the bias inheritance in LLM-based data augmentation on downstream tasks. arXiv:2502.04419. 2025."},{"key":"ref27","doi-asserted-by":"crossref","unstructured":"Yang K, Tao G, Chen X, Xu J. Alleviating the fear of losing alignment in LLM fine-tuning. arXiv:250409757. 2025.","DOI":"10.1109\/SP61157.2025.00171"},{"key":"ref28","unstructured":"Yi S, Liu Y, Sun Z, Cong T, He X, Song J, et al. Jailbreak attacks and defenses against large language models: a survey. arXiv:2407.04295. 2024."},{"key":"ref29","doi-asserted-by":"crossref","first-page":"80218","DOI":"10.1109\/ACCESS.2023.3300381","article-title":"From ChatGPT to ThreatGPT: impact of generative AI in cybersecurity and privacy","volume":"11","author":"Gupta","year":"2023","journal-title":"IEEE Access"},{"key":"ref30","unstructured":"Betley J, Prerequisites CM, Gallego V, Langosco L, Heim L, Newman J, et al. Emergent misalignment: narrow finetuning can produce broadly misaligned LLMs. arXiv:2502.17424. 2025."},{"key":"ref31","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1007\/s10515-024-00485-2","article-title":"BadCodePrompt: backdoor attacks against prompt engineering of large language models for code generation","volume":"32","author":"Qu","year":"2025","journal-title":"Autom Softw Eng"},{"key":"ref32","doi-asserted-by":"crossref","unstructured":"Zhang Z, Zhang Y, Li L, Gao H, Wang L, Lu H, et al. PsySafe: a comprehensive framework for psychological-based attack, defense, and evaluation of multi-agent system safety. arXiv:2401.11880. 2024.","DOI":"10.18653\/v1\/2024.acl-long.812"},{"key":"ref33","series-title":"30th USENIX Security Symposium (USENIX Security 21); 2021 Aug 11\u201313","first-page":"2633","article-title":"Extracting training data from large language models","author":"Carlini"},{"key":"ref34","unstructured":"Carlini N, Ippolito D, Jagielski M, Lee K, Tramer F, Zhang C. Quantifying memorization across neural language models. arXiv:2202.07646. 2022."},{"key":"ref35","first-page":"1","article-title":"Palm: scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"J Mach Learn Res"},{"key":"ref36","doi-asserted-by":"crossref","unstructured":"Lin S, Hilton J, Evans O. Truthfulqa: measuring how models mimic human falsehoods. arXiv:2109.07958. 2021.","DOI":"10.18653\/v1\/2022.acl-long.229"},{"key":"ref37","unstructured":"Zhu M, Liu Y, Guo J, Wang Q, Zhang Y, Mao Z. Leveraging robust optimization for LLM alignment under distribution shifts. arXiv:2504.05831. 2025."},{"key":"ref38","unstructured":"Lian J, Pan J, Wang L, Wang Y, Mei S, Chau LP. Revealing the intrinsic ethical vulnerability of aligned large language models. arXiv:2504.05050. 2025."},{"key":"ref39","series-title":"The 40th International Conference on Machine Learning; 2023 Jul 23\u201329","first-page":"15696","article-title":"Large language models struggle to learn long-tail knowledge","author":"Kandpal"},{"key":"ref40","first-page":"49025","article-title":"Realtime qa: what\u2019s the answer right now?","volume":"36","author":"Kasai","year":"2023","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref41","unstructured":"Gao L, Biderman S, Black S, Golding L, Hoppe T, Foster C, et al. The pile: an 800 GB dataset of diverse text for language modeling. arXiv:2101.00027. 2020."},{"key":"ref42","unstructured":"Xu Y, Chakraborty T, K\u0131c\u0131man E, Aryal B, Rodrigues E, Sharma S, et al. RLTHF: targeted human feedback for LLM alignment. arXiv:2502.13417. 2025."},{"key":"ref43","doi-asserted-by":"crossref","unstructured":"Chen X, Wen H, Nag S, Luo C, Yin Q, Li R, et al. Iteralign: iterative constitutional alignment of large language models. arXiv:2403.18341. 2024.","DOI":"10.18653\/v1\/2024.naacl-long.78"},{"key":"ref44","unstructured":"Li Z, Zhang S, Zhao H, Yang Y, Yang D. Batgpt: a bidirectional autoregessive talker from generative pre-trained transformer. arXiv:2307.00360. 2023."},{"key":"ref45","doi-asserted-by":"crossref","unstructured":"Chiang D, Cholak P. Overcoming a theoretical limitation of self-attention. arXiv:2202.12172. 2022.","DOI":"10.18653\/v1\/2022.acl-long.527"},{"key":"ref46","unstructured":"Burns C, Ye H, Klein D, Steinhardt J. Discovering latent knowledge in language models without supervision. arXiv:2212.03827. 2022."},{"key":"ref47","series-title":"Findings of the Association for Computational Linguistics: ACL 2023; 2023 Jul 9\u201314","first-page":"13387","article-title":"Discovering language model behaviors with model-written evaluations","author":"Perez"},{"key":"ref48","doi-asserted-by":"crossref","unstructured":"Xu J, Fu Y, Tan SH, He P. Aligning the objective of LLM-based program repair. arXiv:2404.08877. 2024.","DOI":"10.1109\/ICSE55347.2025.00169"},{"key":"ref49","unstructured":"Casper S, Davies X, Shi C, Gilbert TK, Scheurer J, Rando J, et al. Open problems and fundamental limitations of reinforcement learning from human feedback. arXiv:2307.15217. 2023."},{"key":"ref50","series-title":"Advances in Neural Information Processing Systems (NeurIPS)","article-title":"Defining and characterizing reward gaming","author":"Skalse","year":"2022 [Internet]. [cited 2025 Jul 20]"},{"key":"ref51","first-page":"53728","article-title":"Direct preference optimization: your language model is secretly a reward model","volume":"36","author":"Rafailov","year":"2023","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref52","unstructured":"Bai Y, Jones A, Ndousse K, Askell A, Chen A, DasSarma N, et al. Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv:2204.05862. 2022."},{"key":"ref53","unstructured":"Hubinger E, van Merwijk C, Mikulik V, Skalse J, Garrabrant S. Risks from learned optimization in advanced machine learning systems. arXiv:1906.01820. 2019."},{"key":"ref54","series-title":"Proceedings of the 39th AAAI Conference on Artificial Intelligence; 2025 Feb 25\u2013Mar 4","first-page":"23951","article-title":"Figstep: jailbreaking large vision-language models via typographic visual prompts","author":"Gong"},{"key":"ref55","first-page":"17359","article-title":"Locating and editing factual associations in gpt","volume":"35","author":"Meng","year":"2022","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref56","unstructured":"Meng K, Sharma AS, Andonian A, Belinkov Y, Bau D. Mass-editing memory in a transformer. arXiv:2210.07229. 2022."},{"key":"ref57","doi-asserted-by":"crossref","unstructured":"Geva M, Caciularu A, Wang KR, Goldberg Y. Transformer feed-forward layers build predictions by promoting concepts in the vocabulary space. arXiv:2203.14680. 2022.","DOI":"10.18653\/v1\/2022.emnlp-main.3"},{"key":"ref58","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1007\/s11023-020-09539-2","article-title":"Artificial intelligence, values, and alignment","volume":"30","author":"Gabriel","year":"2020","journal-title":"Minds Mach"},{"key":"ref59","unstructured":"News C. \u201cGodfather of AI\u201d Geoffrey Hinton warns of dangers as he quits Google [Internet]. 2023 [cited 2025 May 8]. Available from: https:\/\/www.cbsnews.com\/news\/godfather-of-ai-geoffrey-hinton-ai-warning\/."},{"key":"ref60","author":"Russell","year":"2019","journal-title":"Human compatible: artificial intelligence and the problem of control"},{"key":"ref61","unstructured":"Hendrycks D. Natural selection favors AIs over humans. arXiv:2303.16200. 2023."},{"key":"ref62","series-title":"NIPS\u201922: Proceedings of the 36th International Conference on Neural Information Processing Systems; 2022 Nov 28\u2013Dec 9","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","author":"Ouyang"},{"key":"ref63","unstructured":"Askell A, Bai Y, Chen A, Drain D, Ganguli D, Henighan T, et al. A general language assistant as a laboratory for alignment. arXiv:2112.00861. 2021."},{"key":"ref64","unstructured":"Zhou Z, Liu Z, Liu J, Dong Z, Yang C, Qiao Y. Weak-to-strong search: align large language models via searching over small language models. arXiv:2405.19262. 2024."},{"key":"ref65","unstructured":"Ziegler DM, Stiennon N, Wu J, Brown TB, Radford A, Amodei D, et al. Fine-tuning language models from human preferences. arXiv:1909.08593. 2019."},{"key":"ref66","doi-asserted-by":"crossref","unstructured":"Huang T, Hu S, Liu L. Vaccine: perturbation-aware alignment for large language models against harmful fine-tuning attack. arXiv:2402.01109. 2024.","DOI":"10.52202\/079017-2356"},{"key":"ref67","unstructured":"Zhao J, Deng Z, Madras D, Zou J, Ren M. Learning and forgetting unsafe examples in large language models. arXiv:2312.12736. 2023."},{"key":"ref68","unstructured":"Zong Y, Bohdal O, Yu T, Yang Y, Hospedales T. Safety fine-tuning at (almost) no cost: a baseline for vision large language models. arXiv:2402.02207. 2024."},{"key":"ref69","first-page":"104521","article-title":"Lisa: lazy safety alignment for large language models against harmful fine-tuning attack","volume":"37","author":"Huang","year":"2024","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref70","unstructured":"Kaufmann T, Weng P, Bengs V, H\u00fcllermeier E. A survey of reinforcement learning from human feedback. arXiv:2312.14925. 2023."},{"key":"ref71","series-title":"Proceedings of the 2024 ACM Conference on Fairness, Accountability, and Transparency; 2024 Jun 3\u20136","first-page":"1395","article-title":"Collective constitutional AI: Aligning a language model with public input","author":"Huang"},{"key":"ref72","unstructured":"Lee H, Phatale S, Mansoor H, Lu K, Mesnard T, Bishop C, et al. RLAIF: scaling reinforcement learning from human feedback with AI feedback. arXiv:2309.00267. 2023."},{"key":"ref73","unstructured":"Bai Y, Kadavath S, Kundu S, Askell A, Kernion J, Jones A, et al. Constitutional AI: harmlessness from AI feedback. arXiv:2212.08073. 2022."},{"key":"ref74","unstructured":"Liu S, Fang W, Hu Z, Zhang J, Zhou Y, Zhang K, et al. A survey of direct preference optimization. arXiv:2503.11701. 2025."},{"key":"ref75","unstructured":"Gao B, Song F, Miao Y, Cai Z, Yang Z, Chen L, et al. Towards a unified view of preference learning for large language models: a survey. arXiv:2409.02795. 2024."},{"key":"ref76","first-page":"124198","article-title":"Simple preference optimization with a reference-free reward","volume":"37","author":"Meng","year":"2024","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref77","unstructured":"Wu J, Huang K, Wang X, Gao J, Ding B, Wu J, et al. RePO: ReLU-based preference optimization. arXiv:2503.07426. 2025."},{"key":"ref78","unstructured":"Xiao T, Yuan Y, Chen Z, Li M, Liang S, Ren Z, et al. SimPER: a minimalist approach to preference alignment without hyperparameters. arXiv:2502.00883. 2025."},{"key":"ref79","unstructured":"Ganguli D, Lovitt L, Kernion J, Askell A, Bai Y, Kadavath S, et al. Red teaming language models to reduce harms: methods, scaling behaviors, and lessons learned. arXiv:2209.07858. 2022."},{"key":"ref80","first-page":"104412","article-title":"Distributional preference alignment of llms via optimal transport","volume":"37","author":"Melnyk","year":"2024","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref81","unstructured":"Cui G, Yuan L, Ding N, Yao G, He B, Zhu W, et al. UltraFeedback: boosting language models with scaled AI feedback. arXiv:2310.01377. 2024."},{"key":"ref82","doi-asserted-by":"crossref","unstructured":"Ding N, Chen Y, Xu B, Qin Y, Zheng Z, Hu S, et al. Enhancing chat language models by scaling high-quality instructional conversations. arXiv:2305.14233. 2023.","DOI":"10.18653\/v1\/2023.emnlp-main.183"},{"key":"ref83","unstructured":"Yuan Z, Yuan H, Tan C, Wang W, Huang S, Huang F. Rrhf: rank responses to align language models with human feedback without tears. arXiv:2304.05302. 2023."},{"key":"ref84","series-title":"Proceedings of the 38th AAAI Conference on Artificial Intelligence; 2024 Feb 20\u201327","first-page":"18990","article-title":"Preference ranking optimization for human alignment","author":"Song"},{"key":"ref85","doi-asserted-by":"crossref","unstructured":"Liu T, Qin Z, Wu J, Shen J, Khalman M, Joshi R, et al. Lipo: listwise preference optimization through learning-to-rank. arXiv:2402.01878. 2024.","DOI":"10.18653\/v1\/2025.naacl-long.121"},{"key":"ref86","doi-asserted-by":"crossref","unstructured":"Zhu M, Liu Y, Zhang L, Guo J, Mao Z. LIRE: listwise reward enhancement for preference alignment. arXiv:2405.13516. 2024.","DOI":"10.18653\/v1\/2024.findings-acl.201"},{"key":"ref87","unstructured":"Wang K, Zhu J, Ren M, Liu Z, Li S, Zhang Z, et al. A survey on data synthesis and augmentation for large language models. arXiv:2410.12896. 2024."},{"key":"ref88","unstructured":"Xu S, Fu W, Gao J, Ye W, Liu W, Mei Z, et al. Is dpo superior to ppo for llm alignment? A comprehensive study. arXiv:2404.10719. 2024."},{"key":"ref89","doi-asserted-by":"crossref","unstructured":"Ye Z, Greenlee-Scott F, Bartolo M, Blunsom P, Campos JA, Gall\u00e9 M. Improving reward models with synthetic critiques. arXiv:2405.20850. 2024.","DOI":"10.18653\/v1\/2025.findings-naacl.254"},{"key":"ref90","unstructured":"Adila D, Shin C, Zhang Y, Sala F. Is free self-alignment possible? arXiv:2406.03642. 2024."},{"key":"ref91","doi-asserted-by":"crossref","unstructured":"Wei Y, Cassano F, Liu J, Ding Y, Jain N, Mueller Z, et al. SelfCodeAlign: self-alignment for code generation. arXiv:2410.24198. 2024.","DOI":"10.52202\/079017-2008"},{"key":"ref92","unstructured":"Ding M, Chakraborty S, Agrawal V, Che Z, Koppel A, Wang M, et al. SAIL: self-improving efficient online alignment of large language models. arXiv:2406.15567. 2024."},{"key":"ref93","unstructured":"OpenAI. GPT-4 technical report. arXiv:2303.08774. 2023."},{"key":"ref94","unstructured":"Ye M, Rong X, Huang W, Du B, Yu N, Tao D. A survey of safety on large vision-language models: attacks, defenses and evaluations. arXiv:2502.14881. 2025."},{"key":"ref95","doi-asserted-by":"crossref","first-page":"1328","DOI":"10.1109\/TKDE.2019.2946162","article-title":"A survey on data collection for machine learning: a big data-ai integration perspective","volume":"33","author":"Roh","year":"2019","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"ref96","series-title":"2011 IEEE Third International Conference on privacy, Security, Risk and Trust and 2011 IEEE Third International Conference on Social Computing; 2011 Oct 9\u201311","first-page":"766","article-title":"A survey of crowdsourcing systems","author":"Yuen"},{"key":"ref97","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2022","first-page":"3684","article-title":"Constructing highly inductive contexts for dialogue safety through controllable reverse generation","author":"Zhang","year":"2022"},{"key":"ref98","first-page":"4349","article-title":"Man is to computer programmer as woman is to homemaker? Debiasing word embeddings","volume":"29","author":"Bolukbasi","year":"2016","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref99","series-title":"Findings of the Association for Computational Linguistics: ACL 2022","first-page":"2086","article-title":"BBQ: A hand-built bias benchmark for question answering","author":"Parrish","year":"2022"},{"key":"ref100","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019; 2019 Jun 2\u20137","first-page":"1415","article-title":"Predicting the type and target of offensive posts in social media","author":"Zampieri"},{"key":"ref101","first-page":"915","author":"Rosenthal","year":"2021","journal-title":"Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021"},{"key":"ref102","series-title":"Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","first-page":"15","article-title":"Gender bias in coreference resolution: evaluation and debiasing methods","author":"Zhao","year":"2018"},{"key":"ref103","series-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"1953","article-title":"CrowS-Pairs: a challenge dataset for measuring social biases in masked language models","author":"Nangia","year":"2020"},{"key":"ref104","first-page":"139","article-title":"A theory of production","volume":"18","author":"Cobb","year":"1928","journal-title":"Am Econ Rev"},{"key":"ref105","unstructured":"Celikyilmaz A, Clark E, Gao J. Evaluation of text generation: a survey. arXiv:2006.14799. 2020."},{"key":"ref106","unstructured":"Papernot N, McDaniel P, Sinha A, Wellman M. Towards the science of security and privacy in machine learning. arXiv:1611.03814. 2016."},{"key":"ref107","unstructured":"Weston J, Bordes A, Chopra S, Rush AM, Van Merri\u00ebnboer B, Joulin A, et al. Towards ai-complete question answering: a set of prerequisite toy tasks. arXiv:1502.05698. 2015."}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-85-1\/TSP_CMC_67750\/TSP_CMC_67750.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:02:51Z","timestamp":1763344971000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v85n1\/63570"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":107,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.067750","relation":{},"ISSN":["1546-2226"],"issn-type":[{"value":"1546-2226","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}