{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:01:40Z","timestamp":1761894100578,"version":"build-2065373602"},"publisher-location":"Singapore","reference-count":81,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819527243","type":"print"},{"value":"9789819527250","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-2725-0_9","type":"book-chapter","created":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T05:19:20Z","timestamp":1761887960000},"page":"119-138","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["UMAD: Enhancing LLM Debiasing via\u00a0Multi-agent Debate and\u00a0Token-Level Bias Interpretation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4186-378X","authenticated-orcid":false,"given":"Hanwen","family":"Gu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7957-4274","authenticated-orcid":false,"given":"Jie","family":"Ma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7653-8240","authenticated-orcid":false,"given":"Ying","family":"Qin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8982-8890","authenticated-orcid":false,"given":"Ling","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,1]]},"reference":[{"key":"9_CR1","doi-asserted-by":"publisher","unstructured":"Raza, S., Bashir, S.R., Sneha, Qamar, U.: Addressing biases in the texts using an end-to-end pipeline approach. In: Boratto, L., Faralli, S., Marras, M., Stilo, G. (eds) BIAS 2023. CCIS, vol. 1840, pp. 100\u2013107. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-37249-0_8","DOI":"10.1007\/978-3-031-37249-0_8"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Hallinan, S., Liu, A., Choi, Y., Sap, M.: Detoxifying text with MaRCo: controllable revision with experts and anti-experts. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 228\u2013242. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-short.21"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Pryzant, R., Martinez, R.D., Dass, N., Kurohashi, S., Jurafsky, D., Yang, D.: Automatically neutralizing subjective bias in text. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, no. 01, pp. 480\u2013489 (2020)","DOI":"10.1609\/aaai.v34i01.5385"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Floto, G., et al.: DiffuDetox: a mixed diffusion model for text detoxification. In: Findings of the Association for Computational Linguistics: ACL 2023, pp. 7566\u20137574. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.findings-acl.478"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Madanagopal, K., Caverlee, J.: Reinforced sequence training based subjective bias correction. In: Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pp. 2585\u20132598. Association for Computational Linguistics, Dubrovnik (2023)","DOI":"10.18653\/v1\/2023.eacl-main.189"},{"key":"9_CR6","unstructured":"Doughman, J., Khreich, W.: Gender bias in text: labeled datasets and lexicons. arXiv preprint arXiv:2201.08675 (2023)"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Kaneko, M., Bollegala, D.: Dictionary-based debiasing of pre-trained word embeddings. In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pp. 212\u2013223. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.eacl-main.16"},{"key":"9_CR8","unstructured":"Raza, S., Ding, C., Pandya, D.: Mitigating bias in conversations: a hate speech classifier and debiaser with prompts. arXiv preprint arXiv:2307.10213 (2023)"},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Logacheva, V., et al.: ParaDetox: detoxification with parallel data. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 6804\u20136818. Association for Computational Linguistics, Dublin (2022)","DOI":"10.18653\/v1\/2022.acl-long.469"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Pour, M.M.A., Farinneya, P., Bharadwaj, M., Verma, N., Pesaranghader, A., Sanner, S.: COUNT: COntastive UNlikelihood text style transfer for text detoxification. In: Findings of the Association for Computational Linguistics: EMNLP 2023, pp. 8658\u20138666. Association for Computational Linguistics, Singapore (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.579"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Laugier, L., Pavlopoulos, J., Sorensen, J., Dixon, L.: Civil rephrases of toxic texts with self-supervised transformers. In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pp. 1442\u20131461. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.eacl-main.124"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Sahoo, N., Gupta, H., Bhattacharyya, P.: Detecting unintended social bias in toxic language datasets. In: Proceedings of the 26th Conference on Computational Natural Language Learning (CoNLL), pp. 132\u2013143. Association for Computational Linguistics, Abu Dhabi (2022)","DOI":"10.18653\/v1\/2022.conll-1.10"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Hu, S., et al.: Balancing gender bias in job advertisements with text-level bias mitigation. Front. Big Data 5 (2022). https:\/\/www.frontiersin.org\/articles\/c14","DOI":"10.3389\/fdata.2022.805713"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Sun, T., et al.: Mitigating gender bias in natural language processing: literature review. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 1630\u20131640. Association for Computational Linguistics, Florence (2019)","DOI":"10.18653\/v1\/P19-1159"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Muralidhar, D.: Examining religion bias in AI text generators. In: Proceedings of the 2021 AAAI\/ACM Conference on AI, Ethics, and Society (AIES 2021), pp. 273\u2013274. Association for Computing Machinery, Virtual Event, USA (2021)","DOI":"10.1145\/3461702.3462469"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Mou, X., Wei, Z., Zhang, Q., Huang, X.: UPPAM: a unified pre-training architecture for political actor modeling based on language. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 11996\u201312012. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-long.670"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Jiang, J., Ren, X., Ferrara, E.: Retweet-BERT: political leaning detection using language features and information diffusion on social networks. In: Proceedings of the International AAAI Conference on Web and Social Media, vol. 17, no. 1, pp. 459\u2013469 (2023)","DOI":"10.1609\/icwsm.v17i1.22160"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Liu, Y., Zhang, X.F., Wegsman, D., Beauchamp, N., Wang, L.: POLITICS: pretraining with same-story article comparison for ideology prediction and stance detection. In: Findings of the Association for Computational Linguistics: NAACL 2022, pp. 1354\u20131374. Association for Computational Linguistics, Seattle (2022)","DOI":"10.18653\/v1\/2022.findings-naacl.101"},{"key":"9_CR19","unstructured":"Louviere, J.J., Woodworth, G.G.: Best-worst scaling: a model for the largest difference judgments. Working paper (1991)"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Hada, R., Seth, A., Diddee, H., Bali, K.: \u201cFifty shades of bias\u201d: normative ratings of gender bias in GPT generated English text. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP 2023) (2023). https:\/\/api.semanticscholar.org\/CorpusID:264490615","DOI":"10.18653\/v1\/2023.emnlp-main.115"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Xiong, K., Ding, X., Cao, Y., Liu, T., Qin, B.: Examining inter-consistency of large language models collaboration: an in-depth analysis via debate. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP 2023) (2023). https:\/\/api.semanticscholar.org\/CorpusID:258832565","DOI":"10.18653\/v1\/2023.findings-emnlp.508"},{"key":"9_CR22","unstructured":"Chan, C.-M., et al.: ChatEval: towards better LLM-based evaluators through multi-agent debate. arXiv preprint arXiv:2308.07201 (2023). https:\/\/api.semanticscholar.org\/CorpusID:260887105"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Feng, S., Park, C.Y., Liu, Y., Tsvetkov, Y.: From pretraining data to language models to downstream tasks: tracking the trails of political biases leading to unfair NLP models. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 11737\u201311762. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-long.656"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Flynn, T., Marley, A.A.J.: Best-worst scaling: theory and methods (2014). https:\/\/api.semanticscholar.org\/CorpusID:51732624","DOI":"10.4337\/9781781003152.00014"},{"key":"9_CR25","unstructured":"Verma, R., Roychoudhury, R., Ghosal, T.: The lack of theory is painful: modeling harshness in peer review comments. In: Proceedings of the AACL 2022 (2022). https:\/\/api.semanticscholar.org\/CorpusID:253762075"},{"key":"9_CR26","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1007\/s11263-019-01228-7","volume":"128","author":"RR Selvaraju","year":"2016","unstructured":"Selvaraju, R.R., Das, A., Vedantam, R., Cogswell, M., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. Int. J. Comput. Vision 128, 336\u2013359 (2016)","journal-title":"Int. J. Comput. Vision"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Shu, Y., van den Hengel, A., Liu, L.: Learning common rationale to improve self-supervised representation for fine-grained visual recognition problems. In: Proceedings of the 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11392\u201311401 (2023)","DOI":"10.1109\/CVPR52729.2023.01096"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Spinde, T., Plank, M., Krieger, J.-D., Ruas, T., Gipp, B., Aizawa, A.: Neural media bias detection using distant supervision with BABE \u2013 bias annotations by experts. In: Findings of the Association for Computational Linguistics: EMNLP 2021, pp. 1166\u20131177. Association for Computational Linguistics, Punta Cana (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.101"},{"key":"9_CR29","unstructured":"Mesnard, T., et al.: Gemma: open models based on Gemini research and technology. arXiv preprint arXiv:2403.08295 (2024)"},{"key":"9_CR30","unstructured":"Jiang, A.Q., et al.: Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)"},{"key":"9_CR31","unstructured":"MetaAI: Introducing Meta Llama 3: The Most Capable Openly Available LLM to Date (2024). https:\/\/ai.meta.com\/blog\/meta-llama-3\/. Accessed 10 May 2024"},{"key":"9_CR32","unstructured":"OpenAI: chatGPT: A Large-Scale Generative Model for Open-Domain Chat (2021). https:\/\/github.com\/openai\/gpt-3"},{"key":"9_CR33","unstructured":"OpenAI: GPT-4 Research (2024). https:\/\/openai.com\/index\/gpt-4-research\/. Accessed 10 May 2024"},{"key":"9_CR34","unstructured":"Achiam, J., et al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2024)"},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Gao, T., Yao, X., Chen, D.: SimCSE: simple contrastive learning of sentence embeddings. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 6894\u20136910. Association for Computational Linguistics, Online and Punta Cana (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Danilevsky, M., Qian, K., Aharonov, R., Katsis, Y., Kawas, B., Sen, P.: A survey of the state of explainable AI for natural language processing. In: Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing, pp. 447\u2013459. Association for Computational Linguistics, Suzhou (2020)","DOI":"10.18653\/v1\/2020.aacl-main.46"},{"key":"9_CR37","doi-asserted-by":"crossref","unstructured":"Danilevsky, M., Qian, K., Aharonov, R., Katsis, Y., Kawas, B., Sen, P.: A survey of the state of explainable AI for natural language processing. arXiv preprint arXiv:2010.00711 (2020)","DOI":"10.18653\/v1\/2020.aacl-main.46"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Atanasova, P., Simonsen, J.G., Lioma, C., Augenstein, I.: A diagnostic study of explainability techniques for text classification. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 3256\u20133274. Association for Computational Linguistics, Online (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.263"},{"key":"9_CR39","unstructured":"Ribeiro, M.T., Singh, S., Guestrin, C.: Model-agnostic interpretability of machine learning. arXiv preprint arXiv:1606.05386 (2016)"},{"key":"9_CR40","unstructured":"Mosca, E., Szigeti, F., Tragianni, S., Gallagher, D., Groh, G.: SHAP-based explanation methods: a review for NLP interpretability. In: Proceedings of the 29th International Conference on Computational Linguistics, pp. 4593\u20134603. International Committee on Computational Linguistics, Gyeongju (2022)"},{"key":"9_CR41","doi-asserted-by":"crossref","unstructured":"Devatine, N., Muller, P., Braud, C.: An integrated approach for political bias prediction and explanation based on discursive structure. In: Findings of the Association for Computational Linguistics: ACL 2023, pp. 11196\u201311211. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.findings-acl.711"},{"key":"9_CR42","unstructured":"Dhingra, H., Jayashanker, P., Moghe, S., Strubell, E.: Queer people are people first: deconstructing sexual identity stereotypes in large language models. arXiv preprint arXiv:2307.00101 (2023)"},{"key":"9_CR43","doi-asserted-by":"crossref","unstructured":"Karlekar, S., Niu, T., Bansal, M.: Detecting linguistic characteristics of Alzheimer\u2019s dementia by interpreting neural models. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers), pp. 701\u2013707. Association for Computational Linguistics, New Orleans (2018)","DOI":"10.18653\/v1\/N18-2110"},{"key":"9_CR44","doi-asserted-by":"crossref","unstructured":"Jahromi, M.N.S., Muddamsetty, S.M., Jarlner, A.S.S., H\u00f8genhaug, A.M., Gammeltoft-Hansen, T., Moeslund, T.B.: SIDU-TXT: an XAI algorithm for NLP with a holistic assessment approach. arXiv preprint arXiv:2402.03043 (2024)","DOI":"10.31235\/osf.io\/jemvk"},{"key":"9_CR45","unstructured":"Mavrepis, P., Makridis, G., Fatouros, G., Koukos, V., Separdani, M.M., Kyriazis, D.: XAI for all: can large language models simplify explainable AI? arXiv preprint arXiv:2401.13110 (2024)"},{"key":"9_CR46","unstructured":"Guo, T., et al.: Large language model based multi-agents: a survey of progress and challenges. arXiv preprint arXiv:2402.01680 (2024)"},{"key":"9_CR47","doi-asserted-by":"crossref","unstructured":"Xiong, K., Ding, X., Cao, Y., Liu, T., Qin, B.: Examining inter-consistency of large language models collaboration: an in-depth analysis via debate. In: Findings of the Association for Computational Linguistics: EMNLP 2023, pp. 7572\u20137590. Association for Computational Linguistics, Singapore (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.508"},{"key":"9_CR48","doi-asserted-by":"crossref","unstructured":"Liang, T., et al.: Encouraging divergent thinking in large language models through multi-agent debate. arXiv preprint arXiv:2305.19118 (2023)","DOI":"10.18653\/v1\/2024.emnlp-main.992"},{"key":"9_CR49","unstructured":"Li, L., Bamman, D.: Gender and representation bias in GPT-3 generated stories. In: Proceedings of the Third Workshop on Narrative Understanding, pp. 48\u201355. Association for Computational Linguistics, Virtual (2021)"},{"key":"9_CR50","doi-asserted-by":"crossref","unstructured":"Fang, X., Che, S., Mao, M., Zhang, H., Zhao, M., Zhao, X.: Bias of AI-generated content: an examination of news produced by large language models. arXiv preprint arXiv:2309.09825 (2024)","DOI":"10.21203\/rs.3.rs-3499674\/v1"},{"key":"9_CR51","doi-asserted-by":"crossref","unstructured":"Wang, L., Li, Y., Miller, T., Bethard, S., Savova, G.: Two-stage fine-tuning for improved bias and variance for large pretrained language models. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 15746\u201315761. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-long.877"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Felkner, V., Chang, H.-C.H., Jang, E., May, J.: WinoQueer: a community-in-the-loop benchmark for anti-LGBTQ+ bias in large language models. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 9126\u20139140. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-long.507"},{"key":"9_CR53","doi-asserted-by":"crossref","unstructured":"Lee, H., Hong, S., Park, J., Kim, T., Kim, G., Ha, J.-W.: KoSBI: a dataset for mitigating social bias risks towards safer large language model applications. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track), pp. 208\u2013224. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-industry.21"},{"key":"9_CR54","unstructured":"Orme, B.: Maxdiff analysis: simple counting, individual-level logit, and HB. Sawtooth Softw. (2009)"},{"key":"9_CR55","doi-asserted-by":"crossref","unstructured":"Furniturewala, S., et al.: Thinking fair and slow: on the efficacy of structured prompts for debiasing language models. arXiv preprint arXiv:2405.10431 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.13"},{"key":"9_CR56","doi-asserted-by":"crossref","unstructured":"Ou, J., Zhang, J., Feng, Y., Zhou, J.: Counterfactual data augmentation via perspective transition for open-domain dialogues. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 1635\u20131648. Association for Computational Linguistics, Abu Dhabi (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.106"},{"key":"9_CR57","doi-asserted-by":"crossref","unstructured":"Qian, R., Ross, C., Fernandes, J., Smith, E.M., Kiela, D., Williams, A.: Perturbation augmentation for fairer NLP. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 9496\u20139521. Association for Computational Linguistics, Abu Dhabi (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.646"},{"key":"9_CR58","unstructured":"Zayed, A., Parthasarathi, P., Mordido, G., Palangi, H., Shabanian, S., Chandar, S.: Deep learning on a healthy data diet: finding important examples for fairness. In: Proceedings of the Thirty-Seventh AAAI Conference on Artificial Intelligence, the Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence, and the Thirteenth Symposium on Educational Advances in Artificial Intelligence (AAAI 2023\/IAAI 2023\/EAAI 2023), article 1637, 9 p. AAAI Press (2023)"},{"key":"9_CR59","doi-asserted-by":"crossref","unstructured":"Thakur, H., Jain, A., Vaddamanu, P., Liang, P.P., Morency, L.-P.: Language models get a gender makeover: mitigating gender bias with few-shot data interventions. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 340\u2013351. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-short.30"},{"key":"9_CR60","doi-asserted-by":"crossref","unstructured":"Ghanbarzadeh, S., Huang, Y., Palangi, H., Cruz Moreno, R., Khanpour, H.: Gender-tuning: empowering fine-tuning for debiasing pre-trained language models. In: Findings of the Association for Computational Linguistics: ACL 2023, pp. 5448\u20135458. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.findings-acl.336"},{"key":"9_CR61","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. arXiv preprint arXiv:1910.10683 (2023)"},{"key":"9_CR62","doi-asserted-by":"crossref","unstructured":"Pesaranghader, A., Verma, N., Bharadwaj, M.: GPT-DETOX: an in-context learning-based paraphraser for text detoxification. In: Proceedings of the 2023 International Conference on Machine Learning and Applications (ICMLA), pp. 1528\u20131534. IEEE (2023)","DOI":"10.1109\/ICMLA58977.2023.00230"},{"key":"9_CR63","doi-asserted-by":"crossref","unstructured":"Mishra, A., Nayak, G., Bhattacharya, S., Kumar, T., Shah, A., Foltin, M.: LLM-guided counterfactual data generation for fairer AI. In: Companion Proceedings of the ACM on Web Conference 2024 (WWW 2024), pp. 1538\u20131545. Association for Computing Machinery, Singapore (2024)","DOI":"10.1145\/3589335.3651929"},{"issue":"140","key":"9_CR64","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"9_CR65","doi-asserted-by":"crossref","unstructured":"Dale, D., et al.: Text detoxification using large pre-trained neural models. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 7979\u20137996. Association for Computational Linguistics, Online and Punta Cana (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.629"},{"key":"9_CR66","unstructured":"Han, P., Kocielnik, R., Saravanan, A., Jiang, R., Sharir, O., Anandkumar, A.: ChatGPT-based data augmentation for improved parameter-efficient debiasing of LLMs. In: Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion, pp. 73\u2013105. Association for Computational Linguistics, St. Julian\u2019s (2024)"},{"key":"9_CR67","unstructured":"Sun, Z., et al.: Enhancing code generation performance of smaller models by distilling the reasoning ability of LLMs. In: Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pp. 5878\u20135895. ELRA and ICCL, Torino (2024)"},{"key":"9_CR68","doi-asserted-by":"crossref","unstructured":"Hsieh, C.-Y., et al.: Distilling step-by-step! Outperforming larger language models with less training data and smaller model sizes. In: Findings of the Association for Computational Linguistics: ACL 2023, pp. 8003\u20138017. Association for Computational Linguistics, Toronto, Canada (2023)","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"9_CR69","doi-asserted-by":"crossref","unstructured":"Gao, Z.-F., Zhou, K., Liu, P., Zhao, W.X., Wen, J.-R.: Small pre-trained language models can be fine-tuned as large models via over-parameterization. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 3819\u20133834. Association for Computational Linguistics, Toronto (2023)","DOI":"10.18653\/v1\/2023.acl-long.212"},{"key":"9_CR70","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, J., Zhao, H.: Self-prompted chain-of-thought on large language models for open-domain multi-hop reasoning. In: Findings of the Association for Computational Linguistics: EMNLP 2023, pp. 2717\u20132731. Association for Computational Linguistics, Singapore (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.179"},{"key":"9_CR71","unstructured":"Oba, D., Kaneko, M., Bollegala, D.: In-contextual gender bias suppression for large language models. In: Findings of the Association for Computational Linguistics: EACL 2024, pp. 1722\u20131742. Association for Computational Linguistics, St. Julian\u2019s (2024)"},{"key":"9_CR72","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. In: Proceedings of the 2017 IEEE International Conference on Computer Vision (ICCV), pp. 618\u2013626. IEEE (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"9_CR73","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. In: Advances in Neural Information Processing Systems, vol. 35, pp. 27730\u201327744. Curran Associates, Inc. (2022)"},{"issue":"1","key":"9_CR74","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1162\/coli_a_00502","volume":"50","author":"C Ziems","year":"2024","unstructured":"Ziems, C., Held, W., Shaikh, O., Chen, J., Zhang, Z., Yang, D.: Can large language models transform computational social science? Comput. Linguist. 50(1), 237\u2013291 (2024)","journal-title":"Comput. Linguist."},{"key":"9_CR75","doi-asserted-by":"crossref","unstructured":"Li, Y., Ungar, L., Sedoc, J.: Conceptor-aided debiasing of large language models. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 10703\u201310727. Association for Computational Linguistics, Singapore (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.661"},{"key":"9_CR76","doi-asserted-by":"crossref","unstructured":"Shrawgi, H., Rath, P., Singhal, T., Dandapat, S.: Uncovering stereotypes in large language models: a task complexity-based approach. In: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1841\u20131857. Association for Computational Linguistics, St. Julian\u2019s (2024)","DOI":"10.18653\/v1\/2024.eacl-long.111"},{"key":"9_CR77","doi-asserted-by":"crossref","unstructured":"Nozza, D., Bianchi, F., Hovy, D.: Pipelines for social bias testing of large language models. In: Proceedings of BigScience Episode #5 \u2013 Workshop on Challenges & Perspectives in Creating Large Language Models, pp. 68\u201374. Association for Computational Linguistics, Virtual+Dublin (2022)","DOI":"10.18653\/v1\/2022.bigscience-1.6"},{"key":"9_CR78","doi-asserted-by":"crossref","unstructured":"Chen, M., et al.: Combating security and privacy issues in the era of large language models. In: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 5: Tutorial Abstracts), pp. 8\u201318. Association for Computational Linguistics, Mexico City (2024)","DOI":"10.18653\/v1\/2024.naacl-tutorials.2"},{"key":"9_CR79","unstructured":"Owens, D.M., et al.: A multi-LLM debiasing framework. arXiv preprint arXiv:2409.13884 (2024)"},{"key":"9_CR80","unstructured":"Li, J., et al.: Steering LLMs towards unbiased responses: a causality-guided debiasing framework. arXiv preprint arXiv:2403.08743 (2024)"},{"key":"9_CR81","unstructured":"Schlicht, I.B., Altiok, D., Taouk, M., Flek, L.: Pitfalls of conversational LLMs on news debiasing. In: Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024, pp. 33\u201338. ELRA and ICCL, Torino (2024)"}],"container-title":["Lecture Notes in Computer Science","Chinese Computational Linguistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-2725-0_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T05:19:48Z","timestamp":1761887988000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-2725-0_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,1]]},"ISBN":["9789819527243","9789819527250"],"references-count":81,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-2725-0_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,1]]},"assertion":[{"value":"1 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CCL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China National Conference on Chinese Computational Linguistics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jinan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cncl2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/link.springer.com\/conference\/cncl","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}