{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T17:00:15Z","timestamp":1754154015756,"version":"3.41.2"},"publisher-location":"Singapore","reference-count":34,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819500192"},{"type":"electronic","value":"9789819500208"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-95-0020-8_17","type":"book-chapter","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T09:16:47Z","timestamp":1753262207000},"page":"201-211","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Talk the Talk, Debate the Bias: LLM Alignment via Role-Play Rumble"],"prefix":"10.1007","author":[{"given":"Ruoxi","family":"Cheng","sequence":"first","affiliation":[]},{"given":"Zhiqiang","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Shaowei","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Yizhong","family":"Ding","sequence":"additional","affiliation":[]},{"given":"Rui","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,24]]},"reference":[{"key":"17_CR1","unstructured":"Amanda, A., et al.: A general language assistant as a laboratory for alignment. arXiv: Computation and Language (2021)"},{"key":"17_CR2","unstructured":"Bai, Y., et al.: Constitutional AI: harmlessness from AI feedback. arXiv preprint arXiv:2212.08073 (2022)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Bender, E.M., Gebru, T., McMillan-Major, A., Shmitchell, S.: On the dangers of stochastic parrots: can language models be too big? In: Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency, pp. 610\u2013623 (2021)","DOI":"10.1145\/3442188.3445922"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Birhane, A., Prabhu, V.U.: large image datasets: a pyrrhic win for computer vision? In: 2021 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1536\u20131546. IEEE (2021)","DOI":"10.1109\/WACV48630.2021.00158"},{"key":"17_CR5","unstructured":"Bolukbasi, T., Chang, K.W., Zou, J.Y., Saligrama, V., Kalai, A.T.: Man is to computer programmer as woman is to homemaker? Debiasing word embeddings. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"17_CR6","unstructured":"Chan, C.M., et al.: ChatEval: towards better LLM-based evaluators through multi-agent debate. arXiv preprint arXiv:2308.07201 (2023)"},{"key":"17_CR7","unstructured":"Christiano, P.F., Leike, J., Brown, T., Martic, M., Legg, S., Amodei, D.: Deep reinforcement learning from human preferences. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Duan, X., et al.: Legal summarization for multi-role debate dialogue via controversy focus mining and multi-task learning. In: Proceedings of the 28th ACM International Conference on Information and Knowledge Management, pp. 1361\u20131370 (2019)","DOI":"10.1145\/3357384.3357940"},{"key":"17_CR9","unstructured":"Fu, Y., Peng, H., Khot, T., Lapata, M.: Improving language model negotiation with self-play and in-context learning from AI feedback. arXiv preprint arXiv:2305.10142 (2023)"},{"key":"17_CR10","unstructured":"Han, S., Zhang, Q., Yao, Y., Jin, W., Xu, Z., He, C.: LLM multi-agent systems: challenges and open problems. arXiv preprint arXiv:2402.03578 (2024)"},{"issue":"248","key":"17_CR11","first-page":"1","volume":"21","author":"P Henderson","year":"2020","unstructured":"Henderson, P., Hu, J., Romoff, J., Brunskill, E., Jurafsky, D., Pineau, J.: Towards the systematic reporting of the energy and carbon footprints of machine learning. J. Mach. Learn. Res. 21(248), 1\u201343 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"17_CR12","unstructured":"Huang, J., et al.: Large language models cannot self-correct reasoning yet. arXiv preprint arXiv:2310.01798 (2023)"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Kamruzzaman, M., Shovon, M.M.I., Kim, G.L.: Investigating subtler biases in LLMs: ageism, beauty, institutional, and nationality bias in generative models. arXiv preprint arXiv:2309.08902 (2023)","DOI":"10.18653\/v1\/2024.findings-acl.530"},{"key":"17_CR14","unstructured":"Kim, K., Lee, S., Huang, K.H., Chan, H.P., Li, M., Ji, H.: Can LLMs produce faithful explanations for fact-checking? Towards faithful explainable fact-checking via multi-agent debate. arXiv preprint arXiv:2402.07401 (2024)"},{"key":"17_CR15","unstructured":"Knox, W.B., Stone, P.: Reinforcement learning from simultaneous human and MDP reward. In: AAMAS, Valencia, vol. 1004, pp. 475\u2013482 (2012)"},{"key":"17_CR16","unstructured":"Lee, H., et al.: RLAIF: scaling reinforcement learning from human feedback with AI feedback. arXiv preprint arXiv:2309.00267 (2023)"},{"key":"17_CR17","unstructured":"Lee, K., Hwang, D., Park, S., Jang, Y., Lee, M.: Reinforcement learning from reflective feedback (RLRF): aligning and improving LLMs via fine-grained self-reflection. arXiv preprint arXiv:2403.14238 (2024)"},{"key":"17_CR18","unstructured":"Li, T., et al.: Your large language model is secretly a fairness proponent and you should prompt it like one. arXiv preprint arXiv:2402.12150 (2024)"},{"key":"17_CR19","unstructured":"Lu, L.C., Chen, S.J., Pai, T.M., Yu, C.H., Lee, H.y., Sun, S.H.: LLM discussion: enhancing the creativity of large language models via discussion framework and role-play. arXiv preprint arXiv:2405.06373 (2024)"},{"key":"17_CR20","unstructured":"Madaan, A., et al.: Self-refine: iterative refinement with self-feedback. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Mao, Z., Li, J., Jin, D., Li, M., Tei, K.: Multi-role consensus through LLMs discussions for vulnerability detection (2024)","DOI":"10.1109\/QRS-C63300.2024.00173"},{"key":"17_CR22","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback"},{"key":"17_CR23","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"17_CR24","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"17_CR25","unstructured":"Sharma, A., Keh, S., Mitchell, E., Finn, C., Arora, K., Kollar, T.: A critical evaluation of ai feedback for aligning large language models (2024)"},{"key":"17_CR26","unstructured":"Stiennon, N., et al.: Learning to summarize from human feedback. arXiv: Computation and Language (2020)"},{"key":"17_CR27","unstructured":"Sun, T., et al.: Mitigating gender bias in natural language processing: literature review. arXiv preprint arXiv:1906.08976 (2019)"},{"key":"17_CR28","unstructured":"Tamkin, A., et al.: Evaluating and mitigating discrimination in language model decisions. arXiv preprint arXiv:2312.03689 (2023)"},{"key":"17_CR29","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"17_CR30","first-page":"34347","volume":"35","author":"G Zhang","year":"2022","unstructured":"Zhang, G., et al.: Fairness reprogramming. Adv. Neural. Inf. Process. Syst. 35, 34347\u201334362 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"17_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: Self-contrast: better reflection through inconsistent solving perspectives. arXiv preprint arXiv:2401.02009 (2024)","DOI":"10.18653\/v1\/2024.acl-long.197"},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Zhao, J., Wang, T., Yatskar, M., Cotterell, R., Ordonez, V., Chang, K.W.: Gender bias in contextualized word embeddings. arXiv preprint arXiv:1904.03310 (2019)","DOI":"10.18653\/v1\/N19-1064"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Song, J., Yao, K., Shu, Z., Ma, L.: ISR-LLM: iterative self-refined large language model for long-horizon sequential task planning. arXiv preprint arXiv:2308.13724 (2023)","DOI":"10.1109\/ICRA57147.2024.10610065"},{"key":"17_CR34","unstructured":"Ziegler, D.M., et al.: Fine-tuning language models from human preferences. arXiv preprint arXiv:1909.08593 (2019)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-0020-8_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T22:20:07Z","timestamp":1753309207000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-0020-8_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819500192","9789819500208"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-0020-8_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}