{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T19:28:20Z","timestamp":1777663700887,"version":"3.51.4"},"publisher-location":"Singapore","reference-count":47,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819666027","type":"print"},{"value":"9789819666034","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T00:00:00Z","timestamp":1752019200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T00:00:00Z","timestamp":1752019200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-96-6603-4_28","type":"book-chapter","created":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T11:14:15Z","timestamp":1751973255000},"page":"400-415","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Annotator Disagreement-Based Analysis for\u00a0Developing Bias Benchmark Datasets in\u00a0Resource-Restricted Settings"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6054-9543","authenticated-orcid":false,"given":"Vithya","family":"Yogarajan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1257-2191","authenticated-orcid":false,"given":"Paul","family":"Rayson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7245-0367","authenticated-orcid":false,"given":"Gillian","family":"Dobbie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6416-0423","authenticated-orcid":false,"given":"Aaron","family":"Keesing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8628-4993","authenticated-orcid":false,"given":"Te Taka","family":"Keegan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1137-2822","authenticated-orcid":false,"given":"Diana","family":"Benavides-Prado","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7554-0971","authenticated-orcid":false,"given":"Michael","family":"Witbrock","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,9]]},"reference":[{"key":"28_CR1","unstructured":"Akhtar, S., Basile, V., Patti, V.: Whose opinions matter? Perspective-aware models to identify opinions of hate speech victims in abusive language detection. arXiv preprint arXiv:2106.15896 (2021)"},{"key":"28_CR2","unstructured":"Almanea, D., Poesio, M.: Armis-the Arabic misogyny and sexism corpus with annotator subjective disagreements. In: LREC, pp. 2282\u20132291 (2022)"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"Bhatt, S., Dev, S., Talukdar, P., Dave, S., Prabhakaran, V.: Re-contextualizing fairness in NLP: the case of India. In: AACL-IJCNLP, pp. 727\u2013740 (2022)","DOI":"10.18653\/v1\/2022.aacl-main.55"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Blodgett, S.L., Barocas, S., Daum\u00e9\u00a0III, H., Wallach, H.: Language (technology) is power: a critical survey of \u201cbias\u201d in NLP. In: ACL, pp. 5454\u20135476 (2020)","DOI":"10.18653\/v1\/2020.acl-main.485"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Blodgett, S.L., Lopez, G., Olteanu, A., Sim, R., Wallach, H.: Stereotyping Norwegian salmon: an inventory of pitfalls in fairness benchmark datasets. In: ACL-IJCNLP, pp. 1004\u20131015. Online (2021)","DOI":"10.18653\/v1\/2021.acl-long.81"},{"key":"28_CR6","doi-asserted-by":"crossref","unstructured":"Braun, D.: I beg to differ: how disagreement is handled in the annotation of legal machine learning data sets. Artif. Intell. Law, 1\u201324 (2023)","DOI":"10.1007\/s10506-023-09369-4"},{"key":"28_CR7","doi-asserted-by":"publisher","unstructured":"Cerquides, J., M\u00fcl\u00e2yim, M.O.: CROWDNALYSIS: a software library to help analyze crowdsourcing results (2022). https:\/\/doi.org\/10.5281\/zenodo.5898579","DOI":"10.5281\/zenodo.5898579"},{"key":"28_CR8","unstructured":"Curry, A., Abercrombie, G., Rieser, V.: ConvAbuse: data, analysis, & benchmarks for nuanced abuse detection in conversational AI. In: EMNLP, pp. 7388\u20137403 (2021)"},{"issue":"1","key":"28_CR9","first-page":"1","volume":"18","author":"E Curtis","year":"2019","unstructured":"Curtis, E., Jones, R., Tipene-Leach, D., et al.: Why cultural safety rather than cultural competency is required to achieve health equity: a literature review & recommended definition. Equity Health 18(1), 1\u201317 (2019)","journal-title":"Equity Health"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Davani, A.d.M., D\u00edaz, M., Prabhakaran, V.: Dealing with disagreements: looking beyond the majority vote in subjective annotations. TACL 10, 92\u2013110 (2022)","DOI":"10.1162\/tacl_a_00449"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Dawid, A.P., Skene, A.M.: Maximum likelihood estimation of observer error-rates using the em algorithm. J. RSS (Appl. Stats.) 28(1), 20\u201328 (1979)","DOI":"10.2307\/2346806"},{"key":"28_CR12","first-page":"1","volume":"7","author":"J Dem\u0161ar","year":"2006","unstructured":"Dem\u0161ar, J.: Statistical comparisons of classifiers over multiple data sets. J. Mach. Learn. Res. 7, 1\u201330 (2006)","journal-title":"J. Mach. Learn. Res."},{"issue":"8","key":"28_CR13","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1001\/jama.2016.0976","volume":"315","author":"AM Dondorp","year":"2016","unstructured":"Dondorp, A.M., Iyer, S.S., Schultz, M.J.: Critical care in resource-restricted settings. JAMA 315(8), 753\u2013754 (2016)","journal-title":"JAMA"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Fornaciari, T., Uma, A., Paun, S., Plank, B., Hovy, D., Poesio, M.: Beyond black & white: leveraging annotator disagreement via soft-label multi-task learning. In: NAACL-HLT, pp. 2591\u20132597. ACL, Online (2021)","DOI":"10.18653\/v1\/2021.naacl-main.204"},{"key":"28_CR15","first-page":"2677","volume":"9","author":"S Garcia","year":"2009","unstructured":"Garcia, S., Herrera, F.: An extension on \u201cstatistical comparisons of classifiers over multiple data sets\u2019\u2019 for all pairwise comparisons. JMLR 9, 2677\u20132694 (2009)","journal-title":"JMLR"},{"key":"28_CR16","doi-asserted-by":"crossref","unstructured":"Garc\u00eda-D\u00edaz, J.A., Pan, R., Alcar\u00e1z-M\u00e1rmol, G., et\u00a0al.: UMUTeam at SemEval-2023 task 11: ensemble learning applied to binary supervised classifiers with disagreements. In: SemEval-2023, pp. 1061\u20131066. ACL, Canada (2023)","DOI":"10.18653\/v1\/2023.semeval-1.145"},{"key":"28_CR17","unstructured":"Harmsworth, G.R., Awatere, S., et al.: Indigenous M\u0101ori knowledge & perspectives of ecosystems, pp. 274\u2013286. Ecosystem services in NZ-conditions & trends. Manaaki Whenua Press, Lincoln, NZ (2013)"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Kokkinos, Y., Margaritis, K.G.: Breaking ties of plurality voting in ensembles of distributed neural network classifiers using soft max accumulations. In: AIAI, pp. 20\u201328. Springer, Cham (2014)","DOI":"10.1007\/978-3-662-44654-6_2"},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Leonardelli, E., et al.: SemEval-2023 task 11: learning with disagreements (LeWiDi). In: SemEval-2023, pp. 2304\u20132318. ACL, Canada (2023)","DOI":"10.18653\/v1\/2023.semeval-1.314"},{"key":"28_CR20","doi-asserted-by":"crossref","unstructured":"Leonardelli, E., Menini, S., Aprosio, A.P., Guerini, M., Tonelli, S.: Agreeing to disagree: annotating offensive language datasets with annotators\u2019 disagreement. In: EMNLP, pp. 10528\u201310539 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.822"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Levi, E., Mor, G., Sheafer, T., Shenhav, S.: Detecting narrative elements in informational text. In: Findings of the ACL: NAACL 2022, pp. 1755\u20131765 (2022)","DOI":"10.18653\/v1\/2022.findings-naacl.133"},{"key":"28_CR22","unstructured":"Li, Y., Du, M., Song, R., Wang, X., Wang, Y.: A survey on fairness in large language models. arXiv preprint arXiv:2308.10149 (2023)"},{"key":"28_CR23","unstructured":"Liang, P.P., Wu, C., Morency, L.P., Salakhutdinov, R.: Towards understanding and mitigating social biases in language models. In: ICML, pp. 6565\u20136576 (2021)"},{"issue":"1","key":"28_CR24","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1109\/18.61115","volume":"37","author":"J Lin","year":"1991","unstructured":"Lin, J.: Divergence measures based on the Shannon entropy. IEEE Trans. Inf. Theory 37(1), 145\u2013151 (1991)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"28_CR25","doi-asserted-by":"crossref","unstructured":"Malik, V., Dev, S., Nishi, A., Peng, N., Chang, K.W.: Socially aware bias measurements for Hindi language representations. In: NAACL-HLT, pp. 1041\u20131052. ACL, Seattle, United States (2022)","DOI":"10.18653\/v1\/2022.naacl-main.76"},{"key":"28_CR26","unstructured":"Mikolov, T., Grave, E., Bojanowski, P., Puhrsch, C., Joulin, A.: Advances in pre-training distributed word representations. In: LREC (2018)"},{"key":"28_CR27","doi-asserted-by":"crossref","unstructured":"Mu, Y., Jin, M., Grimshaw, C., Scarton, C., Bontcheva, K., Song, X.: Vaxxhesitancy: a dataset for studying hesitancy towards COVID-19 vaccination on twitter. In: AAAI Conference on Web and Social Media, vol.\u00a017, pp. 1052\u20131062 (2023)","DOI":"10.1609\/icwsm.v17i1.22213"},{"key":"28_CR28","doi-asserted-by":"crossref","unstructured":"Nadeem, M., Bethke, A., Reddy, S.: StereoSet: measuring stereotypical bias in pretrained language models. In: ACL, pp. 5356\u20135371. ACL, Online (2021)","DOI":"10.18653\/v1\/2021.acl-long.416"},{"key":"28_CR29","doi-asserted-by":"crossref","unstructured":"Nangia, N., Vania, C., et\u00a0al.: Crows-pairs: a challenge dataset for measuring social biases in masked language models. In: EMNLP, pp. 1953\u20131967. ACL (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.154"},{"key":"28_CR30","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., et al.: Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Plank, B.: The \u201cproblem\u201d of human label variation: On ground truth in data, modeling and evaluation. In: EMNLP, pp. 10671\u201310682 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.731"},{"key":"28_CR32","doi-asserted-by":"crossref","unstructured":"Rudinger, R., Naradowsky, J., Leonard, B., Van\u00a0Durme, B.: Gender bias in coreference resolution. In: NAACL-HLT, pp. 8\u201314. ACL (2018)","DOI":"10.18653\/v1\/N18-2002"},{"key":"28_CR33","doi-asserted-by":"crossref","unstructured":"Sammut, C., Webb, G.I. (eds.): TF\u2013IDF, pp. 986\u2013987. Springer, Boston, MA (2010)","DOI":"10.1007\/978-0-387-30164-8_832"},{"key":"28_CR34","doi-asserted-by":"crossref","unstructured":"Shahriar, S., Solorio, T.: SafeWebUH at SemEval-2023 task 11: learning annotator disagreement in derogatory text: comparison of direct training vs aggregation. In: SemEval-2023, pp. 94\u2013100. ACL, Canada (2023)","DOI":"10.18653\/v1\/2023.semeval-1.12"},{"key":"28_CR35","doi-asserted-by":"crossref","unstructured":"Sheng, E., Chang, K.W., Natarajan, P., Peng, N.: Towards controllable biases in language generation. In: Findings of EMNLP, pp. 3239\u20133254. ACL (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.291"},{"key":"28_CR36","unstructured":"Sheng, E., Chang, K.W., et\u00a0al.: The woman worked as a babysitter: on biases in language generation. In: EMNLP-IJCNLP, pp. 3407\u20133412. ACL (2019)"},{"key":"28_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.iccn.2022.103255","volume":"72","author":"M Taj","year":"2022","unstructured":"Taj, M., Brenner, M., Sulaiman, Z., Pandian, V.: Sepsis protocols to reduce mortality in resource-restricted settings: a systematic review. Intensive Crit. Care Nurs. 72, 103255 (2022)","journal-title":"Intensive Crit. Care Nurs."},{"key":"28_CR38","doi-asserted-by":"crossref","unstructured":"Uma, A., et al.: SemEval-2021 task 12: learning with disagreements. In: SemEval-2021, pp. 338\u2013347. ACL, Online (2021)","DOI":"10.18653\/v1\/2021.semeval-1.41"},{"key":"28_CR39","doi-asserted-by":"crossref","unstructured":"Uma, A., Fornaciari, T., Hovy, D., Paun, S., Plank, B., Poesio, M.: A case for soft loss functions. In: Proceedings of the AAAI Conference on Human Computation and Crowdsourcing, vol. 8, no. 1, pp. 173\u2013177 (2020)","DOI":"10.1609\/hcomp.v8i1.7478"},{"key":"28_CR40","first-page":"1385","volume":"72","author":"AN Uma","year":"2021","unstructured":"Uma, A.N., Fornaciari, T., Hovy, D., Paun, S., Plank, B., Poesio, M.: Learning from disagreement: a survey. J. AIR 72, 1385\u20131470 (2021)","journal-title":"J. AIR"},{"issue":"4","key":"28_CR41","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1016\/j.bjae.2021.11.011","volume":"22","author":"CS Webster","year":"2022","unstructured":"Webster, C.S., Taylor, S., Thomas, C., Weller, J.M.: Social bias, discrimination and inequity in healthcare: mechanisms, implications and recommendations. BJA Educ. 22(4), 131\u2013137 (2022)","journal-title":"BJA Educ."},{"key":"28_CR42","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1162\/tacl_a_00240","volume":"6","author":"K Webster","year":"2018","unstructured":"Webster, K., Recasens, M., Axelrod, V., Baldridge, J.: Mind the GAP: a balanced corpus of gendered ambiguous pronouns. TACL 6, 605\u2013617 (2018)","journal-title":"TACL"},{"key":"28_CR43","doi-asserted-by":"crossref","unstructured":"Wilson, D., Tweedie, F., Rumball-Smith, J., Ross, K., et\u00a0al.: Lessons learned from developing a COVID-19 algorithm governance framework in Aotearoa New Zealand. J. RSNZ, 1\u201313 (2022)","DOI":"10.1080\/03036758.2022.2121290"},{"key":"28_CR44","doi-asserted-by":"crossref","unstructured":"Wu, B., Li, Y., Mu, Y., Scarton, C., Bontcheva, K., Song, X.: Don\u2019t waste a single annotation: improving single-label classifiers through soft labels. In: Findings of the ACL: EMNLP 2023, pp. 5347\u20135355 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.355"},{"key":"28_CR45","unstructured":"Yogarajan, V., Dobbie, G., Keegan, T.T.: Debiasing large language models: research opportunities. J. Roy. Soc. NZ, 1\u201324 (2024)"},{"key":"28_CR46","unstructured":"Yogarajan, V., Dobbie, G., et\u00a0al.: Challenges in annotating datasets to quantify bias in under-represented society. In: EthAIcs-IJCAI (2023)"},{"key":"28_CR47","unstructured":"Yogarajan, V., Dobbie, G., Keegan, T.T., Neuwirth, R.J.: Tackling bias in pre-trained language models: current trends and under-represented societies. arXiv preprint arXiv:2312.01509 (2023)"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-6603-4_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T07:33:22Z","timestamp":1777448002000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-6603-4_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,9]]},"ISBN":["9789819666027","9789819666034"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-6603-4_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,9]]},"assertion":[{"value":"9 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Auckland","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}