{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T16:35:34Z","timestamp":1774629334673,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T00:00:00Z","timestamp":1655683200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,21]]},"DOI":"10.1145\/3531146.3533216","type":"proceedings-article","created":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T14:27:10Z","timestamp":1655735230000},"page":"1585-1603","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["Assessing Annotator Identity Sensitivity via Item Response Theory: A Case Study in a Hate Speech Corpus"],"prefix":"10.1145","author":[{"given":"Pratik S.","family":"Sachdeva","sequence":"first","affiliation":[{"name":"D-Lab, University of California, Berkeley, USA"}]},{"given":"Renata","family":"Barreto","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, USA"}]},{"given":"Claudia","family":"von Vacano","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, USA"}]},{"given":"Chris J.","family":"Kennedy","sequence":"additional","affiliation":[{"name":"Harvard Medical School, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,6,20]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Alim Al\u00a0Ayub Ahmed Ayman Aljabouh Praveen\u00a0Kumar Donepudi and Myung\u00a0Suh Choi. 2021. Detecting Fake News using Machine Learning: A Systematic Literature Review. arXiv preprint arXiv:2102.04458(2021)."},{"key":"e_1_3_2_1_2_1","first-page":"424","article-title":"Sentiment analysis using deep learning techniques: a review","volume":"8","author":"Ain Qurat\u00a0Tul","year":"2017","unstructured":"Qurat\u00a0Tul Ain, Mubashir Ali, Amna Riaz, Amna Noureen, Muhammad Kamran, Babar Hayat, and A Rehman. 2017. Sentiment analysis using deep learning techniques: a review. Int J Adv Comput Sci Appl 8, 6 (2017), 424.","journal-title":"Int J Adv Comput Sci Appl"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.alw-1.21"},{"key":"e_1_3_2_1_4_1","unstructured":"AHMER ARIF and OS KEYES. 2022. Vulnerability Trust and AI. (2022)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.17275\/per.21.88.8.4"},{"key":"e_1_3_2_1_6_1","unstructured":"Valerio Basile Federico Cabitza Andrea Campagner and Michael Fell. 2021. Toward a Perspectivist Turn in Ground Truthing for Predictive Computing. arXiv preprint arXiv:2109.04270(2021)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00041"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Abeba Birhane Pratyusha Kalluri Dallas Card William Agnew Ravit Dotan and Michelle Bao. 2021. The values encoded in machine learning research. arXiv preprint arXiv:2106.15590(2021).","DOI":"10.1145\/3531146.3533083"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Su\u00a0Lin Blodgett Lisa Green and Brendan O\u2019Connor. 2016. Demographic dialectal variation in social media: A case study of African-American English. arXiv preprint arXiv:1608.08868(2016).","DOI":"10.18653\/v1\/D16-1120"},{"key":"e_1_3_2_1_10_1","unstructured":"Robin Brontsema. 2004. A queer revolution: Reconceptualizing the debate over linguistic reclamation. Colorado Research in Linguistics(2004)."},{"key":"e_1_3_2_1_11_1","volume-title":"Conference on fairness, accountability and transparency. PMLR, 77\u201391","author":"Buolamwini Joy","year":"2018","unstructured":"Joy Buolamwini and Timnit Gebru. 2018. Gender shades: Intersectional accuracy disparities in commercial gender classification. In Conference on fairness, accountability and transparency. PMLR, 77\u201391."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.2307\/1229039"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Thomas Davidson Debasmita Bhattacharya and Ingmar Weber. 2019. Racial bias in hate speech and abusive language detection datasets. arXiv preprint arXiv:1905.12516(2019).","DOI":"10.18653\/v1\/W19-3504"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v11i1.14955"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the First Italian Conference on Cybersecurity (ITASEC17)","author":"Fabio","year":"2017","unstructured":"Fabio Del\u00a0Vigna12, Andrea Cimino23, Felice Dell\u2019Orletta, Marinella Petrocchi, and Maurizio Tesconi. 2017. Hate me, hate me not: Hate speech detection on facebook. In Proceedings of the First Italian Conference on Cybersecurity (ITASEC17). 86\u201395."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1177\/20539517211035955"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159661"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1207\/s15434311laq0203_2"},{"key":"e_1_3_2_1_19_1","volume-title":"Invariant measurement with raters and rating scales: Rasch models for rater-mediated assessments","author":"Engelhard George","unstructured":"George Engelhard and Stefanie\u00a0A Wind. 2017. Invariant measurement with raters and rating scales: Rasch models for rater-mediated assessments. Routledge."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-3510"},{"key":"e_1_3_2_1_22_1","volume-title":"Incorporating demographic embeddings into language understanding. Cognitive science 43, 1","author":"Garten Justin","year":"2019","unstructured":"Justin Garten, Brendan Kennedy, Joe Hoover, Kenji Sagae, and Morteza Dehghani. 2019. Incorporating demographic embeddings into language understanding. Cognitive science 43, 1 (2019), e12701."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372862"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Mor Geva Yoav Goldberg and Jonathan Berant. 2019. Are we modeling the task or the annotator? an investigation of annotator bias in natural language understanding datasets. arXiv preprint arXiv:1908.07898(2019).","DOI":"10.18653\/v1\/D19-1107"},{"key":"e_1_3_2_1_25_1","unstructured":"Michael Wojatzki Tobias Horsmann\u00a0Darina Gold and Torsten Zesch. 2018. Do women perceive hate differently: Examining the relationship between hate speech gender and agreement judgments. (2018)."},{"key":"e_1_3_2_1_26_1","volume-title":"Exploring the Impact of Rater Identity on Toxicity Annotation. In The 25th ACM Conference On Computer- Supported Cooperative Work And Social Computing. ACM.","author":"Nitesh","unstructured":"Nitesh Goyal and et al.2022. Is Your Toxicity My Toxicity? Exploring the Impact of Rater Identity on Toxicity Annotation. In The 25th ACM Conference On Computer- Supported Cooperative Work And Social Computing. ACM."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5103"},{"key":"e_1_3_2_1_28_1","volume-title":"Fundamentals of item response theory. Vol.\u00a02","author":"Hambleton K","unstructured":"Ronald\u00a0K Hambleton, Hariharan Swaminathan, and H\u00a0Jane Rogers. 1991. Fundamentals of item response theory. Vol.\u00a02. Sage."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1111\/lnc3.12432"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2019.00104"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-4302"},{"key":"e_1_3_2_1_32_1","unstructured":"Chris\u00a0J Kennedy Geoff Bacon Alexander Sahn and Claudia von Vacano. 2020. Constructing interval variables via faceted Rasch measurement and multitask deep learning: a hate speech application. arXiv preprint arXiv:2009.10277(2020)."},{"key":"e_1_3_2_1_33_1","unstructured":"Jae\u00a0Yeon Kim Carlos Ortiz Sarah Nam Sarah Santiago and Vivek Datta. 2020. Intersectional bias in hate speech and abusive language datasets. arXiv preprint arXiv:2005.05921(2020)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.2196\/20268"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1191\/0265532202lt218oa"},{"key":"e_1_3_2_1_36_1","unstructured":"Klaus Krippendorff. 1980. Validity in content analysis. (1980)."},{"key":"e_1_3_2_1_37_1","unstructured":"Klaus Krippendorff. 2011. Computing Krippendorff\u2019s alpha-reliability. (2011)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.socialnlp-1.7"},{"key":"e_1_3_2_1_39_1","volume-title":"Many-Facet Rasch Measurement","author":"Linacre M","unstructured":"John\u00a0M Linacre. 1994. Many-Facet Rasch Measurement. MESA press."},{"key":"e_1_3_2_1_40_1","unstructured":"John\u00a0M. Linacre. 2015. Facets computer program for many-facet Rasch measurement."},{"key":"e_1_3_2_1_41_1","unstructured":"John\u00a0M Linacre and Benjamin\u00a0D Wright. 2002. Construction of measures from many-facet data.Journal of Applied Measurement(2002)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2018.09.004"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3457607"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0237861"},{"key":"e_1_3_2_1_45_1","first-page":"386","article-title":"Detecting and measuring rater effects using many-facet Rasch measurement: Part I","volume":"4","author":"Myford M","year":"2003","unstructured":"Carol\u00a0M Myford and Edward\u00a0W Wolfe. 2003. Detecting and measuring rater effects using many-facet Rasch measurement: Part I. Journal of applied measurement 4, 4 (2003), 386\u2013422.","journal-title":"Journal of applied measurement"},{"key":"e_1_3_2_1_46_1","first-page":"189","article-title":"Detecting and measuring rater effects using many-facet Rasch measurement: Part II","volume":"5","author":"Myford M","year":"2004","unstructured":"Carol\u00a0M Myford and Edward\u00a0W Wolfe. 2004. Detecting and measuring rater effects using many-facet Rasch measurement: Part II. Journal of applied measurement 5, 2 (2004), 189\u2013227.","journal-title":"Journal of applied measurement"},{"key":"e_1_3_2_1_47_1","volume-title":"Algorithms of Oppression","author":"Noble Safiya\u00a0Umoja","unstructured":"Safiya\u00a0Umoja Noble. 2018. Algorithms of oppression. In Algorithms of Oppression. New York University Press."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/0022-2496(66)90002-2"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00167"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P14-2083"},{"key":"e_1_3_2_1_51_1","volume-title":"6th Italian Conference on Computational Linguistics, CLiC-it","author":"Poletto Fabio","year":"2019","unstructured":"Fabio Poletto, Valerio Basile, Cristina Bosco, Viviana Patti, and Marco Stranisci. 2019. Annotating hate speech: Three schemes at comparison. In 6th Italian Conference on Computational Linguistics, CLiC-it 2019, Vol.\u00a02481. CEUR-WS, 1\u20138."},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the 27th International Conference on Computational Linguistics. 1534\u20131545","author":"Preo\u0163iuc-Pietro Daniel","year":"2018","unstructured":"Daniel Preo\u0163iuc-Pietro and Lyle Ungar. 2018. User-level race and ethnicity predictors from twitter text. In Proceedings of the 27th International Conference on Computational Linguistics. 1534\u20131545."},{"key":"e_1_3_2_1_53_1","volume-title":"Second International Workshop on Learning over Multiple Contexts in ECML.","author":"Prud\u00eancio BC","year":"2015","unstructured":"Ricardo\u00a0BC Prud\u00eancio, Jos\u00e9 Hern\u00e1ndez-Orallo, and Adolfo Mart\u0131nez-Us\u00f3. 2015. Analysis of instance hardness in machine learning using item response theory. In Second International Workshop on Learning over Multiple Contexts in ECML."},{"key":"e_1_3_2_1_54_1","unstructured":"Megan Randall Alena Stern and Yipeng Su. 2021. Five Ethical Risks to Consider before Filling Missing Race and Ethnicity Data. (2021)."},{"key":"e_1_3_2_1_55_1","volume-title":"Report from European Meeting on Statistics, Econometrics and Management Sciences","author":"Rasch George","unstructured":"George Rasch. 1968. A mathematical theory of objectivity and its consequences for model construction. In Report from European Meeting on Statistics, Econometrics and Management Sciences, Amsterdam."},{"key":"e_1_3_2_1_56_1","volume-title":"Who are the turkers? worker demographics in amazon mechanical turk. Department of Informatics","author":"Ross Joel","year":"2009","unstructured":"Joel Ross, Andrew Zaldivar, Lilly Irani, and Bill Tomlinson. 2009. Who are the turkers? worker demographics in amazon mechanical turk. Department of Informatics, University of California, Irvine, USA, Tech. Rep (2009), 49."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1177\/014662169001400305"},{"key":"e_1_3_2_1_58_1","volume-title":"Rating the ratings: Assessing the psychometric quality of rating data.Psychological bulletin 88, 2","author":"Saal E","year":"1980","unstructured":"Frank\u00a0E Saal, Ronald\u00a0G Downey, and Mary\u00a0A Lahey. 1980. Rating the ratings: Assessing the psychometric quality of rating data.Psychological bulletin 88, 2 (1980), 413."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"P.\u00a0S. Sachdeva R. Barreto C. von Vacano and C.\u00a0J. Kennedy. 2022. Assessing Annotator Identity Sensitivity via Item Response Theory: A Case Study in a Hate Speech Corpus. https:\/\/github.com\/dlab-projects\/annotator_sensitivity_irt.","DOI":"10.1145\/3531146.3533216"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1163"},{"key":"e_1_3_2_1_61_1","unstructured":"Maarten Sap Swabha Swayamdipta Laura Vianna Xuhui Zhou Yejin Choi and Noah\u00a0A Smith. 2021. Annotators with Attitudes: How Annotator Beliefs And Identities Bias Toxic Language Detection. arXiv preprint arXiv:2111.07997(2021)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3476058"},{"key":"e_1_3_2_1_63_1","volume-title":"Proceedings of the 2008 conference on empirical methods in natural language processing. 254\u2013263","author":"Snow Rion","year":"2008","unstructured":"Rion Snow, Brendan O\u2019connor, Dan Jurafsky, and Andrew\u00a0Y Ng. 2008. Cheap and fast\u2013but is it good? evaluating non-expert annotations for natural language tasks. In Proceedings of the 2008 conference on empirical methods in natural language processing. 254\u2013263."},{"key":"e_1_3_2_1_64_1","first-page":"11","article-title":"Rasch Measurement v. Item Response Theory: Knowing When to Cross the Line","volume":"26","author":"Stemler E","year":"2021","unstructured":"Steven\u00a0E Stemler and Adam Naples. 2021. Rasch Measurement v. Item Response Theory: Knowing When to Cross the Line. Practical Assessment, Research, and Evaluation 26, 1(2021), 11.","journal-title":"Practical Assessment, Research, and Evaluation"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11336-013-9388-3"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-5618"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-2013"},{"key":"e_1_3_2_1_68_1","volume-title":"Constructing measures: An item response modeling approach","author":"Wilson Mark","unstructured":"Mark Wilson. 2004. Constructing measures: An item response modeling approach. Routledge."},{"key":"e_1_3_2_1_69_1","unstructured":"Edward\u00a0W Wolfe Bradley\u00a0C Moulder and Carol\u00a0M Myford. 1999. Detecting differential rater functioning over time (DRIFT) using a Rasch multi-faceted rating scale model.(1999)."},{"key":"e_1_3_2_1_70_1","volume-title":"Rating scale analysis","author":"Wright D","unstructured":"Benjamin\u00a0D Wright and Geofferey\u00a0N Masters. 1982. Rating scale analysis. MESA press."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3396452.3396455"}],"event":{"name":"FAccT '22: 2022 ACM Conference on Fairness, Accountability, and Transparency","location":"Seoul Republic of Korea","acronym":"FAccT '22","sponsor":["ACM Association for Computing Machinery"]},"container-title":["2022 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533216","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3531146.3533216","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:31:30Z","timestamp":1750188690000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533216"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,20]]},"references-count":70,"alternative-id":["10.1145\/3531146.3533216","10.1145\/3531146"],"URL":"https:\/\/doi.org\/10.1145\/3531146.3533216","relation":{},"subject":[],"published":{"date-parts":[[2022,6,20]]},"assertion":[{"value":"2022-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}