{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:50:42Z","timestamp":1780055442114,"version":"3.54.0"},"reference-count":90,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T00:00:00Z","timestamp":1751846400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T00:00:00Z","timestamp":1751846400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["101094364"],"award-info":[{"award-number":["101094364"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["AI &amp; Soc"],"published-print":{"date-parts":[[2026,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>Toxic speech on online civic engagement platforms (CEPs) disproportionately affects marginalized groups and threatens the diversity of citizen voices. However, the deployment of AI-driven toxic speech detection (TSD) tools for CEPs faces complex challenges from legal, psychological, and technical perspectives that remain insufficiently explored. We present a first-of-its-kind interdisciplinary review of these challenges, focusing on the explainability of TSD systems, their compliance with European legal standards and offer a roadmap for ethical deployment. Our review reveals three main findings. First, although transparency in AI decision-making is necessary from both legal and psychological perspectives, assessing the explainability of AI-driven TSD tools, and their compliance with legal regulations within Europe, remains a significant challenge. Second, current explainability approaches, ranging from toxic span identification to advanced explainable AI methods, lack standardized metrics. This makes it difficult to assess their reliability and appropriateness for CEPs. Third, despite the importance of TSD, frameworks and best practices for CEPs are still lacking in existing literature. This paper aims to fill this gap by providing a holistic perspective on the challenges and solutions for TSD deployment. It provides the foundation for collaborative efforts to develop and standardize metrics, evaluation protocols, and best practices that can ensure AI decisions in CEPs are transparent, accountable, and aligned with users\u2019 needs.<\/jats:p>","DOI":"10.1007\/s00146-025-02424-5","type":"journal-article","created":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T14:33:01Z","timestamp":1751898781000},"page":"527-544","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A multidisciplinary analysis of transparent AI-driven toxicity detection tools for civic engagement platforms"],"prefix":"10.1007","volume":"41","author":[{"given":"Maria","family":"Zangl","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Iliana","family":"Loi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Panagiotis","family":"Zachos","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Michael","family":"Bedek","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Emmanouil","family":"Dimogerontakis","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Charikleia-Eleni","family":"Nikolaou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dietrich","family":"Albert","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Konstantinos","family":"Moustakas","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,7,7]]},"reference":[{"key":"2424_CR1","unstructured":"A.W.S. (2024) Amazon transcribe toxicity detection. https:\/\/aws.amazon.com\/transcribe\/toxicity-detection\/. Retrieved 10 September 2024"},{"key":"2424_CR2","doi-asserted-by":"publisher","unstructured":"Abadi M, Chu A, Goodfellow I et al (2016) Deep learning with differential privacy. In: 2016 ACM SIGSAC conference, pp 308\u2013318. https:\/\/doi.org\/10.1145\/2976749.2978318","DOI":"10.1145\/2976749.2978318"},{"key":"2424_CR3","doi-asserted-by":"publisher","unstructured":"Abokhodair N, Skop Y, R\u00fcller S et al (2024) Opaque algorithms, transparent biases: automated content moderation during the Sheikh Jarrah crisis. First Monday. https:\/\/doi.org\/10.5210\/fm.v29i4.13620","DOI":"10.5210\/fm.v29i4.13620"},{"key":"2424_CR4","unstructured":"ADL (2019) Online hate and harassment: the American experience | ADL. https:\/\/www.adl.org\/resources\/report\/online-hate-and-harassment-american-experience"},{"issue":"6","key":"2424_CR5","doi-asserted-by":"publisher","first-page":"273","DOI":"10.3390\/info13060273","volume":"13","author":"F Alkomah","year":"2022","unstructured":"Alkomah F, Ma X (2022) A literature review of textual hate speech detection methods and datasets. Information 13(6):273","journal-title":"Information"},{"issue":"1","key":"2424_CR6","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1007\/s10207-023-00755-2","volume":"23","author":"KR Anjum","year":"2024","unstructured":"Anjum KR (2024) Hate speech, toxicity detection in online social media: a recent survey of state of the art and opportunities. Int J Inf Secur 23(1):577\u2013608","journal-title":"Int J Inf Secur"},{"issue":"3","key":"2424_CR7","doi-asserted-by":"publisher","first-page":"3609","DOI":"10.1007\/s13369-023-08100-4","volume":"49","author":"G Ansari","year":"2024","unstructured":"Ansari G, Kaur P, Saxena C (2024) Data augmentation for improving explainability of hate speech detection. Arab J Sci Eng 49(3):3609\u20133621","journal-title":"Arab J Sci Eng"},{"key":"2424_CR8","doi-asserted-by":"publisher","unstructured":"Appelman N (2021) Using terms and conditions to apply fundamental rights to content moderation. Verfassungsblog: On matters constitutional, Fachinformationsdienst f\u00fcr internationale und interdisziplin\u00e4re Rechtsforschung. https:\/\/doi.org\/10.17176\/20210901-233103-0","DOI":"10.17176\/20210901-233103-0"},{"key":"2424_CR9","doi-asserted-by":"publisher","unstructured":"Aroyo L, Dixon L, Thain N et al (2019) Crowdsourcing subjective tasks: the case study of understanding toxicity in online discussions. In: Companion proceedings of the 2019 world wide web conference. WWW\u201919: the web conference. ACM, San Francisco, USA, pp 1100\u20131105. ISBN: 9781450366755. https:\/\/doi.org\/10.1145\/3308560.3317083","DOI":"10.1145\/3308560.3317083"},{"issue":"1","key":"2424_CR10","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/s13278-021-00852-x","volume":"12","author":"TT Aurpa","year":"2022","unstructured":"Aurpa TT, Sadik R, Ahmed MS (2022) Abusive Bangla comments detection on Facebook using transformer-based deep learning models. Soc Netw Anal Min 12(1):24","journal-title":"Soc Netw Anal Min"},{"key":"2424_CR11","doi-asserted-by":"publisher","unstructured":"Banerjee P, Barnwal RP (2022) Methods and metrics for explaining artificial intelligence models: a review. Springer International Publishing, pp 61\u201388. https:\/\/doi.org\/10.1007\/978-3-031-12807-3","DOI":"10.1007\/978-3-031-12807-3"},{"key":"2424_CR12","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1007\/978-3-319-17620-8_16","volume-title":"Smarter as the new urban agenda","author":"L Berntzen","year":"2016","unstructured":"Berntzen L, Johannessen MR (2016) The role of citizen participation in municipal smart city projects: lessons learned from Norway. In: Gil-Garcia JR, Pardo TA, Nam T (eds) Smarter as the new urban agenda, vol 11. Springer International Publishing, New York, pp 299\u2013314"},{"issue":"10","key":"2424_CR13","doi-asserted-by":"publisher","first-page":"6038","DOI":"10.3390\/app13106038","volume":"13","author":"A Bonetti","year":"2023","unstructured":"Bonetti A, Mart\u00ednez-Sober M, Torres JC et al (2023) Comparison between machine learning and deep learning approaches for the detection of toxic comments on social networks. Appl Sci 13(10):6038","journal-title":"Appl Sci"},{"key":"2424_CR14","first-page":"1","volume-title":"Psychological reactance","author":"SS Brehm","year":"1981","unstructured":"Brehm SS, Brehm JW (1981) Chapter 1\u2014introduction: freedom, control, and reactance theory. In: Brehm SS, Brehm JW (eds) Psychological reactance. Academic Press, USA, pp 1\u20137"},{"key":"2424_CR15","doi-asserted-by":"crossref","unstructured":"Breitfeller L, Ahn E, Jurgens D et al (2019) Finding microaggressions in the wild: a case for locating elusive phenomena in social media posts. In: Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP), pp 1664\u20131674","DOI":"10.18653\/v1\/D19-1176"},{"key":"2424_CR16","doi-asserted-by":"publisher","DOI":"10.1515\/9781503616295","volume-title":"The psychic life of power: theories in subjection","author":"J Butler","year":"1997","unstructured":"Butler J (1997) The psychic life of power: theories in subjection. Standford UP, USA"},{"issue":"2","key":"2424_CR17","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1177\/0261927X07300077","volume":"26","author":"A Carnaghi","year":"2007","unstructured":"Carnaghi A, Maass A (2007) In-group and out-group perspectives in the use of derogatory group labels: gay versus fag. J Lang Soc Psychol 26(2):142\u2013156. https:\/\/doi.org\/10.1177\/0261927X07300077","journal-title":"J Lang Soc Psychol"},{"issue":"3","key":"2424_CR18","doi-asserted-by":"publisher","first-page":"1203","DOI":"10.1007\/s00530-023-01051-8","volume":"29","author":"A Chhabra","year":"2023","unstructured":"Chhabra A, Vishwakarma DK (2023) A literature survey on multimodal and multilingual automatic hate speech identification. Multimedia Syst 29(3):1203\u20131230","journal-title":"Multimedia Syst"},{"issue":"3","key":"2424_CR19","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1177\/1078087419897821","volume":"57","author":"S Cho","year":"2021","unstructured":"Cho S, Mossberger K, Swindell D et al (2021) Experimenting with public engagement platforms in local government. Urban Affairs Rev 57(3):763\u2013793. https:\/\/doi.org\/10.1177\/1078087419897821","journal-title":"Urban Affairs Rev"},{"key":"2424_CR20","unstructured":"Clark K, Luong MT, Le QV et al (2020) Electra: pre-training text encoders as discriminators rather than generators"},{"issue":"4","key":"2424_CR21","doi-asserted-by":"publisher","first-page":"739","DOI":"10.1007\/s13347-020-00429-0","volume":"34","author":"J Cobbe","year":"2021","unstructured":"Cobbe J (2021) Algorithmic censorship by social platforms: power and resistance. Philos Technol 34(4):739\u2013766. https:\/\/doi.org\/10.1007\/s13347-020-00429-0","journal-title":"Philos Technol"},{"key":"2424_CR23","doi-asserted-by":"publisher","first-page":"626409","DOI":"10.3389\/fhumd.2021.626409","volume":"3","author":"CL Cook","year":"2021","unstructured":"Cook CL, Patel A, Wohn DY (2021) Commercial versus volunteer: Comparing user perceptions of toxicity and transparency in content moderation across social media platforms. Front Hum Dyn 3:626409. https:\/\/doi.org\/10.3389\/fhumd.2021.626409","journal-title":"Front Hum Dyn"},{"key":"2424_CR24","doi-asserted-by":"publisher","unstructured":"D\u2019Sa AG, Illina I, Fohr D (2020) BERT and fast text embeddings for automatic detection of toxic speech. In: 2020 International multi-conference on: \u201corganization of knowledge and advanced technologies\u201d (OCTA), pp 1\u20135. https:\/\/doi.org\/10.1109\/OCTA49274.2020.9151853","DOI":"10.1109\/OCTA49274.2020.9151853"},{"key":"2424_CR25","doi-asserted-by":"publisher","unstructured":"Davies J, Procter R (2020) Online platforms of public participation\u2014a deliberative democracy or a delusion? https:\/\/doi.org\/10.48550\/ARXIV.2009.14074","DOI":"10.48550\/ARXIV.2009.14074"},{"key":"2424_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.clsr.2019.105374","volume":"36","author":"G De Gregorio","year":"2020","unstructured":"De Gregorio G (2020) Democratising online content moderation: a constitutional framework. Comput Law Secur Rev 36:105374. https:\/\/doi.org\/10.1016\/j.clsr.2019.105374","journal-title":"Comput Law Secur Rev"},{"key":"2424_CR27","doi-asserted-by":"crossref","unstructured":"De Gregorio G (2022) Digital constitutionalism and freedom of expression. In: Cambridge studies in European law and policy. Cambridge University Press, pp 157\u2013215","DOI":"10.1017\/9781009071215.006"},{"issue":"7","key":"2424_CR28","doi-asserted-by":"publisher","first-page":"779","DOI":"10.3390\/electronics10070779","volume":"10","author":"D Dessi","year":"2021","unstructured":"Dessi D, Recupero DR, Sack H (2021) An assessment of deep learning models and word embeddings for toxicity detection within online textual comments. Electronics 10(7):779","journal-title":"Electronics"},{"key":"2424_CR29","unstructured":"Devlin J, Chang MW, Lee K et al (2019) Bert: pre-training of deep bidirectional transformers for language understanding. https:\/\/arxiv.org\/abs\/1810.04805"},{"key":"2424_CR30","doi-asserted-by":"crossref","unstructured":"DeYoung J, Jain S, Rajani N et al (2020) Eraser: a benchmark to evaluate rationalized NLP models. arXiv:1911.03429","DOI":"10.18653\/v1\/2020.acl-main.408"},{"issue":"2","key":"2424_CR31","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1080\/03637750500111815","volume":"72","author":"JP Dillard","year":"2005","unstructured":"Dillard JP, Shen L (2005) On the nature of reactance and its role in persuasive health communication. Commun Monogr 72(2):144\u2013168. https:\/\/doi.org\/10.1080\/03637750500111815","journal-title":"Commun Monogr"},{"key":"2424_CR32","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v12i1.15038","author":"M ElSherief","year":"2018","unstructured":"ElSherief M, Nilizadeh S, Nguyen D et al (2018) Peer to peer hate: hate speech instigators and their targets. Proc Inter AAAI Conf Web Soc Media. https:\/\/doi.org\/10.1609\/icwsm.v12i1.15038","journal-title":"Proc Inter AAAI Conf Web Soc Media"},{"key":"2424_CR33","doi-asserted-by":"publisher","first-page":"345","DOI":"10.18653\/v1\/2021.emnlp-main.29","volume-title":"Proceedings of the 2021 conference on empirical methods in natural language processing","author":"M ElSherief","year":"2021","unstructured":"ElSherief M, Ziems C, Muchlinski D et al (2021) Latent hatred: a benchmark for understanding implicit hate speech. In: Moens MF, Huang X, Specia L et al (eds) Proceedings of the 2021 conference on empirical methods in natural language processing. Association for Computational Linguistics, USA, pp 345\u2013363"},{"key":"2424_CR34","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.ijinfomgt.2018.01.007","volume":"40","author":"E Falco","year":"2018","unstructured":"Falco E, Kleinhans R (2018) Beyond technology: Identifying local government challenges for using digital platforms for citizen engagement. Int J Inf Manage 40:17\u201320. https:\/\/doi.org\/10.1016\/j.ijinfomgt.2018.01.007","journal-title":"Int J Inf Manage"},{"issue":"11","key":"2424_CR35","doi-asserted-by":"publisher","first-page":"1332","DOI":"10.3390\/electronics10111332","volume":"10","author":"H Fan","year":"2021","unstructured":"Fan H, Du W, Dahou A et al (2021) Social media toxicity classification using deep learning: real-world application UK Brexit. Electronics 10(11):1332","journal-title":"Electronics"},{"key":"2424_CR36","first-page":"37","volume":"9","author":"DS Farinho","year":"2024","unstructured":"Farinho DS (2024) Personal data processing by online platforms and search engines: the case of the EU digital services act. Pub Govern Admin Fin L Rev 9:37","journal-title":"Pub Govern Admin Fin L Rev"},{"issue":"1","key":"2424_CR37","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1386\/jdmp.10.1.331","volume":"10","author":"T Flew","year":"2019","unstructured":"Flew T, Martin F, Suzor N (2019) Internet regulation as media policy: rethinking the question of digital communication platform governance. J Digit Media Policy 10(1):33\u201350. https:\/\/doi.org\/10.1386\/jdmp.10.1.331","journal-title":"J Digit Media Policy"},{"issue":"3","key":"2424_CR38","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1111\/j.1083-6101.2012.01574.x","volume":"17","author":"H Gil de Z\u00fa\u00f1iga","year":"2012","unstructured":"Gil de Z\u00fa\u00f1iga H, Jung N, Valenzuela S (2012) Social media use for news and individuals\u2019 social capital, civic engagement and political participation. J Comput Mediat Commun 17(3):319\u2013336. https:\/\/doi.org\/10.1111\/j.1083-6101.2012.01574.x","journal-title":"J Comput Mediat Commun"},{"key":"2424_CR39","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1007\/s42001-024-00248-9","volume":"7","author":"V Gongane","year":"2024","unstructured":"Gongane V, Munot M, Anuse A (2024) A survey of explainable AI techniques for detection of fake news and hate speech on social media platforms. J Comput Soc Sc 7:587\u2013623. https:\/\/doi.org\/10.1007\/s42001-024-00248-9","journal-title":"J Comput Soc Sc"},{"key":"2424_CR41","first-page":"171","volume-title":"Proceedings of the eleventh workshop on computational approaches to subjectivity, sentiment and social media analysis","author":"L Grimminger","year":"2021","unstructured":"Grimminger L, Klinger R (2021) Hate towards the political opponent: A Twitter corpus study of the 2020 US elections on the basis of offensive speech and stance detection. In: De Clercq O, Balahur A, Sedoc J et al (eds) Proceedings of the eleventh workshop on computational approaches to subjectivity, sentiment and social media analysis. Association for Computational Linguistics, USA, pp 171\u2013180"},{"issue":"1","key":"2424_CR42","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1080\/13621025.2013.764219","volume":"17","author":"I Guldvik","year":"2013","unstructured":"Guldvik I, Askheim OP, Johansen V (2013) Political citizenship and local political participation for disabled people. Citizenship Stud 17(1):76\u201391. https:\/\/doi.org\/10.1080\/13621025.2013.764219","journal-title":"Citizenship Stud"},{"key":"2424_CR43","doi-asserted-by":"publisher","unstructured":"Halevy M, Harris C, Bruckman A et al (2021) Mitigating racial biases in toxic language detection with an equity-based ensemble framework. https:\/\/doi.org\/10.48550\/ARXIV.2109.13137","DOI":"10.48550\/ARXIV.2109.13137"},{"key":"2424_CR44","doi-asserted-by":"publisher","DOI":"10.1177\/20563051231196874","author":"C Haythornthwaite","year":"2023","unstructured":"Haythornthwaite C (2023) Moderation, networks, and anti-social behavior online. Soc Media Soc. https:\/\/doi.org\/10.1177\/20563051231196874","journal-title":"Soc Media Soc"},{"key":"2424_CR45","doi-asserted-by":"crossref","unstructured":"Hoang NM, Do XL, Do DA et al (2024) Toxcl: a unified framework for toxic speech detection and explanation. https:\/\/arxiv.org\/abs\/2403.16685","DOI":"10.18653\/v1\/2024.naacl-long.359"},{"key":"2424_CR46","doi-asserted-by":"publisher","first-page":"5491","DOI":"10.18653\/v1\/2020.acl-main.487","volume-title":"Proceedings of the 58th annual meeting of the association for computational linguistics","author":"B Hutchinson","year":"2020","unstructured":"Hutchinson B, Prabhakaran V, Denton E et al (2020) Social biases in NLP models as barriers for persons with disabilities. Proceedings of the 58th annual meeting of the association for computational linguistics. Association for Computational Linguistics, USA, pp 5491\u20135501"},{"issue":"2","key":"2424_CR47","first-page":"175","volume":"16","author":"MA Ibrahim","year":"2022","unstructured":"Ibrahim MA, Arifin S, Yudistira IGAA et al (2022) An explainable AI model for hate speech detection on Indonesian twitter. CommIT (Commun Inf Technol) J 16(2):175\u2013182","journal-title":"CommIT (Commun Inf Technol) J"},{"key":"2424_CR48","doi-asserted-by":"publisher","first-page":"126232","DOI":"10.1016\/j.neucom.2023.126232","volume":"546","author":"MS Jahan","year":"2023","unstructured":"Jahan MS, Oussalah M (2023) A systematic review of hate speech automatic detection using natural language processing. Neurocomputing 546:126232. https:\/\/doi.org\/10.1016\/j.neucom.2023.126232","journal-title":"Neurocomputing"},{"issue":"4","key":"2424_CR49","doi-asserted-by":"publisher","DOI":"10.1177\/00936502211062773","volume":"50","author":"J Jakob","year":"2023","unstructured":"Jakob J, Dobbrick T, Freudenthaler R et al (2023) Is constructive engagement online a lost cause? Toxic outrage in online user comments across democratic political systems and discussion arenas. Commun Res 50(4):508531. https:\/\/doi.org\/10.1177\/00936502211062773","journal-title":"Commun Res"},{"key":"2424_CR50","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3359252","volume":"3","author":"S Jhaver","year":"2019","unstructured":"Jhaver S, Bruckman A, Gilbert E (2019) Does transparency in moderation really matter?: user behavior after content removal explanations on reddit. Proc ACM Hum-Comput Interact 3:1\u201327. https:\/\/doi.org\/10.1145\/3359252","journal-title":"Proc ACM Hum-Comput Interact"},{"key":"2424_CR51","first-page":"427","volume-title":"Proceedings of the 15th conference of the European chapter of the association for computational linguistics: volume 2, short papers","author":"A Joulin","year":"2017","unstructured":"Joulin A, Grave E, Bojanowski P et al (2017) Bag of tricks for efficient text classification. In: Lapata M, Blunsom P, Koller A (eds) Proceedings of the 15th conference of the European chapter of the association for computational linguistics: volume 2, short papers. Association for Computational Linguistics, Valencia, Spain, pp 427\u2013431"},{"issue":"1","key":"2424_CR52","doi-asserted-by":"publisher","DOI":"10.1111\/spc3.12652","volume":"16","author":"FJ Kachanoff","year":"2022","unstructured":"Kachanoff FJ, Gray K, Koestner R et al (2022) Collective autonomy: why groups fight for power and status. Soc Pers Psychol Compass 16(1):e12652","journal-title":"Soc Pers Psychol Compass"},{"key":"2424_CR53","doi-asserted-by":"publisher","first-page":"109153","DOI":"10.1016\/j.compeleceng.2024.109153","volume":"116","author":"H Kibriya","year":"2024","unstructured":"Kibriya H, Siddiqa A, Khan WZ et al (2024) Towards safer online communities: deep learning and explainable AI for hate speech detection and classification. Comput Electr Eng 116:109153. https:\/\/doi.org\/10.1016\/j.compeleceng.2024.109153","journal-title":"Comput Electr Eng"},{"key":"2424_CR54","doi-asserted-by":"crossref","unstructured":"Kirk HR, Yin W, Vidgen B et al (2023) Semeval-2023 task 10: explainable detection of online sexism. https:\/\/arxiv.org\/abs\/2303.04222","DOI":"10.18653\/v1\/2023.semeval-1.305"},{"key":"2424_CR55","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijdrr.2020.101826","volume":"50","author":"CHA Kuran","year":"2020","unstructured":"Kuran CHA, Morsut C, Kruke BI et al (2020) Vulnerability and vulnerable groups from an intersectionality perspective. Int J Disaster Risk Reduct 50:101826. https:\/\/doi.org\/10.1016\/j.ijdrr.2020.101826","journal-title":"Int J Disaster Risk Reduct"},{"key":"2424_CR56","unstructured":"Liu Y, Ott M, Goyal N, et al (2019) RoBERTta: a robustly optimized BERT pretraining approach. https:\/\/arxiv.org\/abs\/1907.11692"},{"issue":"7","key":"2424_CR57","doi-asserted-by":"publisher","first-page":"880","DOI":"10.1080\/17512786.2016.1164614","volume":"10","author":"M L\u00f6fgren Nilsson","year":"2016","unstructured":"L\u00f6fgren Nilsson M, \u00d6rnebring H (2016) Journalism under threat: intimidation and harassment of Swedish journalists. J Pract 10(7):880\u2013890. https:\/\/doi.org\/10.1080\/17512786.2016.1164614","journal-title":"J Pract"},{"key":"2424_CR58","doi-asserted-by":"publisher","first-page":"849","DOI":"10.1007\/978-981-33-4367-2_81","volume-title":"Emerging technologies in data mining and information security: proceedings of IEMIS 2020","author":"A Mahajan","year":"2021","unstructured":"Mahajan A, Shah D, Jafar G (2021) Explainable AI approach towards toxic comment classification. Emerging technologies in data mining and information security: proceedings of IEMIS 2020, vol 2. Springer, New York, pp 849\u2013858"},{"key":"2424_CR59","doi-asserted-by":"crossref","unstructured":"Malik P, Aggrawal A, Vishwakarma DK (2021) Toxic speech detection using traditional machine learning models and BERT and fastText embedding with deep neural networks. In: 2021 5th international conference on computing methodologies and communication (ICCMC). IEEE, pp 1254\u20131259","DOI":"10.1109\/ICCMC51019.2021.9418395"},{"key":"2424_CR60","doi-asserted-by":"crossref","unstructured":"Mathew B, Saha P, Yimam SM et al (2021) Hatexplain: a benchmark dataset for explainable hate speech detection. In: Proceedings of the AAAI conference on artificial intelligence, pp 14867\u201314875","DOI":"10.1609\/aaai.v35i17.17745"},{"issue":"8","key":"2424_CR61","doi-asserted-by":"publisher","first-page":"291","DOI":"10.3390\/a15080291","volume":"15","author":"H Mehta","year":"2022","unstructured":"Mehta H, Passi K (2022) Social media hate speech detection using explainable artificial intelligence (xai). Algorithms 15(8):291","journal-title":"Algorithms"},{"issue":"4","key":"2424_CR62","doi-asserted-by":"publisher","first-page":"zmac010","DOI":"10.1093\/jcmc\/zmac010","volume":"27","author":"MD Molina","year":"2022","unstructured":"Molina MD, Sundar SS (2022) When AI moderates online content: effects of human collaboration and interactive transparency on user trust. J Comput Mediat Commun 27(4):zmac010. https:\/\/doi.org\/10.1093\/jcmc\/zmac010","journal-title":"J Comput Mediat Commun"},{"key":"2424_CR63","doi-asserted-by":"crossref","unstructured":"Nguyen DQ, Vu T, Nguyen AT (2020) BERTweet: a pre-trained language model for English Tweets. arXiv preprint arXiv:200510200","DOI":"10.18653\/v1\/2020.emnlp-demos.2"},{"key":"2424_CR64","unstructured":"One AI (2024) Toxicity detection skill. https:\/\/oneai.com\/skill\/toxicity. Retrieved 10 September 2024"},{"key":"2424_CR65","doi-asserted-by":"publisher","unstructured":"Pavlopoulos J, Sorensen J, Laugier L et al (2021) Semeval-2021 task 5: toxic spans detection. In: Proceedings of the 15th international workshop on semantic evaluation (SemEval-2021). Association for Computational Linguistics, USA. https:\/\/doi.org\/10.18653\/v1\/2021.semeval-1.6","DOI":"10.18653\/v1\/2021.semeval-1.6"},{"key":"2424_CR66","unstructured":"PerspectiveAPI (2023) https:\/\/www.perspectiveapi.com\/. Retrieved 10 September 2024"},{"issue":"2","key":"2424_CR67","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1007\/s10579-020-09502-8","volume":"55","author":"F Poletto","year":"2021","unstructured":"Poletto F, Basile V, Sanguinetti M et al (2021) Resources and benchmark corpora for hate speech detection: a systematic review. Lang Resour Eval 55(2):477\u2013523. https:\/\/doi.org\/10.1007\/s10579-020-09502-8","journal-title":"Lang Resour Eval"},{"key":"2424_CR68","unstructured":"Protect AI (2025) LLM guard\u2014the security toolkit for LLM interactions. https:\/\/llm-guard.com\/"},{"issue":"1","key":"2424_CR69","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1080\/15017419.2013.781957","volume":"16","author":"H Raisio","year":"2014","unstructured":"Raisio H, Valkama K, Peltola E (2014) Disability and deliberative democracy: towards involving the whole human spectrum in public deliberation. Scand J Disabil Res 16(1):77\u201397. https:\/\/doi.org\/10.1080\/15017419.2013.781957","journal-title":"Scand J Disabil Res"},{"issue":"5","key":"2424_CR70","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1080\/13562517.2018.1457636","volume":"23","author":"B Read","year":"2018","unstructured":"Read B (2018) Truth, masculinity and the anti-elitist backlash against the university in the age of trump. Teach Higher Educ 23(5):593\u2013605. https:\/\/doi.org\/10.1080\/13562517.2018.1457636","journal-title":"Teach Higher Educ"},{"key":"2424_CR71","doi-asserted-by":"publisher","first-page":"1135","DOI":"10.1145\/2939672.2939778","volume-title":"Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, KDD\u201916","author":"MT Ribeiro","year":"2016","unstructured":"Ribeiro MT, Singh S, Guestrin C (2016) Why should I trust you?: explaining the predictions of any classifier. Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, KDD\u201916. Association for Computing Machinery, New York, NY, USA, pp 1135\u20131144"},{"key":"2424_CR72","volume-title":"Deep learning-based approaches for sentiment analysis. Algorithms for intelligent systems","author":"J Risch","year":"2020","unstructured":"Risch J, Krestel R (2020) Toxic comment detection in online discussions. In: Agarwal B, Nayak R, Mittal N et al (eds) Deep learning-based approaches for sentiment analysis. Algorithms for intelligent systems. Springer, Singapore"},{"issue":"2","key":"2424_CR73","first-page":"44","volume":"3","author":"M Rojszczak","year":"2023","unstructured":"Rojszczak M (2023) The digital services act and the problem of preventive blocking of (clearly) illegal content. Inst Admin J Admin Sci 3(2):44\u201359","journal-title":"Inst Admin J Admin Sci"},{"key":"2424_CR74","doi-asserted-by":"crossref","unstructured":"Saha K, Chandrasekharan E, De Choudhury M (2019) Prevalence and psychological effects of hateful speech in online college communities. In: Proceedings of the 10th ACM conference on web science. ACM, pp 255\u2013264","DOI":"10.1145\/3292522.3326032"},{"key":"2424_CR75","first-page":"1","volume":"10","author":"J Salminen","year":"2020","unstructured":"Salminen J, Hopf M, Chowdhury SA et al (2020) Developing an online hate classifier for multiple social media platforms. HCIS 10:1\u201334","journal-title":"HCIS"},{"key":"2424_CR76","doi-asserted-by":"publisher","unstructured":"Sarker J, Sultana S, Wilson SR et al (2023) ToxiSpanSE: an explainable toxicity detection in code review comments. In: 2023 ACM\/IEEE international symposium on empirical software engineering and measurement (ESEM). IEEE, pp 1\u201312. https:\/\/doi.org\/10.1109\/esem56168.2023.10304855","DOI":"10.1109\/esem56168.2023.10304855"},{"key":"2424_CR77","first-page":"1","volume-title":"Proceedings of the fifth international workshop on natural language processing for social media","author":"A Schmidt","year":"2017","unstructured":"Schmidt A, Wiegand M (2017) A survey on hate speech detection using natural language processing. In: Ku LW, Li CT (eds) Proceedings of the fifth international workshop on natural language processing for social media. Association for Computational Linguistics, USA, pp 1\u201310"},{"key":"2424_CR78","doi-asserted-by":"crossref","unstructured":"Shakil MH, Alam MGR (2022) Hate speech classification implementing NLP and CNN with machine learning algorithm through interpretable explainable AI. In: 2022 IEEE region 10 symposium (TENSYMP). IEEE, pp 1\u20136","DOI":"10.1109\/TENSYMP54529.2022.9864421"},{"issue":"1","key":"2424_CR79","doi-asserted-by":"publisher","first-page":"687","DOI":"10.1609\/icwsm.v10i1.14811","volume":"10","author":"L Silva","year":"2021","unstructured":"Silva L, Mondal M, Correa D et al (2021) Analyzing the targets of hate in online social media. Proc Int AAAI Conf Web Soc Media 10(1):687\u2013690. https:\/\/doi.org\/10.1609\/icwsm.v10i1.14811","journal-title":"Proc Int AAAI Conf Web Soc Media"},{"issue":"12","key":"2424_CR80","doi-asserted-by":"publisher","first-page":"pgad385","DOI":"10.1093\/pnasnexus\/pgad385","volume":"2","author":"I Smirnov","year":"2023","unstructured":"Smirnov I, Oprea C, Strohmaier M (2023) Toxic comments are associated with reduced activity of volunteer editors on wikipedia. PNAS Nexus 2(12):pgad385. https:\/\/doi.org\/10.1093\/pnasnexus\/pgad385","journal-title":"PNAS Nexus"},{"key":"2424_CR81","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1007\/BF02680544","volume":"11","author":"CR Sunstein","year":"1995","unstructured":"Sunstein CR (1995) Democracy and the problem of free speech. Publ Res Q 11:58\u201372","journal-title":"Publ Res Q"},{"key":"2424_CR82","first-page":"18","volume":"13","author":"NP Suzor","year":"2019","unstructured":"Suzor NP, West SM, Quodling A et al (2019) What do we mean when we talk about transparency? Toward meaningful transparency in commercial content moderation. Int J Commun 13:18","journal-title":"Int J Commun"},{"key":"2424_CR83","unstructured":"TrollWall AI (2025) Trollwall AI: AI-powered community management solution. https:\/\/trollwall.ai"},{"key":"2424_CR84","doi-asserted-by":"publisher","first-page":"1260974","DOI":"10.3389\/frsps.2024.1260974","volume":"2","author":"TI Vaughan-Johnston","year":"2024","unstructured":"Vaughan-Johnston TI, Nguyen A, Jacobson JA (2024) A surprising lack of consequences when constraining language. Front Soc Psychol 2:1260974. https:\/\/doi.org\/10.3389\/frsps.2024.1260974","journal-title":"Front Soc Psychol"},{"issue":"7","key":"2424_CR85","doi-asserted-by":"publisher","first-page":"1863","DOI":"10.1007\/s10115-022-01690-9","volume":"64","author":"F Ventura","year":"2022","unstructured":"Ventura F, Greco S, Apiletti D et al (2022) Trusting deep learning natural-language models via local and global explanations. Knowl Inf Syst 64(7):1863\u20131907","journal-title":"Knowl Inf Syst"},{"issue":"7","key":"2424_CR86","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1089\/cyber.2022.0009","volume":"25","author":"S Wachs","year":"2022","unstructured":"Wachs S, G\u00e1mez-Guadix M, Wright MF (2022) Online hate speech victimization and depressive symptoms among adolescents: the protective role of resilience. Cyberpsychol Behav Soc Netw 25(7):416\u2013423. https:\/\/doi.org\/10.1089\/cyber.2022.0009","journal-title":"Cyberpsychol Behav Soc Netw"},{"key":"2424_CR87","doi-asserted-by":"crossref","unstructured":"Waseem Z, Davidson T, Warmsley D et al (2017) Understanding abuse: a typology of abusive language detection subtasks. http:\/\/arxiv.org\/abs\/1705.09899","DOI":"10.18653\/v1\/W17-3012"},{"key":"2424_CR88","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-45304-5","volume-title":"Introduction to digital humanism: a textbook","author":"H Werthner","year":"2024","unstructured":"Werthner H, Ghezzi C, Kramer J et al (2024) Introduction to digital humanism: a textbook. Springer, Cham"},{"key":"2424_CR89","doi-asserted-by":"publisher","unstructured":"Yee K, Sebag AS, Redfield O et al (2022) A keyword based approach to understanding the overpenalization of marginalized groups by English marginal abuse models on twitter. https:\/\/doi.org\/10.48550\/arXiv.2210.06351","DOI":"10.48550\/arXiv.2210.06351"},{"issue":"1","key":"2424_CR90","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/13600834.2021.1905593","volume":"31","author":"GK Young","year":"2022","unstructured":"Young GK (2022) How much is too much: the difficulties of social media content moderation. Inf Commun Technol Law 31(1):1\u201316","journal-title":"Inf Commun Technol Law"},{"issue":"4","key":"2424_CR92","doi-asserted-by":"publisher","first-page":"559","DOI":"10.47743\/saeb-2019-0045","volume":"66","author":"A Zait","year":"2019","unstructured":"Zait A, Andrei AG (2019) Civic engagement at the crossroads of online and offline spaces: a PLS-SEM assessment. Sci Ann Econ Bus 66(4):559\u2013572","journal-title":"Sci Ann Econ Bus"},{"issue":"4","key":"2424_CR93","doi-asserted-by":"publisher","first-page":"525","DOI":"10.5771\/2192-4007-2018-4-525","volume":"7","author":"M Ziegele","year":"2018","unstructured":"Ziegele M, Jost P, Bormann M et al (2018) Journalistic counter-voices in comment sections: patterns, determinants, and potential consequences of interactive moderation of uncivil user comments. Stud Commun 7(4):525\u2013554. https:\/\/doi.org\/10.5771\/2192-4007-2018-4-525","journal-title":"Stud Commun"}],"container-title":["AI &amp; SOCIETY"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00146-025-02424-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00146-025-02424-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00146-025-02424-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T02:59:59Z","timestamp":1770087599000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00146-025-02424-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,7]]},"references-count":90,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["2424"],"URL":"https:\/\/doi.org\/10.1007\/s00146-025-02424-5","relation":{},"ISSN":["0951-5666","1435-5655"],"issn-type":[{"value":"0951-5666","type":"print"},{"value":"1435-5655","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,7]]},"assertion":[{"value":"13 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}]}}