{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T11:31:48Z","timestamp":1763811108262,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583290","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:51Z","timestamp":1682551851000},"page":"3689-3700","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Same Same, But Different: Conditional Multi-Task Learning for Demographic-Specific Toxicity Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8145-9347","authenticated-orcid":false,"given":"Soumyajit","family":"Gupta","sequence":"first","affiliation":[{"name":"Dept. of Computer Science, University of Texas at Austin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4093-6546","authenticated-orcid":false,"given":"Sooyong","family":"Lee","sequence":"additional","affiliation":[{"name":"Dept. of Computer Science, University of Texas at Austin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2297-3308","authenticated-orcid":false,"given":"Maria","family":"De-Arteaga","sequence":"additional","affiliation":[{"name":"Dept. of Information, Risk and Operation Management, University of Texas at Austin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0056-2834","authenticated-orcid":false,"given":"Matthew","family":"Lease","sequence":"additional","affiliation":[{"name":"School of Information, University of Texas at Austin, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331262"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-75762-5_55"},{"key":"e_1_3_2_1_3_1","volume-title":"Machine learning 28, 1","author":"Baxter Jonathan","year":"1997","unstructured":"Jonathan Baxter. 1997. A Bayesian\/information theoretic model of learning to learn via multiple task sampling. Machine learning 28, 1 (1997), 7\u201339."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308560.3317593"},{"key":"e_1_3_2_1_5_1","volume-title":"International conference on machine learning. PMLR, 794\u2013803","author":"Chen Zhao","year":"2018","unstructured":"Zhao Chen, Vijay Badrinarayanan, Chen-Yu Lee, and Andrew Rabinovich. 2018. Gradnorm: Gradient normalization for adaptive loss balancing in deep multitask networks. In International conference on machine learning. PMLR, 794\u2013803."},{"key":"e_1_3_2_1_6_1","unstructured":"Fran\u00e7ois Chollet. 2015. keras. https:\/\/github.com\/fchollet\/keras."},{"key":"e_1_3_2_1_7_1","volume-title":"The measure and mismeasure of fairness: A critical review of fair machine learning. arXiv preprint arXiv:1808.00023","author":"Corbett-Davies Sam","year":"2018","unstructured":"Sam Corbett-Davies and Sharad Goel. 2018. The measure and mismeasure of fairness: A critical review of fair machine learning. arXiv preprint arXiv:1808.00023 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Multi-task learning with deep neural networks: A survey. arXiv preprint arXiv:2009.09796","author":"Crawshaw Michael","year":"2020","unstructured":"Michael Crawshaw. 2020. Multi-task learning with deep neural networks: A survey. arXiv preprint arXiv:2009.09796 (2020)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2090236.2090255"},{"key":"e_1_3_2_1_10_1","volume-title":"Conference on fairness, accountability and transparency. PMLR, 119\u2013133","author":"Dwork Cynthia","year":"2018","unstructured":"Cynthia Dwork, Nicole Immorlica, Adam\u00a0Tauman Kalai, and Max Leiserson. 2018. Decoupled classifiers for group-fair and efficient machine learning. In Conference on fairness, accountability and transparency. PMLR, 119\u2013133."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00332"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502004"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_17"},{"key":"e_1_3_2_1_14_1","volume-title":"Equality of opportunity in supervised learning. Advances in neural information processing systems 29","author":"Hardt Moritz","year":"2016","unstructured":"Moritz Hardt, Eric Price, and Nati Srebro. 2016. Equality of opportunity in supervised learning. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1206"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/S19-1015"},{"key":"e_1_3_2_1_17_1","volume-title":"Differential validity of employment tests by race: A comprehensive review and analysis.Psychological Bulletin 86, 4","author":"Hunter E","year":"1979","unstructured":"John\u00a0E Hunter, Frank\u00a0L Schmidt, and Ronda Hunter. 1979. Differential validity of employment tests by race: A comprehensive review and analysis.Psychological Bulletin 86, 4 (1979), 721."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 7482\u20137491","author":"Kendall Alex","year":"2018","unstructured":"Alex Kendall, Yarin Gal, and Roberto Cipolla. 2018. Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In Proceedings of the IEEE conference on computer vision and pattern recognition. 7482\u20137491."},{"key":"e_1_3_2_1_19_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Jon Kleinberg Jens Ludwig Sendhil Mullainathan and Ashesh Rambachan. 2018. Algorithmic fairness. In AEA P&P.","DOI":"10.1257\/pandp.20181018"},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Machine Learning. PMLR, 5637\u20135664","author":"Koh Pang\u00a0Wei","year":"2021","unstructured":"Pang\u00a0Wei Koh, Shiori Sagawa, Henrik Marklund, Sang\u00a0Michael Xie, Marvin Zhang, Akshay Balsubramani, Weihua Hu, Michihiro Yasunaga, Richard\u00a0Lanas Phillips, Irena Gao, 2021. Wilds: A benchmark of in-the-wild distribution shifts. In International Conference on Machine Learning. PMLR, 5637\u20135664."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.579"},{"key":"e_1_3_2_1_23_1","volume-title":"Fairly Accurate: Learning Optimal Accuracy vs. Fairness Tradeoffs for Hate Speech Detection. arXiv preprint arXiv:2204.07661","author":"Kovatchev Venelin","year":"2022","unstructured":"Venelin Kovatchev, Soumyajit Gupta, and Matthew Lease. 2022. Fairly Accurate: Learning Optimal Accuracy vs. Fairness Tradeoffs for Hate Speech Detection. arXiv preprint arXiv:2204.07661 (2022)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_2_1_25_1","volume-title":"Does mitigating ML\u2019s impact disparity require treatment disparity?Advances in neural information processing systems 31","author":"Lipton Zachary","year":"2018","unstructured":"Zachary Lipton, Julian McAuley, and Alexandra Chouldechova. 2018. Does mitigating ML\u2019s impact disparity require treatment disparity?Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Han Liu Pete Burnap Wafa Alorainy and Matthew\u00a0L Williams. 2019. Fuzzy multi-task learning for hate speech type identification. In The world wide web conference. 3006\u20133012.","DOI":"10.1145\/3308558.3313546"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00197"},{"key":"e_1_3_2_1_28_1","volume-title":"Learning multiple tasks with multilinear relationship networks. Advances in neural information processing systems 30","author":"Long Mingsheng","year":"2017","unstructured":"Mingsheng Long, Zhangjie Cao, Jianmin Wang, and Philip\u00a0S Yu. 2017. Learning multiple tasks with multilinear relationship networks. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.126"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.433"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments. Association for Computational Linguistics","author":"Morgan Skye","year":"2021","unstructured":"Skye Morgan, Tharindu Ranasinghe, and Marcos Zampieri. 2021. WLV-RIT at GermEval 2021: Multitask Learning with Transformers to Detect Toxic, Engaging, and Fact-Claiming Comments. In Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments. Association for Computational Linguistics, Duesseldorf, Germany, 32\u201338."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1302"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_34_1","volume-title":"Multi-task learning with sentiment, emotion, and target detection to recognize hate speech and offensive language. arXiv preprint arXiv:2109.10255","author":"del Arco Flor\u00a0Miriam","year":"2021","unstructured":"Flor\u00a0Miriam Plaza-del Arco, Sercan Halat, Sebastian Pad\u00f3, and Roman Klinger. 2021. Multi-task learning with sentiment, emotion, and target detection to recognize hate speech and offensive language. arXiv preprint arXiv:2109.10255 (2021)."},{"key":"e_1_3_2_1_35_1","volume-title":"FairBatch: Batch Selection for Model Fairness. In 9th International Conference on Learning Representations. The International Conference on Learning Representations.","author":"Roh Yuji","year":"2021","unstructured":"Yuji Roh, Kangwook Lee, Steven\u00a0Euijong Whang, and Changho Suh. 2021. FairBatch: Batch Selection for Model Fairness. In 9th International Conference on Learning Representations. The International Conference on Learning Representations."},{"volume-title":"Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)","author":"R\u00f6ttger Paul","key":"e_1_3_2_1_36_1","unstructured":"Paul R\u00f6ttger, Bertie Vidgen, Dong Nguyen, Zeerak Waseem, Helen Margetts, Janet Pierrehumbert, 2021. HateCheck: Functional Tests for Hate Speech Detection Models. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers). Association for Computational Linguistics, 41\u201358."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376645"},{"key":"e_1_3_2_1_38_1","volume-title":"An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder. 2017. An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098 (2017)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014822"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533216"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying. 126\u2013131","author":"Samghabadi Niloofar\u00a0Safi","year":"2020","unstructured":"Niloofar\u00a0Safi Samghabadi, Parth Patwa, Srinivas Pykl, Prerana Mukherjee, Amitava Das, and Thamar Solorio. 2020. Aggression and misogyny detection using BERT: A multi-task approach. In Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying. 126\u2013131."},{"key":"e_1_3_2_1_42_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1163"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-1101"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.299"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-2038"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-96957-8_20"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00146"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v14i1.7334"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3054719"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.26615\/978-954-452-056-4_141"},{"key":"e_1_3_2_1_52_1","volume-title":"The State of Online Harassment","author":"Vogels A.","year":"2021","unstructured":"Emily\u00a0A. Vogels. 2021. The State of Online Harassment. Pew Res. Center, Washington, DC, USA, Tech. Rep (2021)."},{"key":"e_1_3_2_1_53_1","unstructured":"Chris Wotton. 2019. \u2019same same but different\u2019: The origins of Thailand\u2019s tourist catchphrase. https:\/\/theculturetrip.com\/asia\/thailand\/articles\/same-same-but-different-the-origins-of-thailands-tourist-catchphrase\/"},{"key":"e_1_3_2_1_54_1","volume-title":"Deep Multi-task Representation Learning: A Tensor Factorisation Approach. In 5th International Conference on Learning Representations.","author":"Yang Yongxin","year":"2017","unstructured":"Yongxin Yang and Timothy Hospedales. 2017. Deep Multi-task Representation Learning: A Tensor Factorisation Approach. In 5th International Conference on Learning Representations."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3278721.3278779"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Austin TX USA","acronym":"WWW '23"},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583290","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583290","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:22Z","timestamp":1750178242000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583290"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":55,"alternative-id":["10.1145\/3543507.3583290","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583290","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}