{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,16]],"date-time":"2026-07-16T05:18:50Z","timestamp":1784179130785,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1145\/3696410.3714534","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:57:28Z","timestamp":1745362648000},"page":"5255-5263","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Cross-Modal Transfer from Memes to Videos: Addressing Data Scarcity in Hateful Video Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-4486-0693","authenticated-orcid":false,"given":"Han","family":"Wang","sequence":"first","affiliation":[{"name":"Singapore University of Technology and Design, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1325-5888","authenticated-orcid":false,"given":"Rui Yang","family":"Tan","sequence":"additional","affiliation":[{"name":"Singapore University of Technology and Design, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1986-7750","authenticated-orcid":false,"given":"Roy Ka-Wei","family":"Lee","sequence":"additional","affiliation":[{"name":"Singapore University of Technology and Design, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the Twelfth Language Resources and Evaluation Conference. 4309--4319","author":"Alc\u00e2ntara C.","unstructured":"C. Alc\u00e2ntara, V. Moreira, and D. Feijo. 2020. Offensive video detection: dataset and baseline results. In Proceedings of the Twelfth Language Resources and Evaluation Conference. 4309--4319."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-75762-5_55"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2023.3252401"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"2","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is space-time attention all you need for video understanding?. In Proceedings of the International Conference on Machine Learning (ICML), Vol. 2. 4."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 31st ACM International Conference on Multimedia. 5244--5252","author":"Cao R.","unstructured":"R. Cao, M. S. Hee, A. Kuek, W. H. Chong, R. K. W. Lee, and J. Jiang. 2023. Procap: Leveraging a frozen vision-language model for hateful meme detection. In Proceedings of the 31st ACM International Conference on Multimedia. 5244--5252."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.557"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"R. Cao R. K. W. Lee W. H. Chong and J. Jiang. 2023. Prompting for Multimodal Hateful Meme Classification. arXiv preprint arXiv:2302.04156 (2023).","DOI":"10.18653\/v1\/2022.emnlp-main.22"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394231.3397890"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the ACM on Web Conference","author":"Cao R.","year":"2024","unstructured":"R. Cao, R. K. W. Lee, and J. Jiang. 2024. Modularized Networks for Few-shot Hateful Meme Detection. In Proceedings of the ACM on Web Conference 2024. 4575--4584."},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the IEEE International Conference on Computer Vision (ICCV). 6568--6577","author":"Carreira Jo\u00e3o","year":"2017","unstructured":"Jo\u00e3o Carreira and Andrew Zisserman. 2017. Temporal 3D ConvNets: New Architecture and Transfer Learning for Video Classification. In Proceedings of the IEEE International Conference on Computer Vision (ICCV). 6568--6577."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00530-023-01051-8"},{"key":"e_1_3_2_1_12_1","volume-title":"Hatemm: A Multi-Modal Dataset for Hate Video Classification. In Proceedings of the International AAAI Conference on Web and Social Media","volume":"17","author":"Das M.","unstructured":"M. Das, R. Raj, P. Saha, B. Mathew, M. Gupta, and A. Mukherjee. 2023. Hatemm: A Multi-Modal Dataset for Hate Video Classification. In Proceedings of the International AAAI Conference on Web and Social Media, Vol. 17. 1014--1023."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v11i1.14955"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_15_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_16_1","unstructured":"A. Dubey A. Jauhri A. Pandey A. Kadian A. Al-Dahle A. Letman and R. Ganapathy. 2024. The LLAMA 3 Herd of Models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.semeval-1.74"},{"key":"e_1_3_2_1_18_1","volume-title":"Overview of the EVALITA 2018 task on automatic misogyny identification (AMI). In CEUR Workshop Proceedings","volume":"2263","author":"Fersini Elisabetta","year":"2018","unstructured":"Elisabetta Fersini, Debora Nozza, and Paolo Rosso. 2018. Overview of the EVALITA 2018 task on automatic misogyny identification (AMI). In CEUR Workshop Proceedings, Vol. 2263. 1--9."},{"key":"e_1_3_2_1_19_1","volume-title":"Detecting online hate speech using contextaware models. arXiv preprint arXiv:1710.07395","author":"Gao Lei","year":"2017","unstructured":"Lei Gao and Ruihong Huang. 2017. Detecting online hate speech using contextaware models. arXiv preprint arXiv:1710.07395 (2017)."},{"key":"e_1_3_2_1_20_1","volume-title":"Bridging Modalities: Enhancing Cross-Modality Hate Speech Detection with Few-Shot In-Context Learning. arXiv preprint arXiv:2410.05600","author":"Hee M. S.","year":"2024","unstructured":"M. S. Hee, A. Kumaresan, and R. K.W. Lee. 2024. Bridging Modalities: Enhancing Cross-Modality Hate Speech Detection with Few-Shot In-Context Learning. arXiv preprint arXiv:2410.05600 (2024)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.254"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning (ICML). https:\/\/arxiv.org\/abs\/2106","author":"Hu Edward","year":"2021","unstructured":"Edward Hu, Xuezhi Peng, Yi Li, Xifeng Liu, Jie He, Ziyang Chen, Ziyi Li, Yiming Zhang, Caiming Xiong, and Kai-Wei Chang. 2021. LoRA: Low-Rank Adaptation of Large Language Models. In Proceedings of the 39th International Conference on Machine Learning (ICML). https:\/\/arxiv.org\/abs\/2106.09685"},{"key":"e_1_3_2_1_23_1","first-page":"2611","article-title":"The Hateful Memes Challenge: Detecting Hate Speech in Multimodal Memes","volume":"33","author":"Kiela Douwe","year":"2020","unstructured":"Douwe Kiela, Hamed Firooz, and et al. Mohan, Ankur. 2020. The Hateful Memes Challenge: Detecting Hate Speech in Multimodal Memes. Advances in Neural Information Processing Systems 33 (2020), 2611--2624.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.woah-1.21"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531925"},{"key":"e_1_3_2_1_27_1","volume-title":"Love-Hate Dataset: A Multi-Modal Multi-Platform Dataset Depicting Emotions in the 2023 Israel-Hamas War. In Companion Proceedings of the ACM on Web Conference 2024. 1807","author":"Ng L. H. X.","year":"2024","unstructured":"L. H. X. Ng, A. X. W. Lim, and R. K. W. Lee. 2024. Love-Hate Dataset: A Multi-Modal Multi-Platform Dataset Depicting Emotions in the 2023 Israel-Hamas War. In Companion Proceedings of the ACM on Web Conference 2024. 1807--1815."},{"key":"e_1_3_2_1_28_1","volume-title":"Kenny Tsu Wei Choo, and Roy Ka-Wei Lee.","author":"Ng Ri Chi","year":"2024","unstructured":"Ri Chi Ng, Nirmalendu Prakash, Ming Shan Hee, Kenny Tsu Wei Choo, and Roy Ka-Wei Lee. 2024. SGHateCheck: Functional Tests for Detecting Hate Speech in Low-Resource Languages of Singapore. arXiv preprint arXiv:2405.01842 (2024)."},{"key":"e_1_3_2_1_29_1","first-page":"26462","article-title":"ST-Adapter: Parameter-efficient Image-to-video Transfer Learning","volume":"35","author":"Pan J.","year":"2022","unstructured":"J. Pan, Z. Lin, X. Zhu, J. Shao, and H. Li. 2022. ST-Adapter: Parameter-efficient Image-to-video Transfer Learning. In Advances in Neural Information Processing Systems (NeurIPS), Vol. 35. 26462--26477.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"e_1_3_2_1_30_1","volume-title":"Preslav Nakov, and Tanmoy Chakraborty.","author":"Pramanick Souvik","year":"2021","unstructured":"Souvik Pramanick, Dimitar Dimitrov, Ritam Mukherjee, Shubham Sharma, Md Shad Akhtar, Preslav Nakov, and Tanmoy Chakraborty. 2021. Detecting Harmful Memes and Their Targets. arXiv preprint arXiv:2110.00413 (2021)."},{"key":"e_1_3_2_1_31_1","first-page":"16","article-title":"Offensive Language Detection Using Multi-level Classification","volume":"6085","author":"Razavi Aminul-Haq","year":"2010","unstructured":"Aminul-Haq Razavi, Diana Inkpen, Sasha Uritsky, and Stan Matwin. 2010. Offensive Language Detection Using Multi-level Classification. Advances in Artificial Intelligence 6085 (2010), 16--27.","journal-title":"Advances in Artificial Intelligence"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-1101"},{"key":"e_1_3_2_1_33_1","volume-title":"Evaluating GPT-3 generated explanations for hateful content moderation. arXiv preprint arXiv:2305.17680","author":"Wang H.","year":"2023","unstructured":"H.Wang, M. S. Hee, M. R. Awal, K. T.W. Choo, and R. K.W. Lee. 2023. Evaluating GPT-3 generated explanations for hateful content moderation. arXiv preprint arXiv:2305.17680 (2023)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681521"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/2390374.2390377"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-2013"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_39_1","volume-title":"2020 International Conference on Computational Science and Computational Intelligence (CSCI). IEEE, 585--590","author":"Wu C. S.","unstructured":"C. S. Wu and U. Bhandary. 2020. Detection of hate speech in videos using machine learning. In 2020 International Conference on Computational Science and Computational Intelligence (CSCI). IEEE, 585--590."},{"key":"e_1_3_2_1_40_1","volume-title":"Kenny Tsu Wei Choo, and Roy Ka-wei Lee","author":"Xiao Yunze","year":"2024","unstructured":"Yunze Xiao, Yujia Hu, Kenny Tsu Wei Choo, and Roy Ka-wei Lee. 2024. Toxi-CloakCN: Evaluating Robustness of Offensive Language Detection in Chinese with Cloaking Perturbations. arXiv preprint arXiv:2406.12223 (2024)."},{"key":"e_1_3_2_1_41_1","unstructured":"H. Yao W. Wu and Z. Li. 2023. Side4video: Spatial-temporal side network for memory-efficient image-to-video transfer learning. arXiv preprint arXiv:2311.15769 (2023)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1144"},{"key":"e_1_3_2_1_43_1","volume-title":"Llava-next: A Strong Zero-shot Video Understanding Model. arXiv preprint arXiv:2407.xxxxx","author":"Zhang Y.","year":"2024","unstructured":"Y. Zhang, B. Li, H. Liu, Y. Lee, L. Gui, D. Fu, and C. Li. 2024. Llava-next: A Strong Zero-shot Video Understanding Model. arXiv preprint arXiv:2407.xxxxx (2024)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93417-4_48"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 14th ACM Web Science Conference","author":"Zhu J.","year":"2022","unstructured":"J. Zhu, R. K.W. Lee, and W. H. Chong. 2022. Multimodal Zero-Shot Hateful Meme Detection. In Proceedings of the 14th ACM Web Science Conference 2022. 382--389."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714534","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714534","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:33Z","timestamp":1750295913000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714534"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":45,"alternative-id":["10.1145\/3696410.3714534","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714534","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}