{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T18:26:49Z","timestamp":1777141609628,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100000780","name":"European Commission","doi-asserted-by":"publisher","award":["957252"],"award-info":[{"award-number":["957252"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592254","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"586-591","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["MemeFier: Dual-stage Modality Fusion for Image Meme Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3682-408X","authenticated-orcid":false,"given":"Christos","family":"Koutlis","sequence":"first","affiliation":[{"name":"Centre for Research and Technology Hellas - Information and Communication Technologies Institute, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2132-3430","authenticated-orcid":false,"given":"Manos","family":"Schinas","sequence":"additional","affiliation":[{"name":"Centre for Research and Technology Hellas - Information and Communication Technologies Institute, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5441-7341","authenticated-orcid":false,"given":"Symeon","family":"Papadopoulos","sequence":"additional","affiliation":[{"name":"Centre for Research and Technology Hellas - Information and Communication Technologies Institute, Greece"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIIP53038.2021.9702696"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 30th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a028)","author":"Andrew Galen","year":"2013","unstructured":"Galen Andrew, Raman Arora, Jeff Bilmes, and Karen Livescu. 2013. Deep Canonical Correlation Analysis. In Proceedings of the 30th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a028), Sanjoy Dasgupta and David McAllester (Eds.). PMLR, Atlanta, Georgia, USA, 1247\u20131255. https:\/\/proceedings.mlr.press\/v28\/andrew13.html"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.112"},{"key":"e_1_3_2_1_4_1","volume-title":"Caption enriched samples for improving hateful memes detection. arXiv preprint arXiv:2109.10649","author":"Blaier Efrat","year":"2021","unstructured":"Efrat Blaier, Itzik Malkiel, and Lior Wolf. 2021. Caption enriched samples for improving hateful memes detection. arXiv preprint arXiv:2109.10649 (2021)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.102"},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Learning Representations. https:\/\/arxiv.org\/pdf\/1908","author":"Cai Han","year":"2020","unstructured":"Han Cai, Chuang Gan, Tianzhe Wang, Zhekai Zhang, and Song Han. 2020. Once for All: Train One Network and Specialize it for Efficient Deployment. In International Conference on Learning Representations. https:\/\/arxiv.org\/pdf\/1908.09791.pdf"},{"key":"e_1_3_2_1_7_1","volume-title":"A literature survey on multimodal and multilingual automatic hate speech identification. Multimedia Systems","author":"Chhabra Anusha","year":"2023","unstructured":"Anusha Chhabra and Dinesh\u00a0Kumar Vishwakarma. 2023. A literature survey on multimodal and multilingual automatic hate speech identification. Multimedia Systems (2023), 1\u201328."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.152"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.09.025"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.147"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.114"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.148"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.111"},{"key":"e_1_3_2_1_15_1","volume-title":"Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, X. Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015), 770\u2013778."},{"key":"e_1_3_2_1_16_1","volume-title":"Long short-term memory. Neural computation 9, 8","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735\u20131780."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00159"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.150"},{"key":"e_1_3_2_1_19_1","unstructured":"Douwe Kiela Hamed Firooz Aravind Mohan Vedanuj Goswami Amanpreet Singh Casey\u00a0A Fitzpatrick Peter Bull Greg Lipstein Tony Nelli Ron Zhu 2021. The hateful memes challenge: Competition report. In NeurIPS 2020 Competition and Demonstration Track. PMLR 344\u2013360."},{"key":"e_1_3_2_1_20_1","first-page":"2611","article-title":"The hateful memes challenge: Detecting hate speech in multimodal memes","volume":"33","author":"Kiela Douwe","year":"2020","unstructured":"Douwe Kiela, Hamed Firooz, Aravind Mohan, Vedanuj Goswami, Amanpreet Singh, Pratik Ringshia, and Davide Testuggine. 2020. The hateful memes challenge: Detecting hate speech in multimodal memes. Advances in Neural Information Processing Systems 33 (2020), 2611\u20132624.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"MemeTector: Enforcing deep focus for meme detection. arXiv preprint arXiv:2205.13268","author":"Koutlis Christos","year":"2022","unstructured":"Christos Koutlis, Manos Schinas, and Symeon Papadopoulos. 2022. MemeTector: Enforcing deep focus for meme detection. arXiv preprint arXiv:2205.13268 (2022)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.nlp4pi-1.20"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 18th International Conference on Natural Language Processing (ICON). NLP Association of India (NLPAI), National Institute of Technology Silchar","author":"Kumari Gitanjali","year":"2021","unstructured":"Gitanjali Kumari, Amitava Das, and Asif Ekbal. 2021. Co-attention based Multimodal Factorized Bilinear Pooling for Internet Memes Analysis. In Proceedings of the 18th International Conference on Natural Language Processing (ICON). NLP Association of India (NLPAI), National Institute of Technology Silchar, Silchar, India, 261\u2013270. https:\/\/aclanthology.org\/2021.icon-main.31"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1142\/S012906570000034X"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475625"},{"key":"e_1_3_2_1_26_1","volume-title":"A multimodal framework for the detection of hateful memes. arXiv preprint arXiv:2012.12871","author":"Lippe Phillip","year":"2020","unstructured":"Phillip Lippe, Nithin Holla, Shantanu Chandra, Santhosh Rajamanickam, Georgios Antoniou, Ekaterina Shutova, and Helen Yannakoudakis. 2020. A multimodal framework for the detection of hateful memes. arXiv preprint arXiv:2012.12871 (2020)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.woah-1.21"},{"key":"e_1_3_2_1_28_1","volume-title":"Vilio: State-of-the-art visio-linguistic models applied to hateful memes. arXiv preprint arXiv:2012.07788","author":"Muennighoff Niklas","year":"2020","unstructured":"Niklas Muennighoff. 2020. Vilio: State-of-the-art visio-linguistic models applied to hateful memes. arXiv preprint arXiv:2012.07788 (2020)."},{"key":"e_1_3_2_1_29_1","volume-title":"Multimodal Feature Extraction for Memes Sentiment Classification. In 2022 IEEE 2nd Conference on Information Technology and Data Science (CITDS). IEEE, 285\u2013290","author":"Ouaari Sofiane","year":"2022","unstructured":"Sofiane Ouaari, Tsegaye\u00a0Misikir Tashu, and Tom\u00e1\u0161 Horv\u00e1th. 2022. Multimodal Feature Extraction for Memes Sentiment Classification. In 2022 IEEE 2nd Conference on Information Technology and Data Science (CITDS). IEEE, 285\u2013290."},{"key":"e_1_3_2_1_30_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_1_31_1","volume-title":"Detecting hateful memes using a multimodal deep ensemble. arXiv preprint arXiv:2012.13235","author":"Sandulescu Vlad","year":"2020","unstructured":"Vlad Sandulescu. 2020. Detecting hateful memes using a multimodal deep ensemble. arXiv preprint arXiv:2012.13235 (2020)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.99"},{"key":"e_1_3_2_1_33_1","volume-title":"Hamed Firooz, Alon Halevy, Fabrizio Silvestri","author":"Sharma Shivam","year":"2022","unstructured":"Shivam Sharma, Firoj Alam, Md Akhtar, Dimitar Dimitrov, Giovanni Da\u00a0San Martino, Hamed Firooz, Alon Halevy, Fabrizio Silvestri, Preslav Nakov, Tanmoy Chakraborty, 2022. Detecting and understanding harmful memes: A survey. arXiv preprint arXiv:2205.04274 (2022)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.113"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the second workshop on trolling, aggression and cyberbullying. 32\u201341","author":"Suryawanshi Shardul","year":"2020","unstructured":"Shardul Suryawanshi, Bharathi\u00a0Raja Chakravarthi, Mihael Arcan, and Paul Buitelaar. 2020. Multimodal meme dataset (MultiOFF) for identifying offensive content in image and text. In Proceedings of the second workshop on trolling, aggression and cyberbullying. 32\u201341."},{"key":"e_1_3_2_1_36_1","volume-title":"Visualizing data using t-SNE.Journal of machine learning research 9, 11","author":"Maaten Laurens Van\u00a0der","year":"2008","unstructured":"Laurens Van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE.Journal of machine learning research 9, 11 (2008)."},{"key":"e_1_3_2_1_37_1","volume-title":"Detecting hate speech in memes using multimodal deep learning approaches: Prize-winning solution to hateful memes challenge. arXiv preprint arXiv:2012.12975","author":"Velioglu Riza","year":"2020","unstructured":"Riza Velioglu and Jewgeni Rose. 2020. Detecting hate speech in memes using multimodal deep learning approaches: Prize-winning solution to hateful memes challenge. arXiv preprint arXiv:2012.12975 (2020)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.160"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2019.2958342"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.semeval-1.116"},{"key":"e_1_3_2_1_41_1","volume-title":"Advances in Information Retrieval, Matthias Hagen, Suzan Verberne, Craig Macdonald, Christin Seifert, Krisztian Balog, Kjetil N\u00f8rv\u00e5g","author":"Zhang Yazhou","unstructured":"Yazhou Zhang, Lu Rong, Xiang Li, and Rui Chen. 2022. Multi-modal Sentiment and Emotion Joint Analysis with a Deep Attentive Multi-task Learning Model. In Advances in Information Retrieval, Matthias Hagen, Suzan Verberne, Craig Macdonald, Christin Seifert, Krisztian Balog, Kjetil N\u00f8rv\u00e5g, and Vinay Setty (Eds.). Springer International Publishing, Cham, 518\u2013532."},{"key":"e_1_3_2_1_42_1","volume-title":"MultiMedia Modeling, Bj\u00f6rn \u00de\u00f3r J\u00f3nsson, Cathal Gurrin, Minh-Triet Tran, Duc-Tien Dang-Nguyen, Anita Min-Chun Hu, Binh Huynh Thi\u00a0Thanh","author":"Zhong Qi","unstructured":"Qi Zhong, Qian Wang, and Ji Liu. 2022. Combining Knowledge and Multi-modal Fusion for Meme Classification. In MultiMedia Modeling, Bj\u00f6rn \u00de\u00f3r J\u00f3nsson, Cathal Gurrin, Minh-Triet Tran, Duc-Tien Dang-Nguyen, Anita Min-Chun Hu, Binh Huynh Thi\u00a0Thanh, and Benoit Huet (Eds.). Springer International Publishing, Cham, 599\u2013611."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMEW53276.2021.9455994"},{"key":"e_1_3_2_1_44_1","volume-title":"Enhance multimodal transformer with external label and in-domain pretrain: Hateful meme challenge winning solution. arXiv preprint arXiv:2012.08290","author":"Zhu Ron","year":"2020","unstructured":"Ron Zhu. 2020. Enhance multimodal transformer with external label and in-domain pretrain: Hateful meme challenge winning solution. arXiv preprint arXiv:2012.08290 (2020)."}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","location":"Thessaloniki Greece","acronym":"ICMR '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592254","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592254","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:30Z","timestamp":1750178250000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592254"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":44,"alternative-id":["10.1145\/3591106.3592254","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592254","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}