{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T15:24:15Z","timestamp":1759937055514,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589335.3652504","type":"proceedings-article","created":{"date-parts":[[2024,5,12]],"date-time":"2024-05-12T18:41:21Z","timestamp":1715539281000},"page":"1681-1689","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Decoding Memes: A Comprehensive Analysis of Late and Early Fusion Models for Explainable Meme Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3084-0604","authenticated-orcid":false,"given":"Faseela","family":"Abdullakutty","sequence":"first","affiliation":[{"name":"School of Computing, Robert Gordon University, Aberdeen, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0191-7171","authenticated-orcid":false,"given":"Usman","family":"Naseem","sequence":"additional","affiliation":[{"name":"School of Computing, Macquarie University, Sydney, Australia"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Jawad Khan, and Young-Koo Lee.","author":"Afridi Tariq Habib","year":"2021","unstructured":"Tariq Habib Afridi, Aftab Alam, Muhammad Numan Khan, Jawad Khan, and Young-Koo Lee. 2021. A multimodal memes classification: A survey and open research issues. In Innovations in Smart Cities Applications Volume 4: The Proceedings of the 5th International Conference on Smart City Applications. Springer, 1451--1466."},{"key":"e_1_3_2_2_2_1","volume-title":"On the benefits of early fusion in multimodal representation learning. arXiv preprint arXiv:2011.07191","author":"Barnum George","year":"2020","unstructured":"George Barnum, Sabera Talukder, and Yisong Yue. 2020. On the benefits of early fusion in multimodal representation learning. arXiv preprint arXiv:2011.07191 (2020)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Lisa Bonheme and Marek Grzes. 2020. SESAM at SemEval-2020 task 8: investigating the relationship between image and text in sentiment analysis of memes. (2020).","DOI":"10.18653\/v1\/2020.semeval-1.102"},{"key":"e_1_3_2_2_4_1","volume-title":"Fusion strategies for large-scale multi-modal image retrieval. Transactions on Large-Scale Data-and Knowledge-Centered Systems XXXIII","author":"Budikova Petra","year":"2017","unstructured":"Petra Budikova, Michal Batko, and Pavel Zezula. 2017. Fusion strategies for large-scale multi-modal image retrieval. Transactions on Large-Scale Data-and Knowledge-Centered Systems XXXIII (2017), 146--184."},{"key":"e_1_3_2_2_5_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_6_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.23919\/FUSION45008.2020.9190246"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.09.025"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CONIT51480.2021.9498415"},{"key":"e_1_3_2_2_10_1","volume-title":"Farkhund Iqbal, Amanullah Yasin, Gautam Srivastava, Dawid Po\u0142ap, Thippa Reddy Gadekallu, and Zunera Jalil.","author":"Hamza Ameer","year":"2023","unstructured":"Ameer Hamza, Abdul Rehman Javed, Farkhund Iqbal, Amanullah Yasin, Gautam Srivastava, Dawid Po\u0142ap, Thippa Reddy Gadekallu, and Zunera Jalil. 2023. Multimodal Religiously Hateful Social Media Memes Classification based on Textual and Image Data. ACM Transactions on Asian and Low-Resource Language Information Processing (2023)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.387"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512260"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.33564\/IJEAST.2021.v06i02.025"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3587427"},{"key":"e_1_3_2_2_15_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942 (2019)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570450"},{"key":"e_1_3_2_2_17_1","volume-title":"Proceedings of De-Factify: Workshop on Multimodal Fact Checking and Hate Speech Detection, CEUR.","author":"Nguyen Thanh Tin","year":"2022","unstructured":"Thanh Tin Nguyen, Nhat Truong Pham, Ngoc Duy Nguyen, Hai Nguyen, Long H Nguyen, and Yong-Guk Kim. 2022. HCILab at Memotion 2.0 2022: Analysis of sentiment, emotion and intensity of emotion classes from meme images using single and multi modalities. In Proceedings of De-Factify: Workshop on Multimodal Fact Checking and Hate Speech Detection, CEUR."},{"key":"e_1_3_2_2_18_1","volume-title":"A multimodal framework for the detection of hateful memes. arXiv preprint arXiv:2012.12871","author":"Phillip Lippe","year":"2020","unstructured":"Lippe Phillip, Holla Nithin, Chandra Shantanu, Rajamanickam Santhosh, Antoniou Georgios, Shutova Ekaterina, and Yannakoudakis Helen. 2020. A multimodal framework for the detection of hateful memes. arXiv preprint arXiv:2012.12871 (2020)."},{"key":"e_1_3_2_2_19_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"key":"e_1_3_2_2_20_1","volume-title":"SemEval-2020 Task 8: Memotion Analysis--The Visuo-Lingual Metaphor! arXiv preprint arXiv:2008.03781","author":"Sharma Chhavi","year":"2020","unstructured":"Chhavi Sharma, Deepesh Bhageria, William Scott, Srinivas Pykl, Amitava Das, Tanmoy Chakraborty, Viswanath Pulabaigari, and Bjorn Gamback. 2020. SemEval-2020 Task 8: Memotion Analysis--The Visuo-Lingual Metaphor! arXiv preprint arXiv:2008.03781 (2020)."},{"key":"e_1_3_2_2_21_1","volume-title":"Dimitar Dimitrov, Giovanni Da San Martino, Hamed Firooz, Alon Halevy, Fabrizio Silvestri, Preslav Nakov, and Tanmoy Chakraborty.","author":"Sharma Shivam","year":"2022","unstructured":"Shivam Sharma, Firoj Alam, Md Shad Akhtar, Dimitar Dimitrov, Giovanni Da San Martino, Hamed Firooz, Alon Halevy, Fabrizio Silvestri, Preslav Nakov, and Tanmoy Chakraborty. 2022. Detecting and understanding harmful memes: A survey. arXiv preprint arXiv:2205.04274 (2022)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.semeval-1.81"},{"key":"e_1_3_2_2_23_1","volume-title":"Accelerating very deep convolutional networks for classification and detection","author":"Zhang Xiangyu","year":"2015","unstructured":"Xiangyu Zhang, Jianhua Zou, Kaiming He, and Jian Sun. 2015. Accelerating very deep convolutional networks for classification and detection. IEEE transactions on pattern analysis and machine intelligence, Vol. 38, 10 (2015), 1943--1955."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-98358-1_47"}],"event":{"name":"WWW '24: The ACM Web Conference 2024","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Singapore Singapore","acronym":"WWW '24"},"container-title":["Companion Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3652504","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589335.3652504","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:33:58Z","timestamp":1755822838000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3652504"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":24,"alternative-id":["10.1145\/3589335.3652504","10.1145\/3589335"],"URL":"https:\/\/doi.org\/10.1145\/3589335.3652504","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}