{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T08:08:09Z","timestamp":1771056489941,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T00:00:00Z","timestamp":1764374400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T00:00:00Z","timestamp":1767744000000},"content-version":"vor","delay-in-days":39,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"DOI":"10.1007\/s11063-025-11816-z","type":"journal-article","created":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:36:23Z","timestamp":1764398183000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Representative Sample Augmented Hateful Memes Detection"],"prefix":"10.1007","volume":"58","author":[{"given":"Yuting","family":"He","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zetao","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,29]]},"reference":[{"key":"11816_CR1","doi-asserted-by":"crossref","unstructured":"Bhandari A, Shah S B, Thapa S, et al (2023) Crisishatemm: multimodal analysis of directed and undirected hate speech in text-embedded images from Russia-Ukraine conflict. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1994\u20132003.","DOI":"10.1109\/CVPRW59228.2023.00193"},{"key":"11816_CR2","doi-asserted-by":"crossref","unstructured":"Cao R, Hee M S, Kuek A, et al (2023) Pro-cap: Leveraging a frozen vision-language model for hateful meme detection. In: Proceedings of the 31st ACM international conference on multimedia, pp 5244\u20135252.","DOI":"10.1145\/3581783.3612498"},{"key":"11816_CR3","unstructured":"Cao R, Lee RKW, Chong WH et al (2023) Prompting for multimodal hateful meme classification. arxiv preprint http:\/\/arxiv.org\/abs\/2302.04156."},{"key":"11816_CR4","doi-asserted-by":"crossref","unstructured":"Chu L, Hu X, Hu J et al (2018) Exact and consistent interpretation for piecewise linear neural networks: a closed form solution. In: Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining, pp 1244\u20131253.","DOI":"10.1145\/3219819.3220063"},{"key":"11816_CR5","unstructured":"Devlin J, Chang MW, Lee K, et al. Bert: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 2019: 4171\u20134186."},{"key":"11816_CR6","doi-asserted-by":"crossref","unstructured":"Fersini E, Gasparini F, Rizzi G et al (2022) SemEval-2022 task 5: Multimedia automatic misogyny identification. In: Proceedings of the 16th international workshop on semantic evaluation (SemEval-2022), pp 533\u2013549.","DOI":"10.18653\/v1\/2022.semeval-1.74"},{"key":"11816_CR7","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S et al (2016) Deep residual learning for image recognition. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"11816_CR8","doi-asserted-by":"crossref","unstructured":"Hee MS, Lee RKW, Chong WH (2022) On explaining multimodal hateful meme detection models. In: Proceedings of the ACM web conference 2022, pp 3651\u20133655.","DOI":"10.1145\/3485447.3512260"},{"key":"11816_CR9","doi-asserted-by":"crossref","unstructured":"Hee MS, Chong WH, Lee RKW (2023) Decoding the underlying meaning of multimodal hateful memes. arxiv preprint http:\/\/arxiv.org\/abs\/2305.17678.","DOI":"10.24963\/ijcai.2023\/665"},{"issue":"11","key":"11816_CR10","doi-asserted-by":"publisher","first-page":"12833","DOI":"10.1007\/s10462-023-10459-7","volume":"56","author":"PCQ Hermida","year":"2023","unstructured":"Hermida PCQ, Santos EM (2023) Detecting hate speech in memes: a review. Artif Intell Rev 56(11):12833\u201312851","journal-title":"Artif Intell Rev"},{"key":"11816_CR11","doi-asserted-by":"crossref","unstructured":"Jeshion R (2018) Slurs, dehumanization, and the expression of contempt. Bad words Philos Perspect Slurs, pp 77\u2013107.","DOI":"10.1093\/oso\/9780198758655.003.0005"},{"issue":"3","key":"11816_CR12","doi-asserted-by":"publisher","first-page":"1800","DOI":"10.1109\/TAFFC.2024.3378698","volume":"15","author":"S Sharma","year":"2024","unstructured":"Sharma S, Ramaneswaran S, Akhtar MS et al (2024) Emotion-aware multimodal fusion for meme emotion detection. IEEE Trans Affect Comput 15(3):1800\u20131811","journal-title":"IEEE Trans Affect Comput"},{"key":"11816_CR13","doi-asserted-by":"crossref","unstructured":"Khalane A, Shaikh T (2022) Context-aware multimodal emotion recognition. In: Proceedings of international conference on information technology and applications: ICITA 2021. 
Springer Nature Singapore, Singapore, pp 51\u201361.","DOI":"10.1007\/978-981-16-7618-5_5"},{"key":"11816_CR14","doi-asserted-by":"crossref","unstructured":"Sun T, Wang W, Jing L et al (2022) Counterfactual reasoning for out-of-distribution multimodal sentiment analysis. In: Proceedings of the 30th ACM international conference on multimedia, pp 15\u201323.","DOI":"10.1145\/3503161.3548211"},{"key":"11816_CR15","doi-asserted-by":"crossref","unstructured":"Yu W, Xu H, Yuan Z et al (2021) Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: Proceedings of the AAAI conference on artificial intelligence, vol  35,  10790\u201310797.","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"11816_CR16","doi-asserted-by":"crossref","unstructured":"Jing L, Song X, Ouyang K et al (2023) Multi-source semantic graph-based multimodal sarcasm explanation generation. arxiv preprint http:\/\/arxiv.org\/abs\/2306.16650.","DOI":"10.18653\/v1\/2023.acl-long.635"},{"key":"11816_CR17","doi-asserted-by":"crossref","unstructured":"Tang B, Lin B, Yan H et al (2024) Leveraging generative large language models with visual instruction and demonstration retrieval for multimodal sarcasm detection. In: Proceedings of the 2024 conference of the North American chapter of the association for computational linguistics: human language technologies, pp 1732\u20131742.","DOI":"10.18653\/v1\/2024.naacl-long.97"},{"key":"11816_CR18","doi-asserted-by":"crossref","unstructured":"Wang X, Zhang Y, Jing L (2025) Can large vision-language models understand multimodal sarcasm?. 
arxiv preprint http:\/\/arxiv.org\/abs\/2508.03654.","DOI":"10.1145\/3746252.3760864"},{"key":"11816_CR19","unstructured":"Simpson JA, Weiner ESC (eds) (1989) The oxford English dictionary, 2nd ed., Clarendon Press, Oxford"},{"key":"11816_CR20","doi-asserted-by":"crossref","unstructured":"Lam PCH, Chu L, Torgonskiy M et al (2021) Finding representative interpretations on convolutional neural networks. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1345\u20131354.","DOI":"10.1109\/ICCV48922.2021.00138"},{"key":"11816_CR21","unstructured":"Lippe P, Holla N, Chandra S et al (2020) A multimodal framework for the detection of hateful memes. arxiv preprint http:\/\/arxiv.org\/abs\/2012.12871."},{"issue":"4","key":"11816_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2024.103772","volume":"61","author":"F Wu","year":"2024","unstructured":"Wu F, Gao B, Pan X et al (2024) Fuser: an enhanced multimodal fusion framework with congruent reinforced perceptron for hateful memes detection. Inf Process Manage 61(4):103772","journal-title":"Inf Process Manage"},{"key":"11816_CR23","unstructured":"Mehta S, Rastegari M (2021) Mobilevit: Light-weight, general-purpose, and mobile-friendly vision transformer. arxiv 2021. arxiv preprint http:\/\/arxiv.org\/abs\/2110.02178."},{"key":"11816_CR24","doi-asserted-by":"crossref","unstructured":"Aggarwal P, Chawla P, Das M et al (2023) Hateproof: are hateful meme detection systems really robust?. In: Proceedings of the ACM web conference,pp 3734\u20133743.","DOI":"10.1145\/3543507.3583356"},{"key":"11816_CR25","doi-asserted-by":"crossref","unstructured":"Pramanick S, Dimitrov D, Mukherjee R et al (2021) Detecting harmful memes and their targets. arxiv preprint http:\/\/arxiv.org\/abs\/2110.00413.","DOI":"10.18653\/v1\/2021.findings-acl.246"},{"key":"11816_CR26","doi-asserted-by":"crossref","unstructured":"Zhou Y, Chen Z, Yang H (2021) Multimodal learning for hateful memes detection. 
In: 2021 IEEE International conference on multimedia & expo workshops (ICMEW). IEEE, New York, pp 1\u20136.","DOI":"10.1109\/ICMEW53276.2021.9455994"},{"key":"11816_CR27","doi-asserted-by":"crossref","unstructured":"Pramanick S, Sharma S, Dimitrov D et al (2021) MOMENTA: A multimodal framework for detecting harmful memes and their targets. arxiv preprint http:\/\/arxiv.org\/abs\/2109.05184.","DOI":"10.18653\/v1\/2021.findings-emnlp.379"},{"issue":"5","key":"11816_CR28","doi-asserted-by":"publisher","first-page":"053059","DOI":"10.1117\/1.JEI.33.5.053059","volume":"33","author":"G Shi","year":"2024","unstructured":"Shi G, Li L, Song M (2024) Beyond pixels: text-guided deep insights into graphic design image aesthetics. J Electron Imaging 33(5):053059\u2013053059","journal-title":"J Electron Imaging"},{"key":"11816_CR29","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2025.3546107","author":"C Chen","year":"2025","unstructured":"Chen C, Liu X, Song M et al (2025) Unveiling context-related anomalies: knowledge graph empowered decoupling of scene and action for human-related video anomaly detection. IEEE Trans Circuits Syst Video Technol. https:\/\/doi.org\/10.1109\/TCSVT.2025.3546107","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"6","key":"11816_CR30","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren S, He K, Girshick R et al (2016) Faster R-CNN: Towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11816_CR31","doi-asserted-by":"crossref","unstructured":"Ribeiro MT, Singh S, Guestrin C (2016) \"Why should I trust you?\" Explaining the predictions of any classifier. 
In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp: 1135\u20131144.","DOI":"10.1145\/2939672.2939778"},{"key":"11816_CR32","unstructured":"Sanh V, Debut L, Chaumond J et al (2019) DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arxiv preprint http:\/\/arxiv.org\/abs\/1910.01108."},{"key":"11816_CR33","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1007\/s11263-019-01228-7","volume":"128","author":"RR Selvaraju","year":"2020","unstructured":"Selvaraju RR, Cogswell M, Das A et al (2020) Grad-CAM: visual explanations from deep networks via gradient-based localization. Int J Comput Vis 128:336\u2013359","journal-title":"Int J Comput Vis"},{"key":"11816_CR34","doi-asserted-by":"crossref","unstructured":"Sharma P, Ding N, Goodman S et al (2018) Conceptual captions: A cleaned, hypernymed, image alt-text dataset for automatic image captioning. In: Proceedings of the 56th annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 2556\u20132565.","DOI":"10.18653\/v1\/P18-1238"},{"key":"11816_CR35","doi-asserted-by":"crossref","unstructured":"Sharma S, Akhtar M S, Nakov P et al (2022) DISARM: Detecting the victims targeted by harmful memes. arxiv preprint http:\/\/arxiv.org\/abs\/2205.05738.","DOI":"10.18653\/v1\/2022.findings-naacl.118"},{"key":"11816_CR36","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arxiv preprint http:\/\/arxiv.org\/abs\/1409.1556."},{"key":"11816_CR37","doi-asserted-by":"crossref","unstructured":"Badour J, Brown JA (2021) Hateful memes classification using machine learning. In: 2021 IEEE Symposium Series on Computational Intelligence (SSCI). 
IEEE, New York, pp 1\u20138.","DOI":"10.1109\/SSCI50451.2021.9659896"},{"key":"11816_CR38","unstructured":"Suryawanshi S, Chakravarthi BR, Arcan M et al (2020) Multimodal meme dataset (MultiOFF) for identifying offensive content in image and text. In: Proceedings of the second workshop on trolling, aggression and cyberbullying, pp 32\u201341."},{"key":"11816_CR39","doi-asserted-by":"crossref","unstructured":"Xu J, De Mello S, Liu S et al (2022) Groupvit: Semantic segmentation emerges from text supervision. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 18134\u201318144.","DOI":"10.1109\/CVPR52688.2022.01760"},{"key":"11816_CR40","doi-asserted-by":"crossref","unstructured":"Yang C, Zhu F, Liu G et al (2022) Multimodal hate speech detection via cross-domain knowledge transfer. In: Proceedings of the 30th ACM International Conference on Multimedia, pp 4505\u20134514.","DOI":"10.1145\/3503161.3548255"},{"key":"11816_CR41","doi-asserted-by":"crossref","unstructured":"Zhou Y, Chen Z, Yang H (2021) Multimodal learning for hateful memes detection. In: 2021 IEEE International conference on multimedia & expo workshops (ICMEW). IEEE, New York, pp 1\u20136.","DOI":"10.1109\/ICMEW53276.2021.9455994"},{"key":"11816_CR42","doi-asserted-by":"crossref","unstructured":"Zhu J, Lee RKW, Chong WH (2022) Multimodal zero-shot hateful meme detection. In: Proceedings of the 14th ACM web science conference 2022, pp 382\u2013389.","DOI":"10.1145\/3501247.3531557"},{"issue":"9","key":"11816_CR43","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0274300","volume":"17","author":"Y Chen","year":"2022","unstructured":"Chen Y, Pan F (2022) Multimodal detection of hateful memes by applying a vision-language pre-training model. 
PLoS ONE 17(9):e0274300","journal-title":"PLoS ONE"},{"key":"11816_CR44","first-page":"2611","volume":"33","author":"D Kiela","year":"2020","unstructured":"Kiela D, Firooz H, Mohan A et al (2020) The hateful memes challenge: detecting hate speech in multimodal memes. Adv Neural Inf Process Syst 33:2611\u20132624","journal-title":"Adv Neural Inf Process Syst"},{"issue":"1","key":"11816_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-019-0197-0","volume":"6","author":"C Shorten","year":"2019","unstructured":"Shorten C, Khoshgoftaar TM (2019) A survey on image data augmentation for deep learning. J Big Data 6(1):1\u201348","journal-title":"J Big Data"},{"key":"11816_CR46","unstructured":"Zhang H, Cisse M, Dauphin YN et al (2017) mixup: beyond empirical risk minimization. arxiv preprint http:\/\/arxiv.org\/abs\/1710.09412."},{"key":"11816_CR47","unstructured":"Yun S, Han D, Oh SJ et al (2019) Cutmix: regularization strategy to train strong classifiers with localizable features. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6023\u20136032."},{"key":"11816_CR48","unstructured":"Mei J, Chen J, Lin W et al (2023) Improving hateful meme detection through retrieval-guided contrastive learning. arxiv preprint http:\/\/arxiv.org\/abs\/2311.08110."},{"key":"11816_CR49","doi-asserted-by":"crossref","unstructured":"Chen J, Yang Z, Yang D (2020) Mixtext: Linguistically-informed interpolation of hidden space for semi-supervised text classification. arxiv preprint http:\/\/arxiv.org\/abs\/2004.12239.","DOI":"10.18653\/v1\/2020.acl-main.194"},{"issue":"12","key":"11816_CR50","doi-asserted-by":"publisher","first-page":"12605","DOI":"10.1109\/TKDE.2023.3270940","volume":"35","author":"T Sun","year":"2023","unstructured":"Sun T, Jing L, Wei Y et al (2023) Dual consistency-enhanced semi-supervised sentiment analysis towards COVID-19 tweets. 
IEEE Trans Knowl Data Eng 35(12):12605\u201312617","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"11816_CR51","unstructured":"Liu Z, Tang Z, Shi X et al (2022) Learning multimodal data augmentation in feature space. arxiv preprint\nhttp:\/\/arxiv.org\/abs\/2212.14453."},{"key":"11816_CR52","first-page":"34892","volume":"36","author":"H Liu","year":"2023","unstructured":"Liu H, Li C, Wu Q et al (2023) Visual instruction tuning. Adv Neural Inf Process Syst 36:34892\u201334916","journal-title":"Adv Neural Inf Process Syst"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-025-11816-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-025-11816-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-025-11816-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T07:25:01Z","timestamp":1771053901000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-025-11816-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,29]]},"references-count":52,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,2]]}},"alternative-id":["11816"],"URL":"https:\/\/doi.org\/10.1007\/s11063-025-11816-z","relation":{},"ISSN":["1573-773X"],"issn-type":[{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,29]]},"assertion":[{"value":"4 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 
2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics Approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for Publication"}}],"article-number":"4"}}