{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:52:54Z","timestamp":1763196774280,"version":"3.45.0"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819533480","type":"print"},{"value":"9789819533497","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3349-7_31","type":"book-chapter","created":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:49:21Z","timestamp":1763196561000},"page":"403-415","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["EVL-MCoT: Enhanced Vision-Language Multi-CoT for\u00a0Harmful Meme Detection"],"prefix":"10.1007","author":[{"given":"Hao","family":"Yang","sequence":"first","affiliation":[]},{"given":"Jin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xuejie","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,16]]},"reference":[{"key":"31_CR1","unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. In: Advances in Neural Information Processing Systems, vol. 35, pp. 23716\u201323736 (2022)"},{"key":"31_CR2","unstructured":"Berrios, W., Mittal, G., Thrush, T., Kiela, D., Singh, A.: Towards language models that can see: computer vision through the lens of natural language. arXiv preprint arXiv:2306.16410 (2023)"},{"issue":"2","key":"31_CR3","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/s11263-023-01891-x","volume":"132","author":"P Gao","year":"2024","unstructured":"Gao, P., et al.: CLIP-adapter: better vision-language models with feature adapters. Int. J. Comput. Vision 132(2), 581\u2013595 (2024)","journal-title":"Int. J. Comput. Vision"},{"key":"31_CR4","unstructured":"Henderson, M., et al.: Efficient natural language response suggestion for smart reply. arXiv preprint arXiv:1705.00652 (2017)"},{"key":"31_CR5","unstructured":"Kiela, D., Bhooshan, S., Firooz, H., Perez, E., Testuggine, D.: Supervised multimodal bitransformers for classifying images and text (2020). https:\/\/arxiv.org\/abs\/1909.02950"},{"key":"31_CR6","unstructured":"Kiela, D., et al.: The hateful memes challenge: detecting hate speech in multimodal memes. In: Advances in Neural Information Processing Systems, vol. 33, pp. 2611\u20132624 (2020)"},{"key":"31_CR7","doi-asserted-by":"crossref","unstructured":"Kumar, G.K., Nandakumar, K.: Hate-CLIPper: multimodal hateful meme classification based on cross-modal interaction of CLIP features. In: Proceedings of the Second Workshop on NLP for Positive Impact (NLP4PI), pp. 171\u2013183 (2022)","DOI":"10.18653\/v1\/2022.nlp4pi-1.20"},{"key":"31_CR8","doi-asserted-by":"crossref","unstructured":"Kumari, G., Jain, K., Ekbal, A.: M3Hop-CoT: misogynous meme identification with multimodal multi-hop chain-of-thought. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 22105\u201322138, November 2024","DOI":"10.18653\/v1\/2024.emnlp-main.1234"},{"key":"31_CR9","doi-asserted-by":"crossref","unstructured":"Lauren\u00e7on, H., Tronchon, L., Cord, M., Sanh, V.: What matters when building vision-language models? Advances in Neural Information Processing Systems, vol. 37, pp. 87874\u201387907 (2024)","DOI":"10.52202\/079017-2789"},{"key":"31_CR10","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.J., Chang, K.W.: VisualBERT: a simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)"},{"key":"31_CR11","doi-asserted-by":"crossref","unstructured":"Li, Y., Fan, H., Hu, R., Feichtenhofer, C., He, K.: Scaling language-image pre-training via masking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23390\u201323400 (2023)","DOI":"10.1109\/CVPR52729.2023.02240"},{"key":"31_CR12","doi-asserted-by":"crossref","unstructured":"Lin, H., Luo, Z., Gao, W., Ma, J., Wang, B., Yang, R.: Towards explainable harmful meme detection through multimodal debate between large language models. In: Proceedings of the ACM Web Conference 2024, pp. 2359\u20132370 (2024)","DOI":"10.1145\/3589334.3645381"},{"key":"31_CR13","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization (2019). https:\/\/arxiv.org\/abs\/1711.05101"},{"key":"31_CR14","doi-asserted-by":"crossref","unstructured":"Luo, X., Tang, Z., Wang, J., Zhang, X.: Zero-shot cross-domain dialogue state tracking via dual low-rank adaptation. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 5746\u20135765, August 2024","DOI":"10.18653\/v1\/2024.acl-long.312"},{"key":"31_CR15","doi-asserted-by":"crossref","unstructured":"Ma, X., Liu, J., Wang, J., Zhang, X.: FedID: federated interactive distillation for large-scale pretraining language models. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 8566\u20138577, December 2023","DOI":"10.18653\/v1\/2023.emnlp-main.529"},{"key":"31_CR16","doi-asserted-by":"crossref","unstructured":"Nguyen, M.V., et al.: Direct evaluation of chain-of-thought in multi-hop reasoning with knowledge graphs. In: Findings of the Association for Computational Linguistics: ACL 2024, pp. 2862\u20132883 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.168"},{"key":"31_CR17","doi-asserted-by":"crossref","unstructured":"Pramanick, S., et al.: Detecting harmful memes and their targets. In: Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, pp. 2783\u20132796 (2021)","DOI":"10.18653\/v1\/2021.findings-acl.246"},{"key":"31_CR18","doi-asserted-by":"crossref","unstructured":"Qiu, L., et al.: Dynamically fused graph network for multi-hop reasoning. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 6140\u20136150 (2019)","DOI":"10.18653\/v1\/P19-1617"},{"key":"31_CR19","unstructured":"Qu, L., Fu, K., Wang, M., Song, Z., et al.: The rise of AI language pathologists: exploring two-level prompt learning for few-shot weakly-supervised whole slide image classification. In: Advances in Neural Information Processing Systems, vol. 36, pp. 67551\u201367564 (2023)"},{"key":"31_CR20","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the 38th International Conference on Machine Learning, vol.\u00a0139, pp. 8748\u20138763 (2021)"},{"key":"31_CR21","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"31_CR22","unstructured":"Suryawanshi, S., Chakravarthi, B.R., Arcan, M., Buitelaar, P.: Multimodal meme dataset (MultiOFF) for identifying offensive content in image and text. In: Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying, pp. 32\u201341 (2020)"},{"key":"31_CR23","doi-asserted-by":"crossref","unstructured":"Yoran, O., Wolfson, T., Bogin, B., Katz, U., Deutch, D., Berant, J.: Answering questions by meta-reasoning over multiple chains of thought. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 5942\u20135966 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.364"},{"issue":"1","key":"31_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TKDE.2024.3485107","volume":"37","author":"L Yuan","year":"2025","unstructured":"Yuan, L., Cai, Y., Xu, J., Li, Q., Wang, T.: A fine-grained network for joint multimodal entity-relation extraction. IEEE Trans. Knowl. Data Eng. 37(1), 1\u201314 (2025). https:\/\/doi.org\/10.1109\/TKDE.2024.3485107","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"2","key":"31_CR25","doi-asserted-by":"publisher","first-page":"722","DOI":"10.1109\/TAFFC.2023.3291730","volume":"15","author":"L Yuan","year":"2024","unstructured":"Yuan, L., Wang, J., Yu, L.C., Zhang, X.: Encoding syntactic information into transformers for aspect-based sentiment triplet extraction. IEEE Trans. Affect. Comput. 15(2), 722\u2013735 (2024). https:\/\/doi.org\/10.1109\/TAFFC.2023.3291730","journal-title":"IEEE Trans. Affect. Comput."},{"key":"31_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, B., Zhang, P., Dong, X., Zang, Y., Wang, J.: Long-CLIP: unlocking the long-text capability of clip. In: European Conference on Computer Vision, pp. 310\u2013325 (2024)","DOI":"10.1007\/978-3-031-72983-6_18"},{"key":"31_CR27","doi-asserted-by":"crossref","unstructured":"Zheng, G., Wang, J., Yu, L.C., Zhang, X.: Instruction tuning with retrieval-based examples ranking for aspect-based sentiment analysis (2024). https:\/\/arxiv.org\/abs\/2405.18035","DOI":"10.18653\/v1\/2024.findings-acl.284"},{"key":"31_CR28","unstructured":"Zheng, G., Wang, J., Zhou, X., Zhang, X.: Enhancing semantics in multimodal chain of thought via soft negative sampling. In: Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), Torino, Italia, pp. 6059\u20136076, May 2024"},{"issue":"9","key":"31_CR29","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vision"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3349-7_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:49:28Z","timestamp":1763196568000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3349-7_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,16]]},"ISBN":["9789819533480","9789819533497"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3349-7_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,16]]},"assertion":[{"value":"16 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}