{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:52:55Z","timestamp":1763196775020,"version":"3.45.0"},"publisher-location":"Singapore","reference-count":39,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819533480","type":"print"},{"value":"9789819533497","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3349-7_42","type":"book-chapter","created":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:49:23Z","timestamp":1763196563000},"page":"549-562","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ISIPN: Intention-Semantic Incongruity Perception Network for\u00a0Multimodal Metaphor Detection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7521-348X","authenticated-orcid":false,"given":"Tianlong","family":"Zheng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2639-3944","authenticated-orcid":false,"given":"Yating","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4110-3976","authenticated-orcid":false,"given":"Rui","family":"Dong","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3082-648X","authenticated-orcid":false,"given":"Bo","family":"Ma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6685-0858","authenticated-orcid":false,"given":"Lei","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2867-1765","authenticated-orcid":false,"given":"Xi","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,16]]},"reference":[{"key":"42_CR1","unstructured":"AI@Meta: Llama 3 model card (2024). https:\/\/github.com\/meta-llama\/llama3\/blob\/main\/MODEL_CARD.md"},{"key":"42_CR2","unstructured":"Anthropic: Claude 3.7 sonnet and claude code (2025). https:\/\/www.anthropic.com\/news\/claude-3-7-sonnet"},{"key":"42_CR3","unstructured":"Bai, S., et\u00a0al.: Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"42_CR4","doi-asserted-by":"crossref","unstructured":"Cameron, L.: Metaphor and reconciliation: The discourse dynamics of empathy in post-conflict conversations. Routledge (2012)","DOI":"10.4324\/9780203837771"},{"key":"42_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Hybrid transformer with multi-level fusion for multimodal knowledge graph completion. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 904\u2013915 (2022)","DOI":"10.1145\/3477495.3531992"},{"key":"42_CR6","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"42_CR7","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"issue":"Suppl 2","key":"42_CR8","doi-asserted-by":"publisher","first-page":"1829","DOI":"10.1007\/s10462-023-10564-7","volume":"56","author":"M Ge","year":"2023","unstructured":"Ge, M., Mao, R., Cambria, E.: A survey on computational metaphor processing techniques: from identification, interpretation, generation to application. Artif. Intell. Rev. 56(Suppl 2), 1829\u20131895 (2023)","journal-title":"Artif. Intell. Rev."},{"issue":"3","key":"42_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2024.103652","volume":"61","author":"X He","year":"2024","unstructured":"He, X., Yu, L., Tian, S., Yang, Q., Long, J., Wang, B.: Viemf: multimodal metaphor detection via visual information enhancement with multimodal fusion. Inform. Process. Manag. 61(3), 103652 (2024)","journal-title":"Inform. Process. Manag."},{"key":"42_CR10","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"42_CR11","unstructured":"Hurst, A., et\u00a0al.: Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)"},{"key":"42_CR12","doi-asserted-by":"crossref","unstructured":"Hutson, J., Schnellmann, A.: The poetry of prompts: the collaborative role of generative artificial intelligence in the creation of poetry and the anxiety of machine influence. Global J. Comput. Sci. Technol. 23(1) (2023)","DOI":"10.34257\/GJCSTDVOL23IS1PG1"},{"key":"42_CR13","doi-asserted-by":"crossref","unstructured":"Jia, K., Li, R.: Metaphor detection with context enhancement and curriculum learning. In: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 2726\u20132737 (2024)","DOI":"10.18653\/v1\/2024.naacl-long.149"},{"key":"42_CR14","unstructured":"Kehat, G., Pustejovsky, J.: Improving neural metaphor detection with visual datasets. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 5928\u20135933 (2020)"},{"key":"42_CR15","doi-asserted-by":"crossref","unstructured":"Lakoff, G.: The contemporary theory of metaphor (1993)","DOI":"10.1017\/CBO9781139173865.013"},{"key":"42_CR16","unstructured":"Li, B., et al.: Llava-next: stronger llms supercharge multimodal capabilities in the wild (May 2024). https:\/\/llava-vl.github.io\/blog\/2024-05-10-llava-next-stronger-llms\/"},{"key":"42_CR17","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.J., Chang, K.W.: Visualbert: a simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)"},{"key":"42_CR18","doi-asserted-by":"crossref","unstructured":"Liu, H., Li, C., Li, Y., Lee, Y.J.: Improved baselines with visual instruction tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 26296\u201326306 (2024)","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"42_CR19","unstructured":"Van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-sne. J. Mach. Learn. Res. 9(11) (2008)"},{"key":"42_CR20","unstructured":"OpenAI: Openai o3-mini system card (2025). https:\/\/openai.com\/index\/o3-mini-system-card\/"},{"key":"42_CR21","doi-asserted-by":"crossref","unstructured":"Pan, H., Lin, Z., Fu, P., Qi, Y., Wang, W.: Modeling intra and inter-modality incongruity for multi-modal sarcasm detection. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 1383\u20131392 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.124"},{"key":"42_CR22","unstructured":"Radford, A et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PmLR (2021)"},{"key":"42_CR23","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"42_CR24","doi-asserted-by":"crossref","unstructured":"Shen, X., Sun, D., Pan, S., Zhou, X., Yang, L.T.: Neighbor contrastive learning on learnable graph augmentation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 9782\u20139791 (2023)","DOI":"10.1609\/aaai.v37i8.26168"},{"key":"42_CR25","doi-asserted-by":"crossref","unstructured":"Singh, A., et al.: Flava: A foundational language and vision alignment model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15638\u201315650 (2022)","DOI":"10.1109\/CVPR52688.2022.01519"},{"issue":"1","key":"42_CR26","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1109\/TAFFC.2023.3274829","volume":"15","author":"L Sun","year":"2023","unstructured":"Sun, L., Lian, Z., Liu, B., Tao, J.: Efficient multimodal transformer with dual-level feature restoration for robust multimodal sentiment analysis. IEEE Trans. Affect. Comput. 15(1), 309\u2013325 (2023)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"42_CR27","doi-asserted-by":"crossref","unstructured":"de\u00a0Toledo, G.L., Marcacini, R.M.: Transfer learning with joint fine-tuning for multimodal sentiment analysis. arXiv preprint arXiv:2210.05790 (2022)","DOI":"10.52591\/lxai202207173"},{"key":"42_CR28","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Bai, S., Liang, P.P., Kolter, J.Z., Morency, L.P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting. vol.\u00a02019, p.\u00a06558. NIH Public Access (2019)","DOI":"10.18653\/v1\/P19-1656"},{"key":"42_CR29","unstructured":"Veli\u010dkovi\u0107, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., Bengio, Y.: Graph attention networks. arXiv preprint arXiv:1710.10903 (2017)"},{"key":"42_CR30","doi-asserted-by":"crossref","unstructured":"Wachowiak, L., Gromann, D.: Does gpt-3 grasp metaphors? identifying metaphor mappings with generative language models. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1018\u20131032 (2023)","DOI":"10.18653\/v1\/2023.acl-long.58"},{"key":"42_CR31","unstructured":"Wang, P., et al.: Qwen2-vl: enhancing vision-language model\u2019s perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)"},{"key":"42_CR32","doi-asserted-by":"crossref","unstructured":"Wen, C., Jia, G., Yang, J.: Dip: dual incongruity perceiving network for sarcasm detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2540\u20132550 (2023)","DOI":"10.1109\/CVPR52729.2023.00250"},{"key":"42_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128689","volume":"612","author":"Q Wu","year":"2025","unstructured":"Wu, Q., Fang, W., Zhong, W., Li, F., Xue, Y., Chen, B.: Dual-level adaptive incongruity-enhanced model for multimodal sarcasm detection. Neurocomputing 612, 128689 (2025)","journal-title":"Neurocomputing"},{"key":"42_CR34","doi-asserted-by":"crossref","unstructured":"Xu, B., Li, T., Zheng, J., Naseriparsa, M., Zhao, Z., Lin, H., Xia, F.: Met-meme: a multimodal meme dataset rich in metaphors. In: Proceedings of the 45th international ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2887\u20132899 (2022)","DOI":"10.1145\/3477495.3532019"},{"key":"42_CR35","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.neucom.2021.09.041","volume":"467","author":"B Yang","year":"2022","unstructured":"Yang, B., Shao, B., Wu, L., Lin, X.: Multimodal sentiment analysis with unidirectional modality translation. Neurocomputing 467, 130\u2013137 (2022)","journal-title":"Neurocomputing"},{"key":"42_CR36","doi-asserted-by":"crossref","unstructured":"Zhang, D., Yu, J., Jin, S., Yang, L., Lin, H.: Multicmet: a novel chinese benchmark for understanding multimodal metaphor. In: Findings of the Association for Computational Linguistics: EMNLP 2023. pp. 6141\u20136154 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.409"},{"key":"42_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, D., Zhang, M., Guo, T., Peng, C., Saikrishna, V., Xia, F.: In your face: sentiment analysis of metaphor with facial expressive features. In: 2021 International Joint Conference on Neural Networks (IJCNN), pp.\u00a01\u20138. IEEE (2021)","DOI":"10.1109\/IJCNN52387.2021.9533972"},{"key":"42_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, D., Zhang, M., Zhang, H., Yang, L., Lin, H.: Multimet: a multimodal dataset for metaphor understanding. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 3214\u20133225 (2021)","DOI":"10.18653\/v1\/2021.acl-long.249"},{"key":"42_CR39","unstructured":"Zheng, T., Dong, R., Yang, Y., Ma, B., Wang, L., Xi, Z.: Metaphor detection model based on linguistic multi-incongruity. J. Comput. Appli. (2025)"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3349-7_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:49:32Z","timestamp":1763196572000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3349-7_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,16]]},"ISBN":["9789819533480","9789819533497"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3349-7_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,16]]},"assertion":[{"value":"16 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}