{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T11:04:53Z","timestamp":1772622293039,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T00:00:00Z","timestamp":1735344000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T00:00:00Z","timestamp":1735344000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100009110","name":"Natural Science Foundation of Xinjiang Uygur Autonomous Region","doi-asserted-by":"publisher","award":["2023D01C176"],"award-info":[{"award-number":["2023D01C176"]}],"id":[{"id":"10.13039\/100009110","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Xinjiang Uygur Autonomous Region Universities Fundamental Research Funds Scientific Research Project","award":["XJEDU2022P018"],"award-info":[{"award-number":["XJEDU2022P018"]}]},{"name":"Postgraduate Innovation Projects in the Autonomous Region, China","award":["XJ2023G044"],"award-info":[{"award-number":["XJ2023G044"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s40747-024-01684-w","type":"journal-article","created":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T05:30:31Z","timestamp":1735363831000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Metaphor recognition based on cross-modal multi-level information fusion"],"prefix":"10.1007","volume":"11","author":[{"given":"Qimeng","family":"Yang","sequence":"first","affiliation":[]},{"given":"Yuanbo","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Xiaoyu","family":"He","sequence":"additional","affiliation":[]},{"given":"Shisong","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,28]]},"reference":[{"key":"1684_CR1","volume-title":"Metaphors we live by","author":"G Lakoff","year":"1980","unstructured":"Lakoff G, Johnson M (1980) Metaphors we live by. University of Chicago, Chicago"},{"key":"1684_CR2","unstructured":"Abd Yusof NF, Lin C, Guerin F (2017) Analysing the causes of depressed mood from depression vulnerable individuals. In: Proceedings of the international workshop on digital disease detection using social media 2017 (DDDSM-2017), pp 9\u201317"},{"key":"1684_CR3","unstructured":"Tang C, Guerin F, Li Y, Lin C (2022) Recent advances in neural text generation: a task-agnostic survey. arXiv preprint arXiv:2203.03047 (2022)"},{"issue":"6","key":"1684_CR4","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1109\/MIS.2017.4531228","volume":"32","author":"E Cambria","year":"2017","unstructured":"Cambria E, Poria S, Gelbukh A, Thelwall M (2017) Sentiment analysis is a big suitcase. 
IEEE Intell Syst 32(6):74\u201380","journal-title":"IEEE Intell Syst"},{"issue":"1","key":"1684_CR5","doi-asserted-by":"publisher","first-page":"91","DOI":"10.4314\/laligens.v6i1.8","volume":"6","author":"SM Anurudu","year":"2017","unstructured":"Anurudu SM, Obi IM (2017) Decoding the metaphor of internet meme: a study of satirical tweets on Black Friday sales in Nigeria. Afrrev Laligens 6(1):91\u2013100","journal-title":"Afrrev Laligens"},{"key":"1684_CR6","doi-asserted-by":"crossref","unstructured":"Shutova E, Kiela D, Maillard J (2016) Black holes and white rabbits: metaphor identification with visual features. In: Proceedings of the 2016 conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp 160\u2013170","DOI":"10.18653\/v1\/N16-1020"},{"key":"1684_CR7","doi-asserted-by":"publisher","first-page":"166","DOI":"10.1016\/j.neucom.2020.11.051","volume":"429","author":"C Su","year":"2021","unstructured":"Su C, Chen W, Fu Z, Chen Y (2021) Multimodal metaphor detection based on distinguishing concreteness. Neurocomputing 429:166\u2013173","journal-title":"Neurocomputing"},{"key":"1684_CR8","doi-asserted-by":"crossref","unstructured":"Choi M, Lee S, Choi E, Park H, Lee J, Lee D, Lee J (2021) Melbert: metaphor detection via contextualized late interaction using metaphorical identification theories. arXiv preprint arXiv:2104.13615","DOI":"10.18653\/v1\/2021.naacl-main.141"},{"key":"1684_CR9","unstructured":"Steen G (2007) Mip: A method for identifying metaphorically used words in discourse. Metaphor and symbol"},{"issue":"1","key":"1684_CR10","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/0004-3702(75)90016-8","volume":"6","author":"Y Wilks","year":"1975","unstructured":"Wilks Y (1975) A preferential, pattern-seeking, semantics for natural language inference. Artif Intell 6(1):53\u201374","journal-title":"Artif Intell"},{"key":"1684_CR11","doi-asserted-by":"crossref","unstructured":"Li Y, Wang S, Lin C, Frank G (2023) Metaphor detection via explicit basic meanings modelling. arXiv preprint arXiv:2305.17268","DOI":"10.18653\/v1\/2023.acl-short.9"},{"key":"1684_CR12","doi-asserted-by":"crossref","unstructured":"Li Y, Wang S, Lin C, Guerin F, Barrault L (2023) Framebert: conceptual metaphor detection with frame embedding learning. arXiv preprint arXiv:2302.04834","DOI":"10.18653\/v1\/2023.eacl-main.114"},{"key":"1684_CR13","unstructured":"Kehat G, Pustejovsky J (2020) Improving neural metaphor detection with visual datasets. In: Proceedings of the twelfth language resources and evaluation conference, pp 5928\u20135933"},{"issue":"3","key":"1684_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2024.103652","volume":"61","author":"X He","year":"2024","unstructured":"He X, Yu L, Tian S, Yang Q, Long J, Wang B (2024) Viemf: multimodal metaphor detection via visual information enhancement with multimodal fusion. Inf Process Manag 61(3):103652","journal-title":"Inf Process Manag"},{"key":"1684_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111778","volume":"294","author":"B Wang","year":"2024","unstructured":"Wang B, Huang S, Liang B, Tu G, Yang M, Xu R (2024) What do they\u201cmeme\u201d? A metaphor-aware multi-modal multi-task framework for fine-grained meme understanding. 
Knowl-Based Syst 294:111778","journal-title":"Knowl-Based Syst"},{"key":"1684_CR16","doi-asserted-by":"crossref","unstructured":"Alnajjar K, H\u00e4m\u00e4l\u00e4inen M, Zhang S (2022) Ring that bell: A corpus and method for multimodal metaphor detection in videos. arXiv preprint arXiv:2301.01134","DOI":"10.18653\/v1\/2022.flp-1.4"},{"key":"1684_CR17","doi-asserted-by":"crossref","unstructured":"Zhang D, Zhang M, Zhang H, Yang L, Lin H (2021) Multimet: a multimodal dataset for metaphor understanding. In: Proceedings of the 59th annual meeting of the Association for Computational Linguistics and the 11th international joint conference on natural language processing, vol 1: Long papers, pp 3214\u20133225","DOI":"10.18653\/v1\/2021.acl-long.249"},{"key":"1684_CR18","doi-asserted-by":"crossref","unstructured":"Xu B, Li T, Zheng J, Naseriparsa M, Zhao Z, Lin H, Xia F (2022) Met-meme: a multimodal meme dataset rich in metaphors. In: Proceedings of the 45th international ACM SIGIR conference on research and development in information retrieval, pp 2887\u20132899","DOI":"10.1145\/3477495.3532019"},{"issue":"3","key":"1684_CR19","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1080\/24725579.2018.1496494","volume":"8","author":"A Samareh","year":"2018","unstructured":"Samareh A, Jin Y, Wang Z, Chang X, Huang S (2018) Detect depression from communication: how computer vision, signal processing, and sentiment analysis join forces. IISE Trans Healthc Syst Eng 8(3):196\u2013208","journal-title":"IISE Trans Healthc Syst Eng"},{"key":"1684_CR20","doi-asserted-by":"crossref","unstructured":"Yin S, Liang C, Ding H, Wang S (2019) A multi-modal hierarchical recurrent neural network for depression detection. In: Proceedings of the 9th international on audio\/visual emotion challenge and workshop, pp 65\u201371","DOI":"10.1145\/3347320.3357696"},{"key":"1684_CR21","unstructured":"Bahdanau D, Cho K, Bengio Y (2014) Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473"},{"key":"1684_CR22","doi-asserted-by":"crossref","unstructured":"Kumar S, Mondal I, Akhtar MS, Chakraborty T (2023) Explaining (sarcastic) utterances to enhance affect understanding in multimodal dialogues. In: Proceedings of the AAAI conference on artificial intelligence, vol 37, pp 12986\u201312994","DOI":"10.1609\/aaai.v37i11.26526"},{"key":"1684_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111126","volume":"283","author":"X Li","year":"2024","unstructured":"Li X, Liu J, Xie Y, Gong P, Zhang X, He H (2024) Magdra: a multi-modal attention graph network with dynamic routing-by-agreement for multi-label emotion recognition. Knowl-Based Syst 283:111126","journal-title":"Knowl-Based Syst"},{"key":"1684_CR24","doi-asserted-by":"crossref","unstructured":"Gao Y, Beijbom O, Zhang N, Darrell T (2016) Compact bilinear pooling. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 317\u2013326","DOI":"10.1109\/CVPR.2016.41"},{"key":"1684_CR25","doi-asserted-by":"crossref","unstructured":"Fukui A, Park DH, Yang D, Rohrbach A, Darrell T, Rohrbach M (2016) Multimodal compact bilinear pooling for visual question answering and visual grounding. arXiv preprint arXiv:1606.01847","DOI":"10.18653\/v1\/D16-1044"},{"key":"1684_CR26","doi-asserted-by":"crossref","unstructured":"Trik M, Akhavan H, Bidgoli AM, Molk AMNG, Vashani H, Mozaffari SP (2023) A new adaptive selection strategy for reducing latency in networks on chip. 
Integration 89:9\u201324","DOI":"10.1016\/j.vlsi.2022.11.004"},{"issue":"9","key":"1684_CR27","volume":"35","author":"Z Wang","year":"2023","unstructured":"Wang Z, Jin Z, Yang Z, Zhao W, Trik M (2023) Increasing efficiency for routing in internet of things using binary gray wolf optimization and fuzzy logic. J King Saud Univ Comput Inf Sci 35(9):101732","journal-title":"J King Saud Univ Comput Inf Sci"},{"key":"1684_CR28","doi-asserted-by":"crossref","unstructured":"Wang G, Wu J, Trik M (2023) A novel approach to reduce video traffic based on understanding user demand and D2D communication in 5G networks. IETE J Res, 1\u201317","DOI":"10.1080\/03772063.2023.2278696"},{"key":"1684_CR29","doi-asserted-by":"crossref","unstructured":"Li Y, Wang H, Trik M (2024) Design and simulation of a new current mirror circuit with low power consumption and high performance and output impedance. Analog Integr Circ Signal Process, 1\u201313","DOI":"10.1007\/s10470-023-02243-y"},{"key":"1684_CR30","unstructured":"Wang Z, Mayhew S, Roth D et al (2019) Cross-lingual ability of multilingual bert: an empirical study. arXiv preprint arXiv:1912.07840"},{"key":"1684_CR31","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1684_CR32","doi-asserted-by":"crossref","unstructured":"Khan Z, Fu Y (2021) Exploiting bert for multimodal target sentiment classification through input space translation. In: Proceedings of the 29th ACM international conference on multimedia, pp 3034\u20133042","DOI":"10.1145\/3474085.3475692"},{"key":"1684_CR33","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision. Springer, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1684_CR34","doi-asserted-by":"publisher","first-page":"63373","DOI":"10.1109\/ACCESS.2019.2916887","volume":"7","author":"W Guo","year":"2019","unstructured":"Guo W, Wang J, Wang S (2019) Deep multimodal representation learning: a survey. IEEE Access 7:63373\u201363394","journal-title":"IEEE Access"},{"key":"1684_CR35","doi-asserted-by":"crossref","unstructured":"Zhu X, Cao B, Xu S, Liu B, Cao J (2019) Joint visual-textual sentiment analysis based on cross-modality attention mechanism. In: MultiMedia modeling: 25th international conference, MMM 2019, Thessaloniki, Greece, January 8\u201311, 2019, Proceedings, Part I 25. Springer, pp 264\u2013276","DOI":"10.1007\/978-3-030-05710-7_22"},{"key":"1684_CR36","doi-asserted-by":"crossref","unstructured":"Li XL, Liang P (2021) Prefix-tuning: optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"1684_CR37","doi-asserted-by":"crossref","unstructured":"Cai Y, Cai H, Wan X (2019) Multi-modal sarcasm detection in twitter with hierarchical fusion model. In: Proceedings of the 57th annual meeting of the Association for Computational Linguistics, pp 2506\u20132515","DOI":"10.18653\/v1\/P19-1239"},{"key":"1684_CR38","doi-asserted-by":"crossref","unstructured":"Lewis M, Liu Y, Goyal N, Ghazvininejad M, Mohamed A, Levy O, Stoyanov V, Zettlemoyer L (2019) Bart: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. 
arXiv preprint arXiv:1910.13461","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"1684_CR39","unstructured":"Clark K, Luong M-T, Le QV, Manning CD (2020) Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555"},{"key":"1684_CR40","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"issue":"1","key":"1684_CR41","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2008","unstructured":"Scarselli F, Gori M, Tsoi AC, Hagenbuchner M, Monfardini G (2008) The graph neural network model. IEEE Trans Neural Netw 20(1):61\u201380","journal-title":"IEEE Trans Neural Netw"},{"key":"1684_CR42","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.neucom.2021.09.041","volume":"467","author":"B Yang","year":"2022","unstructured":"Yang B, Shao B, Wu L, Lin X (2022) Multimodal sentiment analysis with unidirectional modality translation. Neurocomputing 467:130\u2013137","journal-title":"Neurocomputing"},{"key":"1684_CR43","doi-asserted-by":"crossref","unstructured":"Chen X, Zhang N, Li L, Deng S, Tan C, Xu C, Huang F, Si L, Chen H (2022) Hybrid transformer with multi-level fusion for multimodal knowledge graph completion. In: Proceedings of the 45th international ACM SIGIR conference on research and development in information retrieval, pp 904\u2013915","DOI":"10.1145\/3477495.3531992"},{"key":"1684_CR44","doi-asserted-by":"crossref","unstructured":"Sun L, Lian Z, Liu B, Tao J (2023) Efficient multimodal transformer with dual-level feature restoration for robust multimodal sentiment analysis. IEEE Trans Affect Comput","DOI":"10.1109\/TAFFC.2023.3274829"},{"key":"1684_CR45","doi-asserted-by":"crossref","unstructured":"Xu N, Zeng Z, Mao W (2020) Reasoning with multimodal sarcastic tweets via modeling cross-modality contrast and semantic association. In: Proceedings of the 58th annual meeting of the Association for Computational Linguistics, pp 3777\u20133786","DOI":"10.18653\/v1\/2020.acl-main.349"},{"key":"1684_CR46","doi-asserted-by":"crossref","unstructured":"Pan H, Lin Z, Fu P, Qi Y, Wang W (2020) Modeling intra and inter-modality incongruity for multi-modal sarcasm detection. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp 1383\u20131392","DOI":"10.18653\/v1\/2020.findings-emnlp.124"},{"key":"1684_CR47","doi-asserted-by":"crossref","unstructured":"Wang X, Sun X, Yang T, Wang H (2020) Building a bridge: a method for imagetext sarcasm detection without pretraining on image-text data. In: Proceedings of the first international workshop on natural language processing beyond text, pp 19\u201329","DOI":"10.18653\/v1\/2020.nlpbt-1.3"},{"issue":"140","key":"1684_CR48","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel C, Shazeer N, Roberts A, Lee K, Narang S, Matena M, Zhou Y, Li W, Liu PJ (2020) Exploring the limits of transfer learning with a unified text-to-text transformer. J Mach Learn Res 21(140):1\u201367","journal-title":"J Mach Learn Res"},{"key":"1684_CR49","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth 16\u2009\u00d7\u200916 words: Transformers for image recognition at scale. 
arXiv preprint arXiv:2010.11929"},{"key":"1684_CR50","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00e9gou H (2021) Training data-efficient image transformers and distillation through attention. In: International conference on machine learning. PMLR, pp 10347\u201310357"},{"key":"1684_CR51","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J, Krueger G, Sutskever I (2021) Learning transferable visual models from natural language supervision. In: International conference on machine learning. PMLR, pp 8748\u20138763"},{"key":"1684_CR52","unstructured":"Kim W, Son B, Kim I (2021) Vilt: vision-and-language transformer without convolution or region supervision. In: Proceedings of the 38th international conference on machine learning. PMLR, pp 5583\u20135594"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01684-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01684-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01684-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,30]],"date-time":"2025-01-30T20:20:45Z","timestamp":1738268445000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01684-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,28]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["1684"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01684-w","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,28]]},"assertion":[{"value":"25 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"95"}}
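The record above is a single "work" object as returned by the public Crossref REST API (note the "status" / "message-type" / "message" envelope). Below is a minimal sketch of how such a record can be fetched and picked apart, using only Python's standard library. The client name and mailto contact in the User-Agent header are placeholders, not real values; Crossref documents that identifying yourself this way routes requests to its "polite" pool.

import json
import urllib.request

# DOI taken from the record above.
DOI = "10.1007/s40747-024-01684-w"
URL = f"https://api.crossref.org/works/{DOI}"

# Placeholder identification string; replace with your own contact.
req = urllib.request.Request(
    URL,
    headers={"User-Agent": "metadata-example/0.1 (mailto:you@example.org)"},
)

with urllib.request.urlopen(req) as resp:
    record = json.load(resp)

# The payload mirrors the record above: a thin envelope around "message".
assert record["status"] == "ok" and record["message-type"] == "work"
work = record["message"]

print(work["title"][0])                # title is a list of strings
print(work["container-title"][0])      # journal name
print(", ".join(f"{a['given']} {a['family']}" for a in work["author"]))
print(f"{work['references-count']} references, "
      f"cited {work['is-referenced-by-count']} times")

# Each "reference" entry is a partial citation: "DOI" when Crossref
# matched it, otherwise free text under "unstructured".
for ref in work["reference"][:3]:
    print(" -", ref.get("DOI") or ref.get("unstructured", ""))

Note that the mix of fields per reference entry (some with "DOI" and "journal-title", others with only "unstructured") reflects how much of each citation the publisher deposited in structured form, so robust consumers should treat every reference field as optional, as the .get() calls above do.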