{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T08:59:25Z","timestamp":1771577965245,"version":"3.50.1"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T00:00:00Z","timestamp":1747353600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T00:00:00Z","timestamp":1747353600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Prog Artif Intell"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s13748-025-00370-3","type":"journal-article","created":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T04:26:24Z","timestamp":1747369584000},"page":"341-353","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Applying cross-modal feature alignment and fusion for effective sarcasm detection"],"prefix":"10.1007","volume":"14","author":[{"given":"Sarang P.","family":"Karun","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8977-1984","authenticated-orcid":false,"given":"V.","family":"Adithya","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,16]]},"reference":[{"key":"370_CR1","doi-asserted-by":"publisher","unstructured":"Wei, P., Adikari, A., Alahakoon, D., Gero, J.S.: Discovering the influence of sarcasm in social media responses. WIREs Data Mining and Knowledge Discovery 9 (2019) https:\/\/doi.org\/10.1002\/widm.1331","DOI":"10.1002\/widm.1331"},{"key":"370_CR2","doi-asserted-by":"publisher","unstructured":"Farabi, S., Ranasinghe, T., Kanojia, D., Kong, Y., Zampieri, M.: A survey of multimodal sarcasm detection. In: Proceedings of the Thirty-ThirdInternational Joint Conference on Artificial Intelligence. IJCAI-2024, pp. 8020\u20138028. International Joint Conferences on Artificial Intelligence Organization, ??? (2024). https:\/\/doi.org\/10.24963\/ijcai.2024\/887","DOI":"10.24963\/ijcai.2024\/887"},{"key":"370_CR3","doi-asserted-by":"publisher","unstructured":"Costin, B.A., Trau\u015fan\u00a0Matu: Automatic sarcasm detection: Systematic literature review. Information 13(8) (2022) https:\/\/doi.org\/10.3390\/info13080399","DOI":"10.3390\/info13080399"},{"key":"370_CR4","doi-asserted-by":"crossref","unstructured":"Tepperman, J., Traum, D., Narayanan, S.: \u201cyeah right\": sarcasm recognition for spoken dialogue systems. In: Ninth International Conference on Spoken Language Processing (2006)","DOI":"10.21437\/Interspeech.2006-507"},{"key":"370_CR5","doi-asserted-by":"publisher","unstructured":"Bharti, S.K., Pradhan, R., Babu, K.S., Jena, S.K.: Sarcasm analysis on twitter data using machine learning approaches. In: In: Missaoui, R., Abdessalem, T., Latapy, M. (eds) Trends in Social Network Analysis. Lecture Notes in Social Networks, pp. 118\u2013126 (2017). https:\/\/doi.org\/10.1007\/978-3-319-53420-6_3","DOI":"10.1007\/978-3-319-53420-6_3"},{"key":"370_CR6","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1016\/j.inffus.2023.01.005","volume":"93","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Wang, J., Liu, Y., Rong, L., Zheng, Q., Song, D., Tiwari, P., Qin, J.: A multitask learning model for multimodal sarcasm, sentiment and emotion recognition in conversations. Information Fusion 93, 282\u2013301 (2023)","journal-title":"Information Fusion"},{"key":"370_CR7","doi-asserted-by":"crossref","unstructured":"Liu, Z., Wang, S., Gu, X., Wang, D., Dong, Q., Cui, B.: Intelligent assessment of pavement structural conditions: A novel femvit classification network for gpr images. IEEE Transactions on Intelligent Transportation Systems (2024)","DOI":"10.1109\/TITS.2024.3403144"},{"issue":"5","key":"370_CR8","first-page":"1857","volume":"105","author":"PK Roy","year":"2024","unstructured":"Roy, P.K.: An advanced learning approach for detecting sarcasm in social media posts: Theory and solutions. Social Science Quarterly 105(5), 1857\u20131874 (2024)","journal-title":"Social Science Quarterly"},{"key":"370_CR9","doi-asserted-by":"publisher","unstructured":"Liu, J., Tian, S., Yu, L., Shi, X., Wang, F.: Image-text fusion transformer network for sarcasm detection. Multimedia Tools and Applications, 1\u201315 (2023). https:\/\/doi.org\/10.1007\/s11042-023-17252-2","DOI":"10.1007\/s11042-023-17252-2"},{"key":"370_CR10","doi-asserted-by":"crossref","unstructured":"Liang, B., Gui, L., He, Y., Cambria, E., Xu, R.: Fusion and discrimination: A multimodal graph contrastive learning framework for multimodal sarcasm detection. IEEE Transactions on Affective Computing (2024)","DOI":"10.1109\/TAFFC.2024.3380375"},{"key":"370_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102132","volume":"103","author":"J Wang","year":"2024","unstructured":"Wang, J., Yang, Y., Jiang, Y., Ma, M., Xie, Z., Li, T.: Cross-modal incongruity aligning and collaborating for multi-modal sarcasm detection. Information Fusion 103, 102132 (2024)","journal-title":"Information Fusion"},{"key":"370_CR12","doi-asserted-by":"publisher","first-page":"5455","DOI":"10.1007\/s10462-020-09825-6","volume":"53","author":"A Khan","year":"2020","unstructured":"Khan, A., Sohail, A., Zahoora, U., Qureshi, A.S.: A survey of the recent architectures of deep convolutional neural networks. Artificial intelligence review 53, 5455\u20135516 (2020)","journal-title":"Artificial intelligence review"},{"key":"370_CR13","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"370_CR14","doi-asserted-by":"publisher","unstructured":"Cai, Y., Cai, H., Wan, X.: Multi-modal sarcasm detection in Twitter with hierarchical fusion model. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 2506\u20132515. Association for Computational Linguistics, Florence, Italy (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1239 . https:\/\/aclanthology.org\/P19-1239\/","DOI":"10.18653\/v1\/P19-1239"},{"key":"370_CR15","first-page":"9694","volume":"34","author":"J Li","year":"2021","unstructured":"Li, J., Selvaraju, R., Gotmare, A., Joty, S., Xiong, C., Hoi, S.C.H.: Align before fuse: Vision and language representation learning with momentum distillation. Advances in neural information processing systems 34, 9694\u20139705 (2021)","journal-title":"Advances in neural information processing systems"},{"key":"370_CR16","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"370_CR17","unstructured":"Data of Multimodal Sarcasm Detection. https:\/\/github.com\/headacheboy\/data-of-multimodal-sarcasm-detection (2020)"},{"key":"370_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.107881","volume":"131","author":"Y Akkem","year":"2024","unstructured":"Akkem, Y., Biswas, S.K., Varanasi, A.: A comprehensive review of synthetic data generation in smart farming by using variational autoencoder and generative adversarial network. Engineering Applications of Artificial Intelligence 131, 107881 (2024)","journal-title":"Engineering Applications of Artificial Intelligence"},{"issue":"3","key":"370_CR19","doi-asserted-by":"publisher","first-page":"2767","DOI":"10.1007\/s42001-024-00326-y","volume":"7","author":"IKS Al-Tameemi","year":"2024","unstructured":"Al-Tameemi, I.K.S., Feizi-Derakhshi, M.-R., Pashazadeh, S., Asadpour, M.: A comprehensive review of visual-textual sentiment analysis from social media networks. Journal of Computational Social Science 7(3), 2767\u20132838 (2024)","journal-title":"Journal of Computational Social Science"},{"key":"370_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.127428","volume":"578","author":"W Chen","year":"2024","unstructured":"Chen, W., Lin, F., Li, G., Liu, B.: A survey of automatic sarcasm detection: Fundamental theories, formulation, datasets, detection methods, and opportunities. Neurocomputing 578, 127428 (2024)","journal-title":"Neurocomputing"},{"key":"370_CR21","doi-asserted-by":"publisher","unstructured":"Bharti, S.K., Babu, K.S., Jena, S.K.: Parsing-based sarcasm sentiment recognition in twitter data. In: Proceedings of the 2015 IEEE\/ACM International Conference on Advances in Social Networks Analysis and Mining 2015, pp. 1373\u20131380 (2015). https:\/\/doi.org\/10.1145\/2808797.2808910","DOI":"10.1145\/2808797.2808910"},{"key":"370_CR22","doi-asserted-by":"crossref","unstructured":"Riloff, E., Qadir, A., Surve, P., De\u00a0Silva, L., Gilbert, N., Huang, R.: Sarcasm as contrast between a positive sentiment and negative situation. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 704\u2013714 (2013)","DOI":"10.18653\/v1\/D13-1066"},{"key":"370_CR23","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1609\/icwsm.v4i1.14018","volume":"4","author":"O Tsur","year":"2010","unstructured":"Tsur, O., Davidov, D., Rappoport, A.: Icwsm-a great catchy name: Semi-supervised recognition of sarcastic sentences in online product reviews. Proceedings of the International AAAI Conference on Web and Social Media 4, 162\u2013169 (2010). https:\/\/doi.org\/10.1609\/icwsm.v4i1.14018","journal-title":"Proceedings of the International AAAI Conference on Web and Social Media"},{"key":"370_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2930663","volume":"16","author":"DIH Far\u00edas","year":"2016","unstructured":"Far\u00edas, D.I.H., Patti, V., Rosso, P.: Irony detection in twitter. ACM Transactions on Internet Technology (TOIT) 16, 1\u201324 (2016)","journal-title":"ACM Transactions on Internet Technology (TOIT)"},{"key":"370_CR25","unstructured":"Liebrecht, C., Kunneman, F., Den\u00a0Bosch, A.: The perfect solution for detecting sarcasm in tweets# not (2013)"},{"issue":"8","key":"370_CR26","doi-asserted-by":"publisher","first-page":"5110","DOI":"10.1016\/j.jksuci.2022.01.008","volume":"34","author":"V Govindan","year":"2022","unstructured":"Govindan, V., Balakrishnan, V.: A machine learning approach in analysing the effect of hyperboles using negative sentiment tweets for sarcasm detection. Journal of King Saud University-Computer and Information Sciences 34(8), 5110\u20135120 (2022). https:\/\/doi.org\/10.1016\/j.jksuci.2022.01.008","journal-title":"Journal of King Saud University-Computer and Information Sciences"},{"key":"370_CR27","doi-asserted-by":"crossref","unstructured":"Mishra, A., Kanojia, D., Nagar, S., Dey, K., Bhattacharyya, P.: Harnessing cognitive features for sarcasm detection. arXiv preprint arXiv:1701.05574 (2017)","DOI":"10.18653\/v1\/P16-1104"},{"key":"370_CR28","doi-asserted-by":"publisher","unstructured":"Liu, P., Chen, W., Ou, T. G. Wang, Yang, D., Lei, K.: Sarcasm detection in social media based on imbalanced classification. In: Lecture Notes in Computer Science, Vol 8485. Springer, Cham., vol. 8485 (2014). https:\/\/doi.org\/10.1007\/978-3-319-08010-9_49","DOI":"10.1007\/978-3-319-08010-9_49"},{"key":"370_CR29","doi-asserted-by":"publisher","first-page":"4215","DOI":"10.1007\/s10462-019-09791-8","volume":"53","author":"CI Eke","year":"2020","unstructured":"Eke, C.I., Norman, A.A., Shuib, L.: Sarcasm identification in textual data: systematic review, research challenges and open directions. Artificial Intelligence Review 53, 4215\u20134258 (2020). https:\/\/doi.org\/10.1007\/s10462-019-09791-8","journal-title":"Artificial Intelligence Review"},{"key":"370_CR30","doi-asserted-by":"publisher","unstructured":"N\u00a0Hiremath, B., M.\u00a0Patil, M.: Sarcasm detection using cognitive features of visual data by learning model. Expert Systems with Applications 184 (2021) https:\/\/doi.org\/10.1016\/j.eswa.2021.115476","DOI":"10.1016\/j.eswa.2021.115476"},{"key":"370_CR31","doi-asserted-by":"publisher","unstructured":"Liu, L., Priestley, J.L., Zhou, Y., Ray, H.E., Han, M.: A2text-net: A novel deep neural network for sarcasm detection. In: 2019 IEEE First International Conference on Cognitive Machine Intelligence (CogMI), pp. 118\u2013126 (2019). https:\/\/doi.org\/10.1109\/CogMI48466.2019.00025","DOI":"10.1109\/CogMI48466.2019.00025"},{"key":"370_CR32","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1177\/1470785320921779","volume":"62","author":"SM Sarsam","year":"2020","unstructured":"Sarsam, S.M., Al-Samarraie, H., Alzahrani, A.I., Wright, B.: Sarcasm detection using machine learning algorithms in twitter: A systematic review. International Journal of Market Research 62, 578\u2013598 (2020). https:\/\/doi.org\/10.1177\/1470785320921779","journal-title":"International Journal of Market Research"},{"key":"370_CR33","doi-asserted-by":"publisher","first-page":"424","DOI":"10.1016\/j.inffus.2022.09.025","volume":"91","author":"G Ankita","year":"2023","unstructured":"Ankita, G., Kinjal, A., Soujanya, P., Erik, C., Amir, H.: Multimodal sentiment analysis: A systematic review of history, datasets, multimodal fusion methods, applications, challenges and future directions. Information Fusion 91, 424\u2013444 (2023). https:\/\/doi.org\/10.1016\/j.inffus.2022.09.025","journal-title":"Information Fusion"},{"key":"370_CR34","doi-asserted-by":"publisher","unstructured":"Avvaru, A., Vobilisetty, S., Mamidi, R.: Detecting sarcasm in conversation context using transformer-based models. In: Proceedings of the Second Workshop on Figurative Language Processing, pp. 98\u2013103 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.figlang-1.15","DOI":"10.18653\/v1\/2020.figlang-1.15"},{"key":"370_CR35","doi-asserted-by":"publisher","first-page":"6388","DOI":"10.1109\/ACCESS.2019.2963630","volume":"8","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Narapareddy, V.T., Srikanth, V.A., Malapati, A., Neti, L.B.M.: Sarcasm detection using multi-head attention based bidirectional lstm. Ieee Access 8, 6388\u20136397 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2019.2963630","journal-title":"Ieee Access"},{"key":"370_CR36","doi-asserted-by":"publisher","unstructured":"Liang, B., Lou, C., Li, X., Yang, M., Gui, L., He, Y., Pei, W., Xu, R.: Multi-modal sarcasm detection via cross-modal graph convolutional network. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 1, pp. 1767\u20131777 (2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.124 . Association for Computational Linguistics","DOI":"10.18653\/v1\/2022.acl-long.124"},{"issue":"5","key":"370_CR37","doi-asserted-by":"publisher","first-page":"2146","DOI":"10.3390\/app14052146","volume":"14","author":"H Liu","year":"2024","unstructured":"Liu, H., Yang, B., Yu, Z.: A multi-view interactive approach for multimodal sarcasm detection in social internet of things with knowledge enhancement. Applied Sciences 14(5), 2146 (2024). https:\/\/doi.org\/10.3390\/app14052146","journal-title":"Applied Sciences"},{"issue":"5","key":"370_CR38","doi-asserted-by":"publisher","first-page":"855","DOI":"10.3390\/electronics13050855","volume":"13","author":"H Fu","year":"2024","unstructured":"Fu, H., Liu, H., Wang, H., Xu, L., Lin, J., Jiang, D.: Multi-modal sarcasm detection with sentiment word embedding. Electronics 13(5), 855 (2024). https:\/\/doi.org\/10.3390\/electronics13050855","journal-title":"Electronics"},{"issue":"2","key":"370_CR39","doi-asserted-by":"publisher","first-page":"317","DOI":"10.3390\/math12020317","volume":"12","author":"W Zhong","year":"2024","unstructured":"Zhong, W., Zhang, Z., Wu, Q., Xue, Y., Cai, Q.: A semantic enhancement framework for multimodal sarcasm detection. Mathematics 12(2), 317 (2024). https:\/\/doi.org\/10.3390\/math12020317","journal-title":"Mathematics"},{"key":"370_CR40","doi-asserted-by":"crossref","unstructured":"Wen, C., Jia, G., Yang, J.: Dip: Dual incongruity perceiving network for sarcasm detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2540\u20132550 (2023)","DOI":"10.1109\/CVPR52729.2023.00250"},{"issue":"6","key":"370_CR41","doi-asserted-by":"publisher","first-page":"7399","DOI":"10.1007\/s12652-022-04447-y","volume":"14","author":"D Tom\u00e1s","year":"2023","unstructured":"Tom\u00e1s, D., Ortega-Bueno, R., Zhang, G., Rosso, P., Schifanella, R.: Transformer-based models for multimodal irony detection. Journal of Ambient Intelligence and Humanized Computing 14(6), 7399\u20137410 (2023). https:\/\/doi.org\/10.1007\/s12652-022-04447-y","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"12","key":"370_CR42","doi-asserted-by":"publisher","first-page":"3696","DOI":"10.1109\/TFUZZ.2021.3072492","volume":"29","author":"Y Zhang","year":"2021","unstructured":"Zhang, Y., Liu, Y., Li, Q., Tiwari, P., Wang, B., Li, Y., Pandey, H.M., Zhang, P., Song, D.: Cfn: a complex-valued fuzzy network for sarcasm detection in conversations. IEEE Transactions on Fuzzy Systems 29(12), 3696\u20133710 (2021). https:\/\/doi.org\/10.1109\/TFUZZ.2021.3072492","journal-title":"IEEE Transactions on Fuzzy Systems"},{"issue":"21","key":"370_CR43","doi-asserted-by":"publisher","first-page":"11235","DOI":"10.3390\/app122111235","volume":"12","author":"Y Sun","year":"2022","unstructured":"Sun, Y., Zhang, H., Yang, S., Wang, J.: Efafn: An efficient feature adaptive fusion network with facial feature for multimodal sarcasm detection. Applied Sciences 12(21), 11235 (2022). https:\/\/doi.org\/10.3390\/app122111235","journal-title":"Applied Sciences"},{"key":"370_CR44","doi-asserted-by":"publisher","unstructured":"Cai, Y., Cai, H., Wan, X.: Multi-modal sarcasm detection in twitter with hierarchical fusion model. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 2506\u20132515 (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1239","DOI":"10.18653\/v1\/P19-1239"},{"key":"370_CR45","doi-asserted-by":"publisher","unstructured":"Pan, H., Lin, Z., Fu, P., Qi, Y., Wang, W.: Modeling intra and inter-modality incongruity for multi-modal sarcasm detection. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 1383\u20131392 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.124","DOI":"10.18653\/v1\/2020.findings-emnlp.124"},{"issue":"6","key":"370_CR46","doi-asserted-by":"publisher","first-page":"8597","DOI":"10.1007\/s11042-022-12122-9","volume":"81","author":"N Ding","year":"2022","unstructured":"Ding, N., Tian, S.-W., Yu, L.: A multimodal fusion method for sarcasm detection based on late fusion. Multimedia Tools and Applications 81(6), 8597\u20138616 (2022). https:\/\/doi.org\/10.1007\/s11042-022-12122-9","journal-title":"Multimedia Tools and Applications"},{"key":"370_CR47","doi-asserted-by":"publisher","unstructured":"Wang, J., Sun, L., Liu, Y., Shao, M., Zheng, Z.: Multimodal sarcasm target identification in tweets. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 8164\u20138175 (2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.562","DOI":"10.18653\/v1\/2022.acl-long.562"},{"key":"370_CR48","doi-asserted-by":"crossref","unstructured":"Pramanick, S., Roy, A., Patel, V.M.: Multimodal learning using optimal transport for sarcasm and humor detection. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3930\u20133940 (2022)","DOI":"10.1109\/WACV51458.2022.00062"},{"key":"370_CR49","doi-asserted-by":"publisher","unstructured":"Gupta, S., Shah, A., Shah, M., Syiemlieh, L., Maurya, C.: Filming multimodal sarcasm detection with attention. In: Neural Information Processing: 28th International Conference, ICONIP 2021, Sanur, Bali, Indonesia, December 8\u201312, 2021, Proceedings, Part V 28, pp. 178\u2013186 (2021). https:\/\/doi.org\/10.1007\/978-3-030-92307-5_21 . Springer","DOI":"10.1007\/978-3-030-92307-5_21"},{"key":"370_CR50","doi-asserted-by":"crossref","unstructured":"Cubuk, E.D., Zoph, B., Mane, D., Vasudevan, V., Le, Q.V.: AutoAugment: Learning Augmentation Policies from Data (2019). arXiv:1805.09501","DOI":"10.1109\/CVPR.2019.00020"},{"key":"370_CR51","unstructured":"Takahashi, R., Matsubara, T., Uehara, K.: Ricap: Random image cropping and patching data augmentation for deep cnns. In: Asian Conference on Machine Learning, pp. 786\u2013798 (2018). PMLR"},{"issue":"1","key":"370_CR52","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","volume":"45","author":"K Han","year":"2022","unstructured":"Han, K., Wang, Y., Chen, H., Chen, X., Guo, J., Liu, Z., Tang, Y., Xiao, A., Xu, C., Xu, Y., et al.: A survey on vision transformer. IEEE transactions on pattern analysis and machine intelligence 45(1), 87\u2013110 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2022.3152247","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"370_CR53","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale (2021). arXiv:2010.11929"},{"key":"370_CR54","doi-asserted-by":"crossref","unstructured":"Yagawa, G., Oishi, A.: Feedforward neural networks. In: Lecture Notes on Numerical Methods in Engineering and Sciences (2021)","DOI":"10.1007\/978-3-030-66111-3_2"},{"key":"370_CR55","doi-asserted-by":"publisher","unstructured":"Sharma, P., Ding, N., Goodman, S., Soricut, R.: Conceptual captions: A cleaned, hypernymed, image alt-text dataset for automatic image captioning. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2556\u20132565 (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1238","DOI":"10.18653\/v1\/P18-1238"},{"key":"370_CR56","unstructured":"Ordonez, V., Kulkarni, G., Berg, T.: Im2text: Describing images using 1 million captioned photographs. Advances in neural information processing systems 24 (2011)"},{"key":"370_CR57","doi-asserted-by":"publisher","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755 (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48 . Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"370_CR58","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., Zhu, Y., Groth, O., Johnson, J., Hata, K., Kravitz, J., Chen, S., Kalantidis, Y., Li, L.-J., Shamma, D.A., et al.: Visual genome: Connecting language and vision using crowdsourced dense image annotations. International journal of computer vision 123, 32\u201373 (2017). https:\/\/doi.org\/10.1007\/s11263-016-0981-7","journal-title":"International journal of computer vision"},{"key":"370_CR59","doi-asserted-by":"crossref","unstructured":"Shalev-Shwartz, S., Ben-David, S.: Understanding Machine Learning: From Theory to Algorithms. Cambridge university press, ??? (2014)","DOI":"10.1017\/CBO9781107298019"},{"key":"370_CR60","doi-asserted-by":"publisher","unstructured":"Xu, N., Zeng, Z., Mao, W.: Reasoning with multimodal sarcastic tweets via modeling cross-modality contrast and semantic association. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 3777\u20133786 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.349","DOI":"10.18653\/v1\/2020.acl-main.349"},{"key":"370_CR61","doi-asserted-by":"crossref","unstructured":"Li, Z., Xu, B., Zhu, C., Zhao, T.: Clmlf: A contrastive learning and multi-layer fusion method for multimodal sentiment detection. arXiv preprint arXiv:2204.05515 (2022)","DOI":"10.18653\/v1\/2022.findings-naacl.175"},{"issue":"8","key":"370_CR62","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural computation 9(8), 1735\u20131780 (1997)","journal-title":"Neural computation"},{"issue":"1","key":"370_CR63","doi-asserted-by":"publisher","first-page":"2939334","DOI":"10.1155\/2021\/2939334","volume":"2021","author":"A Kumar","year":"2021","unstructured":"Kumar, A., Dikshit, S., Albuquerque, V.H.C.: Explainable artificial intelligence for sarcasm detection in dialogues. Wireless Communications and Mobile Computing 2021(1), 2939334 (2021)","journal-title":"Wireless Communications and Mobile Computing"},{"key":"370_CR64","doi-asserted-by":"crossref","unstructured":"Hashmi, E., Yayilgan, S.Y.: A robust hybrid approach with product context-aware learning and explainable ai for sentiment analysis in amazon user reviews. Electronic Commerce Research, 1\u201333 (2024)","DOI":"10.1007\/s10660-024-09896-5"},{"issue":"32","key":"370_CR65","doi-asserted-by":"publisher","first-page":"20011","DOI":"10.1007\/s00521-024-10208-z","volume":"36","author":"Y Akkem","year":"2024","unstructured":"Akkem, Y., Biswas, S.K., Varanasi, A.: Streamlit-based enhancing crop recommendation systems with advanced explainable artificial intelligence for smart farming. Neural Computing and Applications 36(32), 20011\u201320025 (2024)","journal-title":"Neural Computing and Applications"}],"container-title":["Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-025-00370-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13748-025-00370-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-025-00370-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T14:42:52Z","timestamp":1757169772000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13748-025-00370-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,16]]},"references-count":65,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["370"],"URL":"https:\/\/doi.org\/10.1007\/s13748-025-00370-3","relation":{},"ISSN":["2192-6352","2192-6360"],"issn-type":[{"value":"2192-6352","type":"print"},{"value":"2192-6360","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,16]]},"assertion":[{"value":"23 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}