{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:08:55Z","timestamp":1743120535138,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031781858"},{"type":"electronic","value":"9783031781865"}],"license":[{"start":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T00:00:00Z","timestamp":1732924800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T00:00:00Z","timestamp":1732924800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78186-5_1","type":"book-chapter","created":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T14:17:34Z","timestamp":1732889854000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Image-Text Sarcasm Detection for\u00a0Enhanced Understanding"],"prefix":"10.1007","author":[{"given":"Haiyang","family":"Li","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,30]]},"reference":[{"key":"1_CR1","first-page":"1","volume":"2022","author":"SK Bharti","year":"2022","unstructured":"Bharti, S.K., Gupta, R.K., Shukla, P.K., Hatamleh, W.A., Tarazi, H., Nuagah, S.J.: Multimodal sarcasm detection: a deep learning approach. Wirel. Commun. Mob. Comput. 2022, 1\u201310 (2022)","journal-title":"Wirel. Commun. Mob. Comput."},{"doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: Learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402 (2023)","key":"1_CR2","DOI":"10.1109\/CVPR52729.2023.01764"},{"doi-asserted-by":"crossref","unstructured":"Cai, Y., Cai, H., et\u00a0al.: Multi-modal sarcasm detection in twitter with hierarchical fusion model. In: ACL, pp. 2506\u20132515 (2019)","key":"1_CR3","DOI":"10.18653\/v1\/P19-1239"},{"key":"1_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.109924","volume":"257","author":"DS Chauhan","year":"2022","unstructured":"Chauhan, D.S., Singh, G.V., Arora, A., Ekbal, A., Bhattacharyya, P.: An emoji-aware multitask framework for multimodal sarcasm detection. Knowl.-Based Syst. 257, 109924 (2022)","journal-title":"Knowl.-Based Syst."},{"unstructured":"Devlin, J., Chang, M.W., , et\u00a0al.: Bert: Pre-training of deep bidirectional transformers for language understanding. In: NAACL, pp. 4171\u20134186 (2019)","key":"1_CR5"},{"doi-asserted-by":"crossref","unstructured":"Dutta, P., Bhattacharyya, C.K.: Multi-modal sarcasm detection in social networks: A comparative review. In: 2022 6th International Conference on Computing Methodologies and Communication (ICCMC), pp. 207\u2013214. IEEE (2022)","key":"1_CR6","DOI":"10.1109\/ICCMC53470.2022.9753981"},{"doi-asserted-by":"crossref","unstructured":"Gu, J., Zhao, H., et\u00a0al.: Scene graph generation with external knowledge and image reconstruction. In: CVPR, pp. 1969\u20131978 (2019)","key":"1_CR7","DOI":"10.1109\/CVPR.2019.00207"},{"doi-asserted-by":"crossref","unstructured":"Han, L., et al.: Show me what and tell me how: video synthesis via multimodal conditioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3615\u20133625 (2022)","key":"1_CR8","DOI":"10.1109\/CVPR52688.2022.00360"},{"issue":"6","key":"1_CR9","doi-asserted-by":"publisher","first-page":"878","DOI":"10.3390\/e25060878","volume":"25","author":"S Hao","year":"2023","unstructured":"Hao, S., et al.: Enhanced semantic representation learning for sarcasm detection by integrating context-aware attention and fusion network. Entropy 25(6), 878 (2023)","journal-title":"Entropy"},{"doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., et\u00a0al.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","key":"1_CR10","DOI":"10.1109\/CVPR.2016.90"},{"issue":"5","key":"1_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3124420","volume":"50","author":"A Joshi","year":"2017","unstructured":"Joshi, A., Bhattacharyya, P., Carman, M.J.: Automatic sarcasm detection: a survey. ACM Comput. Surv. (CSUR) 50(5), 1\u201322 (2017)","journal-title":"ACM Comput. Surv. (CSUR)"},{"doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: CVPR, pp. 3128\u20133137 (2015)","key":"1_CR12","DOI":"10.1109\/CVPR.2015.7298932"},{"doi-asserted-by":"crossref","unstructured":"Kim, Y.: Convolutional neural networks for sentence classification. In: EMNLP, pp. 1746\u20131751 (2014)","key":"1_CR13","DOI":"10.3115\/v1\/D14-1181"},{"unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. CoRR abs\/1412.6980 (2015)","key":"1_CR14"},{"unstructured":"Li, L.H., Yatskar, M., et\u00a0al.: VisualBert: simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)","key":"1_CR15"},{"issue":"3","key":"1_CR16","first-page":"3918","volume":"45","author":"M Li","year":"2022","unstructured":"Li, M., Huang, P.Y., Chang, X., Hu, J., Yang, Y., Hauptmann, A.: Video pivoting unsupervised multi-modal machine translation. IEEE Trans. Pattern Anal. Mach. Intell. 45(3), 3918\u20133932 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"doi-asserted-by":"crossref","unstructured":"Liang, B., et al.: Multi-modal sarcasm detection via cross-modal graph convolutional network. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), vol.\u00a01, pp. 1767\u20131777. Association for Computational Linguistics (2022)","key":"1_CR17","DOI":"10.18653\/v1\/2022.acl-long.124"},{"key":"1_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"doi-asserted-by":"crossref","unstructured":"Liu, H., Wang, W., Li, H.: Towards multi-modal sarcasm detection via hierarchical congruity modeling with knowledge enhancement. arXiv preprint arXiv:2210.03501 (2022)","key":"1_CR19","DOI":"10.18653\/v1\/2022.emnlp-main.333"},{"doi-asserted-by":"crossref","unstructured":"Liu, V., Qiao, H., Chilton, L.: Opal: multimodal image generation for news illustration. In: Proceedings of the 35th Annual ACM Symposium on User Interface Software and Technology, pp. 1\u201317 (2022)","key":"1_CR20","DOI":"10.1145\/3526113.3545621"},{"unstructured":"Lu, J., Batra, D., et\u00a0al.: VilBert: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. arXiv preprint arXiv:1908.02265 (2019)","key":"1_CR21"},{"doi-asserted-by":"crossref","unstructured":"Mou, L., Men, R., et\u00a0al.: Natural language inference by tree-based convolution and heuristic matching. In: ACL, pp. 130\u2013136 (2016)","key":"1_CR22","DOI":"10.18653\/v1\/P16-2022"},{"doi-asserted-by":"crossref","unstructured":"Qiao, Y., Jing, L., Song, X., Chen, X., Zhu, L., Nie, L.: Mutual-enhanced incongruity learning network for multi-modal sarcasm detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 9507\u20139515 (2023)","key":"1_CR23","DOI":"10.1609\/aaai.v37i8.26138"},{"doi-asserted-by":"crossref","unstructured":"Ren, S., He, K., et\u00a0al.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 1137\u20131149 (2016)","key":"1_CR24","DOI":"10.1109\/TPAMI.2016.2577031"},{"issue":"5","key":"1_CR25","doi-asserted-by":"publisher","first-page":"844","DOI":"10.3390\/math10050844","volume":"10","author":"E Savini","year":"2022","unstructured":"Savini, E., Caragea, C.: Intermediate-task transfer learning with BERT for sarcasm detection. Mathematics 10(5), 844 (2022)","journal-title":"Mathematics"},{"doi-asserted-by":"crossref","unstructured":"Sheffer, R., Adi, Y.: I hear your true colors: Image guided audio generation. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135. IEEE (2023)","key":"1_CR26","DOI":"10.1109\/ICASSP49357.2023.10096023"},{"issue":"21","key":"1_CR27","doi-asserted-by":"publisher","first-page":"11235","DOI":"10.3390\/app122111235","volume":"12","author":"Y Sun","year":"2022","unstructured":"Sun, Y., Zhang, H., Yang, S., Wang, J.: EFAFN: an efficient feature adaptive fusion network with facial feature for multimodal sarcasm detection. Appl. Sci. 12(21), 11235 (2022)","journal-title":"Appl. Sci."},{"doi-asserted-by":"crossref","unstructured":"Tomar, M., Tiwari, A., Saha, T., Saha, S.: Your tone speaks louder than your face! modality order infused multi-modal sarcasm detection. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 3926\u20133933 (2023)","key":"1_CR28","DOI":"10.1145\/3581783.3612528"},{"key":"1_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.121068","volume":"234","author":"O Vitman","year":"2023","unstructured":"Vitman, O., Kostiuk, Y., Sidorov, G., Gelbukh, A.: Sarcasm detection framework using context, emotion and sentiment features. Expert Syst. Appl. 234, 121068 (2023)","journal-title":"Expert Syst. Appl."},{"doi-asserted-by":"crossref","unstructured":"Wang, S., Gupta, R., et\u00a0al.: A task in a suit and a tie: paraphrase generation with semantic augmentation. In: AAAI, pp. 7176\u20137183 (2019)","key":"1_CR30","DOI":"10.1609\/aaai.v33i01.33017176"},{"unstructured":"Wu, Y., Schuster, M., et\u00a0al.: Google\u2019s neural machine translation system: bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016)","key":"1_CR31"},{"key":"1_CR32","doi-asserted-by":"publisher","first-page":"6220","DOI":"10.1109\/JSTARS.2022.3194076","volume":"15","author":"R Yang","year":"2022","unstructured":"Yang, R., et al.: Multimodal fusion remote sensing image-audio retrieval. IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens. 15, 6220\u20136235 (2022)","journal-title":"IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens."},{"unstructured":"Yu, A.W., Dohan, D., et\u00a0al.: QANet: combining local convolution with global self-attention for reading comprehension. In: ICLR (2018)","key":"1_CR33"},{"doi-asserted-by":"crossref","unstructured":"Zhang, X., Chen, Y., Li, G.: Multi-modal sarcasm detection based on contrastive attention mechanism (2021)","key":"1_CR34","DOI":"10.1007\/978-3-030-88480-2_66"},{"unstructured":"Zhang, Z., Yang, J., Zhao, H.: Retrospective reader for machine reading comprehension. arXiv:abs\/2001.09694 (2020)","key":"1_CR35"},{"doi-asserted-by":"crossref","unstructured":"Zhu, T., Li, L., Yang, J., Zhao, S., Liu, H., Qian, J.: Multimodal sentiment analysis with image-text interaction network. IEEE Trans. Multimedia (2022)","key":"1_CR36","DOI":"10.1109\/TMM.2022.3160060"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78186-5_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T15:10:20Z","timestamp":1732893020000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78186-5_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,30]]},"ISBN":["9783031781858","9783031781865"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78186-5_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,30]]},"assertion":[{"value":"30 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}