{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T01:19:26Z","timestamp":1742951966541,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":32,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819755547"},{"type":"electronic","value":"9789819755554"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-5555-4_28","type":"book-chapter","created":{"date-parts":[[2025,1,11]],"date-time":"2025-01-11T05:39:51Z","timestamp":1736573991000},"page":"387-402","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Bilingual Multimodal Graph Modeling for Text-Image Relation Inference"],"prefix":"10.1007","author":[{"given":"Dong","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Wenjie","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Shoushan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Guodong","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,12]]},"reference":[{"key":"28_CR1","unstructured":"Oier\u00a0Lopez de\u00a0Lacalle, Ander Salaberria, Aitor Soroa, Gorka Azkune, and Eneko Agirre, \u2018Evaluating multimodal representations on visual semantic textual similarity\u2019, in Proceedings of ECAI 2020, volume 325, pp. 1990\u20131997, (2020)."},{"key":"28_CR2","first-page":"5301","volume":"2021","author":"J Tang","year":"2021","unstructured":"Jiajia Tang, Kang Li, Xuanyu Jin, Andrzej Cichocki, Qibin Zhao, and Wanzeng Kong, \u2018CTFN: hierarchical learning for multimodal sentiment analysis using coupled-translation fusion network\u2019, in Proceedings of ACL 2021, pp. 5301\u20135311, (2021).","journal-title":"Proceedings of ACL"},{"key":"28_CR3","first-page":"3222","volume":"2021","author":"J Ive","year":"2021","unstructured":"Julia Ive, Andy\u00a0Mingren Li, Yishu Miao, Ozan Caglayan, Pranava Madhyastha, and Lucia Specia, \u2018Exploiting multimodal reinforcement learning for simultaneous machine translation\u2019, in Proceedings of EACL 2021, pp. 3222\u20133233, (2021).","journal-title":"Proceedings of EACL"},{"key":"28_CR4","first-page":"582","volume":"2023","author":"X Bao","year":"2023","unstructured":"Xigang Bao, Shouhui Wang, Pengnian Qi and Biao Qin, \u2018Wukong-CMNER: A Large-Scale Chinese Multimodal NER Dataset with Images Modality\u2019, in Proceedings of DASFAA 2023, pp. 582\u2013596, (2023).","journal-title":"Proceedings of DASFAA"},{"issue":"1","key":"28_CR5","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1007\/s13735-019-00187-6","volume":"9","author":"C Otto","year":"2020","unstructured":"Christian Otto, Matthias Springstein, Avishek Anand, and Ralph Ewerth, \u2018Characterization and classification of semantic image-text relations\u2019, Int. J. Multim. Inf. Retr., 9(1), 31\u201345, (2020).","journal-title":"International Journal of Multimedia Information Retrieval"},{"key":"28_CR6","first-page":"691","volume":"2021","author":"T Sosea","year":"2021","unstructured":"Tiberiu Sosea, Iustin Sirbu, Cornelia Caragea, Doina Caragea, and Traian Rebedea, \u2018Using the image-text relationship to improve multimodal disaster tweet classification\u2019, in Proceedings of ISCRAM 2021, pp. 691\u2013704, (2021).","journal-title":"Proceedings of ISCRAM"},{"key":"28_CR7","first-page":"23318","volume":"2022","author":"P Wang","year":"2022","unstructured":"Peng Wang, An\u00a0Yang, Rui Men, Junyang Lin, Shuai Bai, Zhikang Li, Jianxin Ma, Chang Zhou, Jingren Zhou, and Hongxia Yang, \u2018OFA: unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework\u2019, in Proceedings of ICML 2022, pp. 23318\u201323340, (2022).","journal-title":"Proceedings of ICML"},{"key":"28_CR8","first-page":"189","volume":"2022","author":"Yu Yang","year":"2022","unstructured":"Yang Yu, Dong Zhang, and Shoushan Li, \u2018Unified multi-modal pre-training for few-shot sentiment analysis with prompt-based learning\u2019, in Proceedings of ACM MM 2022, pp. 189\u2013198, (2022).","journal-title":"Proceedings of ACM MM"},{"key":"28_CR9","first-page":"4395","volume":"2021","author":"J Xincheng","year":"2021","unstructured":"Xincheng Ju, Dong Zhang, Rong Xiao, Junhui Li, Shoushan Li, Min Zhang, and Guodong Zhou, \u2018Joint multi-modal aspect-sentiment analysis with auxiliary cross-modal relation detection\u2019, in Proceedings of EMNLP 2021, pp. 4395\u20134405, (2021).","journal-title":"Proceedings of EMNLP"},{"key":"28_CR10","first-page":"1852","volume":"2020","author":"L Sun","year":"2020","unstructured":"Lin Sun, Jiquan Wang, Yindu Su, Fangsheng Weng, Yuxuan Sun, Zengwei Zheng, and Yuanyi Chen, \u2018RIVA: A pre-trained tweet multimodal model based on text-image relation for multimodal NER\u2019, in Proceedings of COLING 2020, pp. 1852\u20131862, (2020).","journal-title":"Proceedings of COLING"},{"key":"28_CR11","doi-asserted-by":"publisher","first-page":"13860","DOI":"10.1609\/aaai.v35i15.17633","volume":"2021","author":"L Sun","year":"2021","unstructured":"Lin Sun, Jiquan Wang, Kai Zhang, Yindu Su, and Fangsheng Weng, \u2018Rpbert: A text-image relation propagation-based BERT model for multimodal NER\u2019, in Proceedings of AAAI 2021, pp. 13860\u201313868, (2021).","journal-title":"Proceedings of AAAI"},{"key":"28_CR12","first-page":"2830","volume":"2019","author":"A Vempala","year":"2019","unstructured":"Alakananda Vempala and Daniel Preotiuc-Pietro, \u2018Categorizing and inferring the relationship between the text and image of twitter posts\u2019, in Proceedings of ACL 2019, pp. 2830\u20132840, (2019).","journal-title":"Proceedings of ACL"},{"key":"28_CR13","first-page":"15732","volume":"2023","author":"O Li","year":"2023","unstructured":"Oliver Li, Mallika Subramanian, Arkadiy Saakyan, Sky CH-Wang, and Smaranda Muresan, \u2018NormDial: A Comparable Bilingual Synthetic Dialog Dataset for Modeling Social Norm Adherence and Violation\u2019, in Proceedings of EMNLP 2023, pp. 15732-15744, (2023).","journal-title":"Proceedings of EMNLP"},{"key":"28_CR14","first-page":"6525","volume":"2020","author":"M Alikhani","year":"2020","unstructured":"Malihe Alikhani, Piyush Sharma, Shengjie Li, Radu Soricut, and Matthew Stone, \u2018Cross-modal coherence modeling for caption generation\u2019, in Proceedings of ACL 2020, pp. 6525\u20136535, (2020).","journal-title":"Proceedings of ACL"},{"key":"28_CR15","first-page":"781","volume":"2013","author":"T Chen","year":"2013","unstructured":"Tao Chen, Dongyuan Lu, Min-Yen Kan, and Peng Cui, \u2018Understanding and classifying image tweets\u2019, in Proceedings of ACM MM 2013, pp. 781\u2013784, (2013).","journal-title":"Proceedings of ACM MM"},{"key":"28_CR16","first-page":"3419","volume":"2021","author":"M Inan","year":"2021","unstructured":"Mert Inan, Piyush Sharma, Baber Khalid, Radu Soricut, Matthew Stone, and Malihe Alikhani, \u2018Cosmic: A coherence-aware generation metric for image descriptions\u2019, in Proceedings of EMNLP 2021, pp. 3419\u20133430, (2021).","journal-title":"Proceedings of EMNLP"},{"key":"28_CR17","doi-asserted-by":"publisher","first-page":"10427","DOI":"10.1609\/aaai.v36i10.21285","volume":"2022","author":"M Alikhani","year":"2022","unstructured":"Malihe Alikhani, Fangda Han, Hareesh Ravi, Mubbasir Kapadia, Vladimir Pavlovic, and Matthew Stone, \u2018Cross-modal coherence for text-to-image retrieval\u2019, in Proceedings of AAAI 2022, pp. 10427\u201310435, (2022).","journal-title":"Proceedings of AAAI"},{"key":"28_CR18","first-page":"1","volume":"2022","author":"L Wenjie","year":"2022","unstructured":"Wenjie Lu and Dong Zhang, \u2018Unified multi-modal multi-task joint learning for language-vision relation inference\u2019, in Proceedings of ICME 2022, pp. 1\u20136, (2022).","journal-title":"Proceedings of ICME"},{"key":"28_CR19","unstructured":"Thomas\u00a0N. Kipf and Max Welling, \u2018Semi-supervised classification with graph convolutional networks\u2019, in Proceedings of ICLR 2017, (2017)."},{"key":"28_CR20","first-page":"5415","volume":"2019","author":"D Zhang","year":"2019","unstructured":"Dong Zhang, Liangqing Wu, Changlong Sun, Shoushan Li, Qiaoming Zhu, and Guodong Zhou, \u2018Modeling both context- and speaker-sensitive dependence for emotion detection in multi-speaker conversations\u2019, in Proceedings of IJCAI 2019, pp. 5415\u20135421, (2019).","journal-title":"Proceedings of IJCAI"},{"key":"28_CR21","doi-asserted-by":"publisher","first-page":"3793","DOI":"10.1109\/TMM.2020.3032037","volume":"23","author":"W Nie","year":"2021","unstructured":"Weizhi Nie, Minjie Ren, Jie Nie, and Sicheng Zhao, \u2018C-GCN: correlation based graph convolutional network for audio-video emotion recognition\u2019, IEEE Trans. Multim., 23, 3793\u20133804, (2021).","journal-title":"IEEE Trans. Multim."},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Dong Zhang, Suzhong Wei, Shoushan Li, Hanqian Wu, Qiaoming Zhu, and Guodong Zhou, \u2018Multi-modal graph fusion for named entity recognition with targeted visual guidance\u2019, in Proceedings of AAAI 2021, pp. 14347\u201314355. AAAI Press, (2021).","DOI":"10.1609\/aaai.v35i16.17687"},{"key":"28_CR23","first-page":"208","volume":"2021","author":"B Liang","year":"2021","unstructured":"Bin Liang, Hang Su, Rongdi Yin, Lin Gui, Min Yang, Qin Zhao, Xiaoqi Yu, and Ruifeng Xu, \u2018Beta distribution guided aspect-aware graph for aspect category sentiment analysis with affective knowledge\u2019, in Proceedings of EMNLP 2021, pp. 208\u2013218, (2021).","journal-title":"Proceedings of EMNLP"},{"key":"28_CR24","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby, \u2018An image is worth 16x16 words: Transformers for image recognition at scale\u2019, in Proceedings of ICLR 2021, (2021)."},{"key":"28_CR25","first-page":"1767","volume":"2022","author":"B Liang","year":"2022","unstructured":"Bin Liang, Chenwei Lou, Xiang Li, Min Yang, Lin Gui, Yulan He, Wenjie Pei, and Ruifeng Xu, \u2018Multi-modal sarcasm detection via cross-modal graph convolutional network\u2019, in Proceedings of ACL 2022, pp. 1767\u20131777, (2022).","journal-title":"Proceedings of ACL"},{"key":"28_CR26","first-page":"4707","volume":"2021","author":"B Liang","year":"2021","unstructured":"Bin Liang, Chenwei Lou, Xiang Li, Lin Gui, Min Yang, and Ruifeng Xu, \u2018Multi-modal sarcasm detection with interactive in-modal and cross-modal graphs\u2019, in Proceedings of ACM MM 2021, pp. 4707\u20134715, (2021).","journal-title":"Proceedings of ACM MM"},{"key":"28_CR27","first-page":"3034","volume":"2021","author":"Z Khan","year":"2021","unstructured":"Zaid Khan and Yun Fu, \u2018Exploiting BERT for multimodal target sentiment classification through input space translation\u2019, in Proceedings of ACM MM 2021, pp. 3034\u20133042, (2021).","journal-title":"Proceedings of ACM MM"},{"key":"28_CR28","unstructured":"Liunian\u00a0Harold Li, Mark Yatskar, Da\u00a0Yin, Cho-Jui Hsieh, and Kai-Wei Chang, \u2018Visualbert: A simple and performant baseline for vision and language\u2019, CoRR, (2019)."},{"key":"28_CR29","unstructured":"Douwe Kiela, Suvrat Bhooshan, Hamed Firooz, and Davide Testuggine, \u2018Supervised multimodal bitransformers for classifying images and text\u2019, in Proceedings of ViGIL@NeurIPS 2019, (2019)."},{"key":"28_CR30","doi-asserted-by":"crossref","unstructured":"Xiaocui Yang, Shi Feng, Daling Wang, and Yifei Zhang, \u2018Image-text multimodal emotion classification via multi-view attentional network\u2019, IEEE Transactions on Multimedia, (2020).","DOI":"10.1109\/TMM.2020.3035277"},{"key":"28_CR31","first-page":"6784","volume":"2022","author":"F Zhao","year":"2022","unstructured":"Fei Zhao, Zhen Wu, Siyu Long, Xinyu Dai, Shujian Huang, and Jiajun Chen, \u2018Learning from adjective-noun pairs: A knowledge-enhanced framework for target-oriented multimodal sentiment classification\u2019, in Proceedings of COLING 2022, pp. 6784\u20136794, (2022).","journal-title":"Proceedings of COLING"},{"key":"28_CR32","doi-asserted-by":"crossref","unstructured":"Yaowei Li, Ruijie Quan, Linchao Zhu, and Yi\u00a0Yang, \u2018Efficient multimodal fusion via interactive prompting\u2019, in Proceedings of CVPR 2023, pp. 2604\u20132613. IEEE, (2023).","DOI":"10.1109\/CVPR52729.2023.00256"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5555-4_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,11]],"date-time":"2025-01-11T06:07:37Z","timestamp":1736575657000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5555-4_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819755547","9789819755554"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5555-4_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Gifu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2024a","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.dasfaa2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}