{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:37:50Z","timestamp":1776886670799,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679883","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"4183-4187","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["CAG: A Consistency-Adaptive Text-Image Alignment Generation for Joint Multimodal Entity-Relation Extraction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-9028-0548","authenticated-orcid":false,"given":"Xinjie","family":"Yang","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5028-2848","authenticated-orcid":false,"given":"Xiaocheng","family":"Gong","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4998-7743","authenticated-orcid":false,"given":"Binghao","family":"Tang","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5626-9711","authenticated-orcid":false,"given":"Yang","family":"Lei","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7642-4942","authenticated-orcid":false,"given":"Yayue","family":"Deng","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7515-4401","authenticated-orcid":false,"given":"Huan","family":"Ouyang","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7965-8531","authenticated-orcid":false,"given":"Gang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2312-8715","authenticated-orcid":false,"given":"Lei","family":"Luo","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2727-0003","authenticated-orcid":false,"given":"Yunling","family":"Feng","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2685-9805","authenticated-orcid":false,"given":"Bin","family":"Duan","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9823-3870","authenticated-orcid":false,"given":"Si","family":"Li","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7290-9585","authenticated-orcid":false,"given":"Yajing","family":"Xu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Mathilde Caron Hugo Touvron Ishan Misra Herv\u00e9 J\u00e9gou Julien Mairal Piotr Bojanowski and Armand Joulin. 2021. Emerging Properties in Self-Supervised Vision Transformers. arXiv:2104.14294 [cs.CV]","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.wnut-1.11"},{"key":"e_1_3_2_1_3_1","volume-title":"International conference on machine learning. PMLR, 1597--1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597--1607."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.112948"},{"key":"e_1_3_2_1_5_1","volume-title":"Good visual guidance makes a better extractor: Hierarchical visual prefix for multimodal entity and relation extraction. arXiv preprint arXiv:2205.03521","author":"Chen Xiang","year":"2022","unstructured":"Xiang Chen, Ningyu Zhang, Lei Li, Yunzhi Yao, Shumin Deng, Chuanqi Tan, Fei Huang, Luo Si, and Huajun Chen. 2022. Good visual guidance makes a better extractor: Hierarchical visual prefix for multimodal entity and relation extraction. arXiv preprint arXiv:2205.03521 (2022)."},{"key":"e_1_3_2_1_6_1","volume-title":"et al","author":"Chung Hyung Won","year":"2022","unstructured":"Hyung Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Eric Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, et al . 2022. Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)."},{"key":"e_1_3_2_1_7_1","volume-title":"d.]. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. arXiv","author":"Dai W","year":"2023","unstructured":"W Dai, J Li, D Li, AMH Tiong, J Zhao, W Wang, B Li, P Fung, and S Hoi. [n. d.]. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. arXiv 2023. arXiv preprint arXiv:2305.06500 ([n. d.])."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095149"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.138"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i7.25971"},{"key":"e_1_3_2_1_13_1","volume-title":"Supervised contrastive learning. Advances in neural information processing systems 33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised contrastive learning. Advances in neural information processing systems 33 (2020), 18661--18673."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_15_1","volume-title":"On analyzing the role of image for visual-enhanced relation extraction. arXiv preprint arXiv:2211.07504","author":"Li Lei","year":"2022","unstructured":"Lei Li, Xiang Chen, Shuofei Qiao, Feiyu Xiong, Huajun Chen, and Ningyu Zhang. 2022. On analyzing the role of image for visual-enhanced relation extraction. arXiv preprint arXiv:2211.07504 (2022)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-21348-0_30"},{"key":"e_1_3_2_1_17_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.395"},{"key":"e_1_3_2_1_20_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems 35 (2022), 27730--27744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_22_1","unstructured":"Victor Sanh Albert Webson Colin Raffel Stephen H. Bach Lintang Sutawika Zaid Alyafeai Antoine Chaffin Arnaud Stiegler Teven Le Scao Arun Raja Manan Dey M Saiful Bari Canwen Xu Urmish Thakker Shanya Sharma Sharma Eliza Szczechla Taewoon Kim Gunjan Chhablani Nihal Nayak Debajyoti Datta Jonathan Chang Mike Tian-Jian Jiang Han Wang Matteo Manica Sheng Shen Zheng Xin Yong Harshit Pandey Rachel Bawden Thomas Wang Trishala Neeraj Jos Rozen Abheesht Sharma Andrea Santilli Thibault Fevry Jason Alan Fries Ryan Teehan Tali Bers Stella Biderman Leo Gao Thomas Wolf and Alexander M. Rush. 2022. Multitask Prompted Training Enables Zero-Shot Task Generalization. arXiv:2110.08207 [cs.LG]"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_24_1","unstructured":"Xiao Wang Weikang Zhou Can Zu Han Xia Tianze Chen Yuansen Zhang Rui Zheng Junjie Ye Qi Zhang Tao Gui et al. 2023. InstructUIE: Multi-task Instruction Tuning for Unified Information Extraction. arXiv preprint arXiv:2304.08085 (2023)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Yizhong Wang Swaroop Mishra Pegah Alipoormolabashi Yeganeh Kordi Amirreza Mirzaei Anjana Arunkumar Arjun Ashok Arut Selvan Dhanasekaran Atharva Naik David Stap Eshaan Pathak Giannis Karamanolakis Haizhi Gary Lai Ishan Purohit Ishani Mondal Jacob Anderson Kirby Kuznia Krima Doshi Maitreya Patel Kuntal Kumar Pal Mehrad Moradshahi Mihir Parmar Mirali Purohit Neeraj Varshney Phani Rohitha Kaza Pulkit Verma Ravsehaj Singh Puri Rushang Karia Shailaja Keyur Sampat Savan Doshi Siddhartha Mishra Sujan Reddy Sumanta Patro Tanay Dixit Xudong Shen Chitta Baral Yejin Choi Noah A. Smith Hannaneh Hajishirzi and Daniel Khashabi. 2022. Super-Natural Instructions: Generalization via Declarative Instructions on 1600 NLP Tasks. arXiv:2204.07705 [cs.CL]","DOI":"10.18653\/v1\/2022.emnlp-main.340"},{"key":"e_1_3_2_1_26_1","volume-title":"Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le.","author":"Wei Jason","year":"2021","unstructured":"Jason Wei, Maarten Bosma, Vincent Y Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le. 2021. Finetuned language models are zero-shot learners. arXiv preprint arXiv:2109.01652 (2021)."},{"key":"e_1_3_2_1_27_1","volume-title":"Information screening whilst exploiting! multimodal relation extraction with feature denoising and multimodal topic modeling. arXiv preprint arXiv:2305.11719","author":"Wu Shengqiong","year":"2023","unstructured":"Shengqiong Wu, Hao Fei, Yixin Cao, Lidong Bing, and Tat-Seng Chua. 2023. Information screening whilst exploiting! multimodal relation extraction with feature denoising and multimodal topic modeling. arXiv preprint arXiv:2305.11719 (2023)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498475"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26309"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17687"},{"key":"e_1_3_2_1_31_1","volume-title":"PromptNER: A Prompting Method for Few-shot Named Entity Recognition via k Nearest Neighbor Search. arXiv preprint arXiv:2305.12217","author":"Zhang Mozhi","year":"2023","unstructured":"Mozhi Zhang, Hang Yan, Yaqian Zhou, and Xipeng Qiu. 2023. PromptNER: A Prompting Method for Few-shot Named Entity Recognition via k Nearest Neighbor Search. arXiv preprint arXiv:2305.12217 (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11962"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Gang Zhao Xiaocheng Gong Xinjie Yang Guanting Dong Shudong Lu and Si Li. 2023. DemoSG: Demonstration-enhanced Schema-guided Generation for Low-resource Event Extraction. arXiv:2310.10481 [cs.CL]","DOI":"10.18653\/v1\/2023.findings-emnlp.121"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3476968"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3013398"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679883","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679883","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:08Z","timestamp":1750294688000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679883"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":35,"alternative-id":["10.1145\/3627673.3679883","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679883","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}