{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T13:09:39Z","timestamp":1778332179178,"version":"3.51.4"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819203772","type":"print"},{"value":"9789819203789","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-92-0378-9_1","type":"book-chapter","created":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T12:21:09Z","timestamp":1778329269000},"page":"3-19","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MORE-R1: Guiding LVLM for\u00a0Multimodal Object-Entity Relation Extraction via\u00a0Stepwise Reasoning with\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Xiang","family":"Yuan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xu","family":"Chu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinrong","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haochen","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zonghong","family":"Dai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongcheng","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoyue","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weiping","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tong","family":"Mo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,5,10]]},"reference":[{"key":"1_CR1","unstructured":"Bai, S., Chen, K., Liu, X., et\u00a0al.: Qwen2.5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"1_CR2","unstructured":"Chen, F., Feng, Y.: Chain-of-thought prompt distillation for multimodal named entity recognition and multimodal relation extraction. arXiv preprint arXiv:2306.14122 (2023)"},{"key":"1_CR3","unstructured":"Chen, L., Wang, Z., Ren, S., et\u00a0al.: Next token prediction towards multimodal intelligence: A comprehensive survey. arXiv preprint arXiv:2412.18619 (2024)"},{"key":"1_CR4","unstructured":"Chen, X., Zhang, J., Wang, X., et\u00a0al.: Continual multimodal knowledge graph construction. In: Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence,pp. 6225\u20136233 (2024)"},{"key":"1_CR5","unstructured":"Chen, Y., Yang, Z., Liu, Z., et\u00a0al.: Acereason-nemotron: advancing math and code reasoning through reinforcement learning. arXiv preprint arXiv:2505.16400 (2025)"},{"key":"1_CR6","unstructured":"Chu, X., Chen, X., Wang, G., et\u00a0al.: Qwen look again: Guiding vision-language reasoning models to re-attention visual information. arXiv preprint arXiv:2505.23558 (2025)"},{"key":"1_CR7","doi-asserted-by":"publisher","first-page":"1274","DOI":"10.1109\/TASLP.2023.3345146","volume":"32","author":"S Cui","year":"2024","unstructured":"Cui, S., Cao, J., Cong, X., et al.: Enhancing multimodal entity and relation extraction with variational information bottleneck. IEEE\/ACM Trans. Audio, Speech, Lang. Process. 32, 1274\u20131285 (2024)","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"key":"1_CR8","unstructured":"Grattafiori, A., Dubey, A., Jauhri, A., et\u00a0al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"1_CR9","unstructured":"Guo, D., Yang, D., Zhang, H., et\u00a0al.: Deepseek-r1: Incentivizing reasoning capability in LLMs via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"He, L., Wang, H., Cao, Y., et\u00a0al.: More: a multimodal object-entity relation extraction dataset with a benchmark evaluation. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 4564\u20134573 (2023)","DOI":"10.1145\/3581783.3612209"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"He, L., Wang, H., Wu, Z., et\u00a0al.: Focus & gating: a multimodal approach for unveiling relations in noisy social media. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 1379\u20131388 (2024)","DOI":"10.1145\/3664647.3680995"},{"key":"1_CR12","unstructured":"Hurst, A., Lerer, A., Goucher, A.P., et\u00a0al.: Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)"},{"key":"1_CR13","unstructured":"Jaech, A., Kalai, A., Lerer, A., et\u00a0al.: Openai o1 system card. arXiv preprint arXiv:2412.16720 (2024)"},{"key":"1_CR14","doi-asserted-by":"crossref","unstructured":"Li, L., Chen, X., Qiao, S., et\u00a0al.: On analyzing the role of image for visual-enhanced relation extraction (student abstract). In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 16254\u201316255 (2023)","DOI":"10.1609\/aaai.v37i13.26987"},{"key":"1_CR15","unstructured":"Lin, Q., Zhu, Y., Pu, B., et\u00a0al.: A foundation model for chest x-ray interpretation with grounded reasoning via online reinforcement learning. arXiv preprint arXiv:2509.03906 (2025)"},{"key":"1_CR16","doi-asserted-by":"crossref","unstructured":"Lin, X., Xu, Y., Tang, M., et\u00a0al.: Remote: a unified multimodal relation extraction framework with multilevel optimal transport and mixture-of-experts. In: Proceedings of the 33rd ACM International Conference on Multimedia, pp. 121\u2013130 (2025)","DOI":"10.1145\/3746027.3754868"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Liu, X., Hu, C., Zhang, R., et\u00a0al.: Multimodal relation extraction via a mixture of hierarchical visual context learners. In: Proceedings of the ACM Web Conference 2024, pp. 4283\u20134294 (2024)","DOI":"10.1145\/3589334.3645603"},{"issue":"8","key":"1_CR18","first-page":"1","volume":"20","author":"Y Liu","year":"2024","unstructured":"Liu, Y., Yuan, X., Li, H., et al.: Semscene: semantic-consistency enhanced multi-level scene graph matching for image-text retrieval. ACM Trans. Multimed. Comput. Commun. Appl. 20(8), 1\u201328 (2024)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"1_CR19","first-page":"53728","volume":"36","author":"R Rafailov","year":"2023","unstructured":"Rafailov, R., Sharma, A., Mitchell, E., et al.: Direct preference optimization: Your language model is secretly a reward model. Adv. Neural. Inf. Process. Syst. 36, 53728\u201353741 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1_CR20","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., et\u00a0al.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"1_CR21","unstructured":"Shao, Z., Wang, P., Zhu, Q., et\u00a0al.: Deepseekmath: Pushing the limits of mathematical reasoning in open language models. arXiv preprint arXiv:2402.03300 (2024)"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Wei, P., Huang, Z., Ouyang, H., et\u00a0al.: CGI-MRE: a comprehensive genetic-inspired model for multimodal relation extraction. In: Proceedings of the 2024 International Conference on Multimedia Retrieval, pp. 524\u2013532 (2024)","DOI":"10.1145\/3652583.3658103"},{"key":"1_CR23","unstructured":"Xing, F., Wang, Z., Wang, W., Zhang, H.: Benchmarking and improving LVLMs on event extraction from multimedia documents. arXiv preprint arXiv:2509.12876 (2025)"},{"key":"1_CR24","unstructured":"Yang, X., Wu, W., Feng, S., et\u00a0al.: MM-BigBench: evaluating multimodal models on multimodal content comprehension tasks. arXiv preprint arXiv:2310.09036 (2023)"},{"issue":"11","key":"1_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3674501","volume":"56","author":"X Zhao","year":"2024","unstructured":"Zhao, X., Deng, Y., Yang, M., et al.: A comprehensive survey on relation extraction: Recent advances and new frontiers. ACM Comput. Surv. 56(11), 1\u201339 (2024)","journal-title":"ACM Comput. Surv."},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Zheng, C., Feng, J., Fu, Z., et\u00a0al.: Multimodal relation extraction with efficient graph alignment. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 5298\u20135306 (2021)","DOI":"10.1145\/3474085.3476968"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-92-0378-9_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T12:21:14Z","timestamp":1778329274000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-92-0378-9_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819203772","9789819203789"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-92-0378-9_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"10 May 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jeju","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 April 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 April 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dasfaa2026.github.io\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}