{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T10:46:25Z","timestamp":1776854785552,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476097"],"award-info":[{"award-number":["62476097"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the Fundamental Research Funds for the Central Universities, South China University of Technology","award":["x2rjD2250190"],"award-info":[{"award-number":["x2rjD2250190"]}]},{"name":"Guangdong Provincial Fund for Basic and Applied Basic Research?Regional Joint Fund Project (Key Project)","award":["2023B1515120078"],"award-info":[{"award-number":["2023B1515120078"]}]},{"name":"Guangdong Provincial Natural Science Foundation for Outstanding Youth Team Project","award":["2024B1515040010"],"award-info":[{"award-number":["2024B1515040010"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755424","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"4418-4426","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Ground and Reconstruct: Entity-Region Bidirectional Alignment Pre-Training for Low-Resource GMNER"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-1629-2233","authenticated-orcid":false,"given":"Runwei","family":"Situ","sequence":"first","affiliation":[{"name":"South China University of Technology, Guangzhou, China and Key Laboratory of Big Data and Intelligent Robot (SCUT), Ministry of Education, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1767-789X","authenticated-orcid":false,"given":"Yi","family":"Cai","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China and Key Laboratory of Big Data and Intelligent Robot (SCUT), Ministry of Education, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7183-3155","authenticated-orcid":false,"given":"Yong","family":"Xu","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China and Peng Cheng Laboratory, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7064-6507","authenticated-orcid":false,"given":"Jiexin","family":"Wang","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China and Key Laboratory of Big Data and Intelligent Robot (SCUT), Ministry of Education, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Building multimodal knowledge bases with multimodal computational sequences and generative adversarial networks","author":"Chen Donghua","year":"2023","unstructured":"Donghua Chen and Runtong Zhang. 2023. Building multimodal knowledge bases with multimodal computational sequences and generative adversarial networks. IEEE Trans. Multimedia (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"e_1_3_2_1_3_1","volume-title":"Proc. Int. Conf. Machine Learn., PMLR","author":"Cho Jaemin","year":"2021","unstructured":"Jaemin Cho, Jie Lei, Hao Tan, and Mohit Bansal. 2021. Unifying vision-and-language tasks via text generation. In Proc. Int. Conf. Machine Learn., PMLR, 1931-1942."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119200"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119195"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01315"},{"key":"e_1_3_2_1_7_1","volume-title":"MixGen: A New Multi-Modal Data Augmentation. In 2023 IEEE\/CVF Winter Conference on Applications of Computer Vision Workshops (WACVW). IEEE, 379-389","author":"Hao Xiaoshuai","year":"2023","unstructured":"Xiaoshuai Hao, Yi Zhu, Srikar Appalaraju, Aston Zhang, Wanqian Zhang, Bo Li, and Mu Li. 2023. MixGen: A New Multi-Modal Data Augmentation. In 2023 IEEE\/CVF Winter Conference on Applications of Computer Vision Workshops (WACVW). IEEE, 379-389."},{"key":"e_1_3_2_1_8_1","first-page":"4171","article-title":"BERT","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proc. NAACL-HLT. 4171-4186.","journal-title":"Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proc. NAACL-HLT."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. Int. Conf. Machine Learn., PMLR, 12888-12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In Proc. Int. Conf. Machine Learn., PMLR, 12888-12900."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.184"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681598"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.152"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.542"},{"key":"e_1_3_2_1_16_1","unstructured":"Shilong Liu Zhaoyang Zeng Tianhe Ren Feng Li Hao Zhang Jie Yang Chunyuan Li Jianwei Yang Hang Su Jun Zhu et al. 2023. Grounding dino: Marrying dino with grounded pre-training for open-set object detection. arXiv preprint arXiv:2303.05499 (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-21348-0_30"},{"key":"e_1_3_2_1_18_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019b. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1185"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1078"},{"key":"e_1_3_2_1_21_1","first-page":"4694","article-title":"When does label smoothing help?","author":"M\u00fcller Rafael","year":"2019","unstructured":"Rafael M\u00fcller, Simon Kornblith, and Geoffrey Hinton. 2019. When does label smoothing help?. In Proc. Adv. Neural Inf. Process. Syst., 4694-4703.","journal-title":"Proc. Adv. Neural Inf. Process. Syst."},{"key":"e_1_3_2_1_22_1","volume-title":"Proc. Annu. Meeting Assoc. Comput. Linguistics: Syst. Demos.","author":"Qi Peng","unstructured":"Peng Qi, Yuhao Zhang, Yuhui Zhang, Jason Bolton, and Christopher D. Manning. 2020. Stanza: A Python Natural Language Processing Toolkit for Many Human Languages. In Proc. Annu. Meeting Assoc. Comput. Linguistics: Syst. Demos."},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. Int. Conf. Machine Learn., PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In Proc. Int. Conf. Machine Learn., PMLR, 8748-8763."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.97"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17633"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612322"},{"key":"e_1_3_2_1_28_1","volume-title":"Proc. Int. Conf. Machine Learn., PMLR, 23318-23340","author":"Wang Peng","year":"2022","unstructured":"Peng Wang, An Yang, Rui Men, Junyang Lin, Shuai Bai, Zhikang Li, Jianxin Ma, Chang Zhou, Jingren Zhou, and Hongxia Yang. 2022b. Ofa: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework. In Proc. Int. Conf. Machine Learn., PMLR, 23318-23340."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.232"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.44"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.451"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i4.16431"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.306"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.508"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26309"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17687"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00553"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11962"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3476968"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755424","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:12:46Z","timestamp":1765339966000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755424"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":39,"alternative-id":["10.1145\/3746027.3755424","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755424","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}