{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:16:21Z","timestamp":1755839781020,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62222213, 62072423, 62207031"],"award-info":[{"award-number":["No.62222213, 62072423, 62207031"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["No.2023ZD0121104"],"award-info":[{"award-number":["No.2023ZD0121104"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681661","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"9311-9320","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Bridging Gaps in Content and Knowledge for Multimodal Entity Linking"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0889-7660","authenticated-orcid":false,"given":"Pengfei","family":"Luo","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China &amp; State Key Laboratory of Cognitive Intelligence, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4246-5386","authenticated-orcid":false,"given":"Tong","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China &amp; State Key Laboratory of Cognitive Intelligence, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6548-1365","authenticated-orcid":false,"given":"Che","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China &amp; State Key Laboratory of Cognitive Intelligence, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2193-2288","authenticated-orcid":false,"given":"Suojuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Army Engineering University of PLA &amp; School of Computer Science and Technology, University of Science and Technology of China, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0227-3793","authenticated-orcid":false,"given":"Linli","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China &amp; State Key Laboratory of Cognitive Intelligence, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1427-3507","authenticated-orcid":false,"given":"Minglei","family":"Li","sequence":"additional","affiliation":[{"name":"Huawei Cloud, ShenZhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4835-4102","authenticated-orcid":false,"given":"Enhong","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China &amp; State Key Laboratory of Cognitive Intelligence, Hefei, Anhui Province, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--45439--5_31"},{"key":"e_1_3_2_1_2_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E. Hinton","author":"Ba Lei Jimmy","year":"2016","unstructured":"Lei Jimmy Ba, Jamie Ryan Kiros, and Geoffrey E. Hinton. 2016. Layer Normalization. CoRR, Vol. abs\/1607.06450 (2016). showeprint[arXiv]1607.06450 http:\/\/arxiv.org\/abs\/1607.06450"},{"key":"e_1_3_2_1_3_1","volume-title":"Autoregressive Entity Retrieval. In 9th International Conference on Learning Representations, ICLR 2021","author":"Cao Nicola De","year":"2021","unstructured":"Nicola De Cao, Gautier Izacard, Sebastian Riedel, and Fabio Petroni. 2021. Autoregressive Entity Retrieval. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=5k8F6UU39V"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313517"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--319--68204--4_8"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475400"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1277"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1284"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-022-1192-8"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24","volume":"5594","author":"Kim Wonjae","year":"2021","unstructured":"Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision. In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24 July 2021, Virtual Event (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 5583--5594. http:\/\/proceedings.mlr.press\/v139\/kim21k.html"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_15_1","volume-title":"Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021","author":"Li Junnan","year":"2021","unstructured":"Junnan Li, Ramprasaath R. Selvaraju, Akhilesh Gotmare, Shafiq R. Joty, Caiming Xiong, and Steven Chu-Hong Hoi. 2021. Align before Fuse: Vision and Language Representation Learning with Momentum Distillation. In Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6--14, 2021, virtual, Marc'Aurelio Ranzato, Alina Beygelzimer, Yann N. Dauphin, Percy Liang, and Jennifer Wortman Vaughan (Eds.). 9694--9705. https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/505259756244493872b7709a8a01b536-Abstract.html"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-020-0192-9"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462970"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--21348-0_30"},{"key":"e_1_3_2_1_19_1","volume-title":"Decoupled Weight Decay Regularization. In 7th International Conference on Learning Representations, ICLR 2019","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6--9, 2019. OpenReview.net. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599439"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_24_1","volume-title":"ACL 2010, Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics, July 11--16","author":"Navigli Roberto","year":"2010","unstructured":"Roberto Navigli and Simone Paolo Ponzetto. 2010. BabelNet: Building a Very Large Multilingual Semantic Network. In ACL 2010, Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics, July 11--16, 2010, Uppsala, Sweden, Jan Hajic, Sandra Carberry, and Stephen Clark (Eds.). The Association for Computer Linguistics, 216--225. https:\/\/aclanthology.org\/P10--1023\/"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1005"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24 July 2021, Virtual Event (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8748--8763. http:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-031--20074--8_9"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.3233\/SW-222986"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2306.12725"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the Twenty-Fourth International Joint Conference on Artificial Intelligence, IJCAI 2015","author":"Sun Yaming","year":"2015","unstructured":"Yaming Sun, Lei Lin, Duyu Tang, Nan Yang, Zhenzhou Ji, and Xiaolong Wang. 2015. Modeling Mention, Context and Entity with Neural Networks for Entity Disambiguation. In Proceedings of the Twenty-Fourth International Joint Conference on Artificial Intelligence, IJCAI 2015, Buenos Aires, Argentina, July 25--31, 2015, Qiang Yang and Michael J. Wooldridge (Eds.). AAAI Press, 1333--1339. http:\/\/ijcai.org\/Abstract\/15\/192"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2302.13971"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton-Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aur\u00e9lien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. CoRR Vol. abs\/2307.09288 (2023). https:\/\/doi.org\/10.48550\/ARXIV.2307.09288 showeprint[arXiv]2307.09288","DOI":"10.48550\/ARXIV.2307.09288"},{"key":"e_1_3_2_1_34_1","volume-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4--9, 2017, Long Beach, CA, USA, Isabelle Guyon, Ulrike von Luxburg, Samy Bengio, Hanna M. Wallach, Rob Fergus, S. V. N. Vishwanathan, and Roman Garnett (Eds.). 5998--6008. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/J.BDR.2020.100159"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531867"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.328"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380192"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.519"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612575"},{"key":"e_1_3_2_1_41_1","volume-title":"UAI 2023","author":"Yang Chengmei","year":"2023","unstructured":"Chengmei Yang, Bowei He, Yimeng Wu, Chao Xing, Lianghua He, and Chen Ma. 2023. MMEL: A Joint Learning Framework for Multi-Mention Entity Linking. In Uncertainty in Artificial Intelligence, UAI 2023, July 31 - 4 August 2023, Pittsburgh, PA, USA (Proceedings of Machine Learning Research, Vol. 216), Robin J. Evans and Ilya Shpitser (Eds.). PMLR, 2411--2421. https:\/\/proceedings.mlr.press\/v216\/yang23d.html"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-031--30675--4_45"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--73197--7_35"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2306.05685"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681661","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681661","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:50Z","timestamp":1750295870000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681661"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":44,"alternative-id":["10.1145\/3664647.3681661","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681661","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}