{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T14:11:35Z","timestamp":1777299095073,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3774904.3792531","type":"proceedings-article","created":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T21:54:39Z","timestamp":1775771679000},"page":"4137-4148","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Meta-Cognitive Knowledge Editing for Multimodal LLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3607-5939","authenticated-orcid":false,"given":"Zhaoyu","family":"Fan","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2967-4573","authenticated-orcid":false,"given":"Kaihang","family":"Pan","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3633-0413","authenticated-orcid":false,"given":"Mingze","family":"Zhou","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1978-9999","authenticated-orcid":false,"given":"Bosheng","family":"Qin","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2258-1291","authenticated-orcid":false,"given":"Juncheng","family":"Li","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0030-8289","authenticated-orcid":false,"given":"Shengyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5988-7609","authenticated-orcid":false,"given":"Wenqiao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7356-9711","authenticated-orcid":false,"given":"Siliang","family":"Tang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2139-8807","authenticated-orcid":false,"given":"Fei","family":"Wu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9017-2508","authenticated-orcid":false,"given":"Yueting","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2026,4,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_1_2_1","volume-title":"Self-awareness and action. Current opinion in neurobiology","author":"Blakemore Sarah-Jayne","year":"2003","unstructured":"Sarah-Jayne Blakemore and Chris Frith. 2003. Self-awareness and action. Current opinion in neurobiology, Vol. 13, 2 (2003), 219-224."},{"key":"e_1_3_2_1_3_1","volume-title":"Metamemory: A critical examination. Child development","author":"Cavanaugh John C","year":"1982","unstructured":"John C Cavanaugh and Marion Perlmutter. 1982. Metamemory: A critical examination. Child development (1982), 11-28."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00883"},{"key":"e_1_3_2_1_5_1","volume-title":"Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325","author":"Chen Xinlei","year":"2015","unstructured":"Xinlei Chen, Hao Fang, Tsung-Yi Lin, Ramakrishna Vedantam, Saurabh Gupta, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2015. Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Zhe Chen Jiannan Wu Wenhai Wang Weijie Su Guo Chen Sen Xing Muyan Zhong Qinglong Zhang Xizhou Zhu Lewei Lu Bin Li Ping Luo Tong Lu Yu Qiao and Jifeng Dai. 2024. InternVL: Scaling up Vision Foundation Models and Aligning for Generic Visual-Linguistic Tasks. arXiv:2312.14238 [cs.CV] https:\/\/arxiv.org\/abs\/2312.14238","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.856"},{"key":"e_1_3_2_1_8_1","volume-title":"Can we edit multimodal large language models? arXiv preprint arXiv:2310.08475","author":"Cheng Siyuan","year":"2023","unstructured":"Siyuan Cheng, Bozhong Tian, Qingbin Liu, Xi Chen, Yongheng Wang, Huajun Chen, and Ningyu Zhang. 2023b. Can we edit multimodal large language models? arXiv preprint arXiv:2310.08475 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi.","author":"Dai Wenliang","year":"2023","unstructured":"Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi. 2023. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. arXiv:2305.06500 [cs.CV] https:\/\/arxiv.org\/abs\/2305.06500"},{"key":"e_1_3_2_1_10_1","volume-title":"Editing factual knowledge in language models. arXiv preprint arXiv:2104.08164","author":"Cao Nicola De","year":"2021","unstructured":"Nicola De Cao, Wilker Aziz, and Ivan Titov. 2021. Editing factual knowledge in language models. arXiv preprint arXiv:2104.08164 (2021)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02245"},{"key":"e_1_3_2_1_12_1","volume-title":"LLaMA-Adapter V2: Parameter-Efficient Visual Instruction Model. arXiv preprint arXiv:2304.15010","author":"Gao Peng","year":"2023","unstructured":"Peng Gao, Jiaming Han, Renrui Zhang, Ziyi Lin, Shijie Geng, Aojun Zhou, Wei Zhang, Pan Lu, Conghui He, Xiangyu Yue, Hongsheng Li, and Yu Qiao. 2023. LLaMA-Adapter V2: Parameter-Efficient Visual Instruction Model. arXiv preprint arXiv:2304.15010 (2023)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/0950069032000119401"},{"key":"e_1_3_2_1_14_1","volume-title":"Transformer feed-forward layers are key-value memories. arXiv preprint arXiv:2012.14913","author":"Geva Mor","year":"2020","unstructured":"Mor Geva, Roei Schuster, Jonathan Berant, and Omer Levy. 2020. Transformer feed-forward layers are key-value memories. arXiv preprint arXiv:2012.14913 (2020)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"e_1_3_2_1_16_1","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_17_1","first-page":"47934","article-title":"Aging with grace: Lifelong model editing with discrete key-value adaptors","volume":"36","author":"Hartvigsen Tom","year":"2023","unstructured":"Tom Hartvigsen, Swami Sankaranarayanan, Hamid Palangi, Yoon Kim, and Marzyeh Ghassemi. 2023. Aging with grace: Lifelong model editing with discrete key-value adaptors. Advances in Neural Information Processing Systems, Vol. 36 (2023), 47934-47959.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_18_1","volume-title":"VLKEB: A Large Vision-Language Model Knowledge Editing Benchmark. arXiv:2403.07350","author":"Huang Han","year":"2024","unstructured":"Han Huang, Haitian Zhong, Tao Yu, Qiang Liu, Shu Wu, Liang Wang, and Tieniu Tan. 2024. VLKEB: A Large Vision-Language Model Knowledge Editing Benchmark. arXiv:2403.07350"},{"key":"e_1_3_2_1_19_1","volume-title":"Transformer-patcher: One mistake worth one neuron. arXiv preprint arXiv:2301.09785","author":"Huang Zeyu","year":"2023","unstructured":"Zeyu Huang, Yikang Shen, Xiaofeng Zhang, Jie Zhou, Wenge Rong, and Zhang Xiong. 2023. Transformer-patcher: One mistake worth one neuron. arXiv preprint arXiv:2301.09785 (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00686"},{"key":"e_1_3_2_1_21_1","volume-title":"A baseline for shapley values in MLPs: From missingness to neutrality. arXiv preprint arXiv:2006.04896","author":"Izzo Cosimo","year":"2020","unstructured":"Cosimo Izzo, Aldo Lipani, Ramin Okhrati, and Francesca Medda. 2020. A baseline for shapley values in MLPs: From missingness to neutrality. arXiv preprint arXiv:2006.04896 (2020)."},{"key":"e_1_3_2_1_22_1","first-page":"18661","volume-title":"Lin (Eds.)","volume":"33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised Contrastive Learning. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 18661-18673. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/d89a66c7c80a29b1bdbab0f2a1a94af8-Paper.pdf"},{"key":"e_1_3_2_1_23_1","volume-title":"Metacognition: A literature review.","author":"Lai Emily R","year":"2011","unstructured":"Emily R Lai. 2011. Metacognition: A literature review. (2011)."},{"key":"e_1_3_2_1_24_1","volume-title":"Llava-next-interleave: Tackling multi-image, video, and 3d in large multimodal models. arXiv preprint arXiv:2407.07895","author":"Li Feng","year":"2024","unstructured":"Feng Li, Renrui Zhang, Hao Zhang, Yuanhan Zhang, Bo Li, Wei Li, Zejun Ma, and Chunyuan Li. 2024c. Llava-next-interleave: Tackling multi-image, video, and 3d in large multimodal models. arXiv preprint arXiv:2407.07895 (2024)."},{"key":"e_1_3_2_1_25_1","volume-title":"Mike: A new benchmark for fine-grained multimodal entity knowledge editing. arXiv preprint arXiv:2402.14835","author":"Li Jiaqi","year":"2024","unstructured":"Jiaqi Li, Miaozeng Du, Chuanyi Zhang, Yongrui Chen, Nan Hu, Guilin Qi, Haiyun Jiang, Siyuan Cheng, and Bozhong Tian. 2024a. Mike: A new benchmark for fine-grained multimodal entity knowledge editing. arXiv preprint arXiv:2402.14835 (2024)."},{"key":"e_1_3_2_1_26_1","volume-title":"Jun Xiao, Meng Wang, Tat-Seng Chua, and Yueting Zhuang.","author":"Li Juncheng","year":"2026","unstructured":"Juncheng Li, Minghe Gao, Xiangnan He, Siliang Tang, weishi Zheng, Jun Xiao, Meng Wang, Tat-Seng Chua, and Yueting Zhuang. 2026. Momentor: Advancing Video Large Language Models with Fine-Grained Long Video Reasoning. IEEE Transactions on Pattern Analysis and Machine Intelligence (2026)."},{"key":"e_1_3_2_1_27_1","volume-title":"Structure-Induced Gradient Regulation for Generalizable Vision-Language Models","author":"Li Juncheng","year":"2025","unstructured":"Juncheng Li, Minghe Gao, Siliang Tang, Longhui Wei, Jun Xiao, Fei Wu, Richang Hong, Meng Wang, and Qi Tian. 2025. Structure-Induced Gradient Regulation for Generalizable Vision-Language Models. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_28_1","unstructured":"Junnan Li Dongxu Li Silvio Savarese and Steven Hoi. 2023a. BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models. arXiv:2301.12597 [cs.CV] https:\/\/arxiv.org\/abs\/2301.12597"},{"key":"e_1_3_2_1_29_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023b. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_30_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Li Juncheng","year":"2023","unstructured":"Juncheng Li, Kaihang Pan, Zhiqi Ge, Minghe Gao, Wei Ji, Wenqiao Zhang, Tat-Seng Chua, Siliang Tang, Hanwang Zhang, and Yueting Zhuang. 2023c. Fine-tuning multimodal llms to follow zero-shot demonstrative instructions. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3274139"},{"key":"e_1_3_2_1_32_1","volume-title":"Knowledge Boundary of Large Language Models: A Survey. arXiv preprint arXiv:2412.12472","author":"Li Moxin","year":"2024","unstructured":"Moxin Li, Yong Zhao, Yang Deng, Wenxuan Zhang, Shuaiyi Li, Wenya Xie, See-Kiong Ng, and Tat-Seng Chua. 2024d. Knowledge Boundary of Large Language Models: A Survey. arXiv preprint arXiv:2412.12472 (2024)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29818"},{"key":"e_1_3_2_1_34_1","volume-title":"Teamlora: Boosting low-rank adaptation with expert collaboration and competition. arXiv preprint arXiv:2408.09856","author":"Lin Tianwei","year":"2024","unstructured":"Tianwei Lin, Jiang Liu, Wenqiao Zhang, Zhaocheng Li, Yang Dai, Haoyuan Li, Zhelun Yu, Wanggui He, Juncheng Li, Hao Jiang, et al., 2024. Teamlora: Boosting low-rank adaptation with expert collaboration and competition. arXiv preprint arXiv:2408.09856 (2024)."},{"key":"e_1_3_2_1_35_1","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong Jae Lee. 2023. Visual Instruction Tuning."},{"key":"e_1_3_2_1_36_1","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der Maaten Laurens","year":"2008","unstructured":"Laurens van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, Nov (2008), 2579-2605.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Kenneth Marino Mohammad Rastegari Ali Farhadi and Roozbeh Mottaghi. 2019. OK-VQA: A Visual Question Answering Benchmark Requiring External Knowledge. arXiv:1906.00067 [cs.CV] https:\/\/arxiv.org\/abs\/1906.00067","DOI":"10.1109\/CVPR.2019.00331"},{"key":"e_1_3_2_1_38_1","volume-title":"Locating and editing factual associations in gpt. Advances in neural information processing systems","author":"Meng Kevin","year":"2022","unstructured":"Kevin Meng, David Bau, Alex Andonian, and Yonatan Belinkov. 2022a. Locating and editing factual associations in gpt. Advances in neural information processing systems, Vol. 35 (2022), 17359-17372."},{"key":"e_1_3_2_1_39_1","volume-title":"Alex Andonian, Yonatan Belinkov, and David Bau.","author":"Meng Kevin","year":"2022","unstructured":"Kevin Meng, Arnab Sen Sharma, Alex Andonian, Yonatan Belinkov, and David Bau. 2022b. Mass-editing memory in a transformer. arXiv preprint arXiv:2210.07229 (2022)."},{"key":"e_1_3_2_1_40_1","volume-title":"Fast model editing at scale. arXiv preprint arXiv:2110.11309","author":"Mitchell Eric","year":"2021","unstructured":"Eric Mitchell, Charles Lin, Antoine Bosselut, Chelsea Finn, and Christopher D Manning. 2021. Fast model editing at scale. arXiv preprint arXiv:2110.11309 (2021)."},{"key":"e_1_3_2_1_41_1","volume-title":"International Conference on Machine Learning. PMLR, 15817-15831","author":"Mitchell Eric","year":"2022","unstructured":"Eric Mitchell, Charles Lin, Antoine Bosselut, Christopher D Manning, and Chelsea Finn. 2022. Memory-based model editing at scale. In International Conference on Machine Learning. PMLR, 15817-15831."},{"key":"e_1_3_2_1_42_1","volume-title":"Metamemory: A theoretical framework and new findings. In Psychology of learning and motivation.","author":"Nelson Thomas O","year":"1990","unstructured":"Thomas O Nelson. 1990. Metamemory: A theoretical framework and new findings. In Psychology of learning and motivation. Vol. 26. Elsevier, 125-173."},{"key":"e_1_3_2_1_43_1","first-page":"110290","article-title":"Towards unified multimodal editing with enhanced knowledge collaboration","volume":"37","author":"Pan Kaihang","year":"2024","unstructured":"Kaihang Pan, Zhaoyu Fan, Juncheng Li, Qifan Yu, Hao Fei, Siliang Tang, Richang Hong, Hanwang Zhang, and Qianru Sun. 2024. Towards unified multimodal editing with enhanced knowledge collaboration. Advances in Neural Information Processing Systems, Vol. 37 (2024), 110290-110314.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_44_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125, Vol. 1, 2 (2022), 3."},{"key":"e_1_3_2_1_45_1","volume-title":"Defining reflection: Another look at John Dewey and reflective thinking. Teachers college record","author":"Rodgers Carol","year":"2002","unstructured":"Carol Rodgers. 2002. Defining reflection: Another look at John Dewey and reflective thinking. Teachers college record, Vol. 104, 4 (2002), 842-866."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511528446"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/778"},{"key":"e_1_3_2_1_49_1","volume-title":"What makes for good views for contrastive learning? Advances in neural information processing systems","author":"Tian Yonglong","year":"2020","unstructured":"Yonglong Tian, Chen Sun, Ben Poole, Dilip Krishnan, Cordelia Schmid, and Phillip Isola. 2020. What makes for good views for contrastive learning? Advances in neural information processing systems, Vol. 33 (2020), 6827-6839."},{"key":"e_1_3_2_1_50_1","volume-title":"Bernadette HAM Van Hout-Wolters, and Peter Afflerbach","author":"Veenman Marcel VJ","year":"2006","unstructured":"Marcel VJ Veenman, Bernadette HAM Van Hout-Wolters, and Peter Afflerbach. 2006. Metacognition and learning: Conceptual and methodological considerations. Metacognition and learning, Vol. 1 (2006), 3-14."},{"key":"e_1_3_2_1_51_1","volume-title":"Pico: Contrastive label disambiguation for robust partial label learning. arXiv preprint arXiv:2201.08984","author":"Wang Haobo","year":"2022","unstructured":"Haobo Wang, Ruixuan Xiao, Yixuan Li, Lei Feng, Gang Niu, Gang Chen, and Junbo Zhao. 2022. Pico: Contrastive label disambiguation for robust partial label learning. arXiv preprint arXiv:2201.08984 (2022)."},{"key":"e_1_3_2_1_52_1","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge et al. 2024a. Qwen2-vl: Enhancing vision-language model's perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)."},{"key":"e_1_3_2_1_53_1","volume-title":"WISE: Rethinking the Knowledge Memory for Lifelong Model Editing of Large Language Models. arXiv:2405.14768 [cs.CL] https:\/\/arxiv.org\/abs\/2405.14768","author":"Wang Peng","year":"2024","unstructured":"Peng Wang, Zexi Li, Ningyu Zhang, Ziwen Xu, Yunzhi Yao, Yong Jiang, Pengjun Xie, Fei Huang, and Huajun Chen. 2024b. WISE: Rethinking the Knowledge Memory for Lifelong Model Editing of Large Language Models. arXiv:2405.14768 [cs.CL] https:\/\/arxiv.org\/abs\/2405.14768"},{"key":"e_1_3_2_1_54_1","volume-title":"Lemoe: Advanced mixture of experts adaptor for lifelong model editing of large language models. arXiv preprint arXiv:2406.20030","author":"Wang Renzhi","year":"2024","unstructured":"Renzhi Wang and Piji Li. 2024. Lemoe: Advanced mixture of experts adaptor for lifelong model editing of large language models. arXiv preprint arXiv:2406.20030 (2024)."},{"key":"e_1_3_2_1_55_1","volume-title":"The shapley value. Handbook of game theory with economic applications","author":"Winter Eyal","year":"2002","unstructured":"Eyal Winter. 2002. The shapley value. Handbook of game theory with economic applications, Vol. 3 (2002), 2025-2054."},{"key":"e_1_3_2_1_56_1","volume-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. 543-553","author":"Zhang Hang","year":"2023","unstructured":"Hang Zhang, Xin Li, and Lidong Bing. 2023. Video-LLaMA: An Instruction-tuned Audio-Visual Language Model for Video Understanding. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. 543-553."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Junzhe Zhang Huixuan Zhang Xunjian Yin Baizhou Huang Xu Zhang Xinyu Hu and Xiaojun Wan. 2024b. MC-MKE: A Fine-Grained Multimodal Knowledge Editing Benchmark Emphasizing Modality Consistency. arXiv:2406.13219 [cs.CV] https:\/\/arxiv.org\/abs\/2406.13219","DOI":"10.18653\/v1\/2025.findings-acl.896"},{"key":"e_1_3_2_1_58_1","unstructured":"Ningyu Zhang Yunzhi Yao Bozhong Tian Peng Wang Shumin Deng Mengru Wang Zekun Xi Shengyu Mao Jintian Zhang Yuansheng Ni et al. 2024a. A comprehensive study of knowledge editing for large language models. arXiv preprint arXiv:2401.01286 (2024)."},{"key":"e_1_3_2_1_59_1","volume-title":"Can we edit factual knowledge by in-context learning? arXiv preprint arXiv:2305.12740","author":"Zheng Ce","year":"2023","unstructured":"Ce Zheng, Lei Li, Qingxiu Dong, Yuxuan Fan, Zhiyong Wu, Jingjing Xu, and Baobao Chang. 2023. Can we edit factual knowledge by in-context learning? arXiv preprint arXiv:2305.12740 (2023)."},{"key":"e_1_3_2_1_60_1","volume-title":"Mquake: Assessing knowledge editing in language models via multi-hop questions. arXiv preprint arXiv:2305.14795","author":"Zhong Zexuan","year":"2023","unstructured":"Zexuan Zhong, Zhengxuan Wu, Christopher D Manning, Christopher Potts, and Danqi Chen. 2023. Mquake: Assessing knowledge editing in language models via multi-hop questions. arXiv preprint arXiv:2305.14795 (2023)."},{"key":"e_1_3_2_1_61_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."}],"event":{"name":"WWW '26: The ACM Web Conference 2026","location":"Dubai United Arab Emirates","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2026"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774904.3792531","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T13:21:27Z","timestamp":1777296087000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774904.3792531"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":61,"alternative-id":["10.1145\/3774904.3792531","10.1145\/3774904"],"URL":"https:\/\/doi.org\/10.1145\/3774904.3792531","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-04-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}