{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:47:06Z","timestamp":1765309626469,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755769","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:17Z","timestamp":1761375257000},"page":"11111-11120","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TongGu-VL: Advancing Visual-Language Understanding in Chinese Classical Studies through Parameter Sensitivity-Guided Instruction Tuning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9483-2424","authenticated-orcid":false,"given":"Jiahuan","family":"Cao","sequence":"first","affiliation":[{"name":"South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6528-6849","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1857-5473","authenticated-orcid":false,"given":"Peirong","family":"Zhang","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2650-1663","authenticated-orcid":false,"given":"Yongxin","family":"Shi","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9371-0751","authenticated-orcid":false,"given":"Kai","family":"Ding","sequence":"additional","affiliation":[{"name":"INTSIG Information, Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5456-0957","authenticated-orcid":false,"given":"Lianwen","family":"Jin","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.2196\/59505"},{"key":"e_1_3_2_1_2_1","unstructured":"Anthropic. 2024. Introducing Claude 3.5 Sonnet. https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet."},{"key":"e_1_3_2_1_3_1","volume-title":"Qwen-VL: A versatile vision-language model for understanding, localization, text reading, and beyond. arXiv preprint arXiv:2308.12966","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-VL: A versatile vision-language model for understanding, localization, text reading, and beyond. arXiv preprint arXiv:2308.12966, Vol. 1, 2 (2023), 3."},{"key":"e_1_3_2_1_4_1","unstructured":"Baidu. 2008. Baidu Baike. https:\/\/baike.baidu.com\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Baidu. 2023. ERNIE-4.0. https:\/\/yiyan.baidu.com\/."},{"key":"e_1_3_2_1_6_1","volume-title":"Mansheej Paul, Philip Greengard, Connor Jennings, Daniel King, Sam Havens, Vitaliy Chiley, Jonathan Frankle, et al.","author":"Biderman Dan","year":"2024","unstructured":"Dan Biderman, Jacob Portes, Jose Javier Gonzalez Ortiz, Mansheej Paul, Philip Greengard, Connor Jennings, Daniel King, Sam Havens, Vitaliy Chiley, Jonathan Frankle, et al., 2024. LoRA learns less and forgets less. arXiv preprint arXiv:2405.09673 (2024)."},{"key":"e_1_3_2_1_7_1","volume-title":"Predicting Punctuation in Ancient Chinese Texts: A Multi-Layered LSTM and Attention-Based Approach. arXiv preprint arXiv:2409.10783","author":"Cai Tracy","year":"2024","unstructured":"Tracy Cai, Kimmy Chang, and Fahad Nabi. 2024. 
Predicting Punctuation in Ancient Chinese Texts: A Multi-Layered LSTM and Attention-Based Approach. arXiv preprint arXiv:2409.10783 (2024)."},{"key":"e_1_3_2_1_8_1","unstructured":"Jiahuan Cao Yang Liu Yongxin Shi Kai Ding and Lianwen Jin. 2024a. WenMind: A Comprehensive Benchmark for Evaluating Large Language Models in Chinese Classical Literature and Language Arts. In The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track."},{"key":"e_1_3_2_1_9_1","volume-title":"TongGu: Mastering Classical Chinese Understanding with Knowledge-Grounded Large Language Models. arXiv preprint arXiv:2407.03937","author":"Cao Jiahuan","year":"2024","unstructured":"Jiahuan Cao, Dezhi Peng, Peirong Zhang, Yongxin Shi, Yang Liu, Kai Ding, and Lianwen Jin. 2024b. TongGu: Mastering Classical Chinese Understanding with Knowledge-Grounded Large Language Models. arXiv preprint arXiv:2407.03937 (2024)."},{"key":"e_1_3_2_1_10_1","unstructured":"Jiahuan Cao Yongxin Shi Dezhi Peng Yang Liu and Lianwen Jin. 2024c. C3Bench: A Comprehensive Classical Chinese Understanding Benchmark for Large Language Models. arXiv:2405.17732 [cs.CL]"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Lin Chen Jinsong Li Xiaoyi Dong Pan Zhang Yuhang Zang Zehui Chen Haodong Duan Jiaqi Wang Yu Qiao Dahua Lin et al. 2024a. Are We on the Right Way for Evaluating Large Vision-Language Models? arXiv preprint arXiv:2403.20330 (2024).","DOI":"10.52202\/079017-0850"},{"key":"e_1_3_2_1_12_1","unstructured":"Zhe Chen Weiyun Wang Yue Cao Yangzhou Liu Zhangwei Gao Erfei Cui Jinguo Zhu Shenglong Ye Hao Tian Zhaoyang Liu et al. 2024b. Expanding performance boundaries of open-source multimodal models with model data and test-time scaling. arXiv preprint arXiv:2412.05271 (2024)."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 
24185-24198","author":"Chen Zhe","year":"2024","unstructured":"Zhe Chen, Jiannan Wu, Wenhai Wang, Weijie Su, Guo Chen, Sen Xing, Muyan Zhong, Qinglong Zhang, Xizhou Zhu, Lewei Lu, et al., 2024c. InternVL: Scaling up vision foundation models and aligning for generic visual-linguistic tasks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 24185-24198."},{"key":"e_1_3_2_1_14_1","volume-title":"Mohammadreza Salehi, Niklas Muennighoff, Kyle Lo, Luca Soldaini, et al.","author":"Deitke Matt","year":"2024","unstructured":"Matt Deitke, Christopher Clark, Sangho Lee, Rohun Tripathi, Yue Yang, Jae Sung Park, Mohammadreza Salehi, Niklas Muennighoff, Kyle Lo, Luca Soldaini, et al., 2024. Molmo and Pixmo: Open weights and open data for state-of-the-art multimodal models. arXiv preprint arXiv:2409.17146 (2024)."},{"volume-title":"CCLUE: Classical Chinese Language Understanding Evaluation Benchmark: datasets, baselines, pre-trained models, corpus and leaderboard. https:\/\/github.com\/Ethan-yt\/CCLUE.","year":"2021","key":"e_1_3_2_1_15_1","unstructured":"Ethan. 2021. CCLUE: Classical Chinese Language Understanding Evaluation Benchmark: datasets, baselines, pre-trained models, corpus and leaderboard. https:\/\/github.com\/Ethan-yt\/CCLUE."},{"key":"e_1_3_2_1_16_1","unstructured":"EvolvingLMMs-Lab. 2024. lmms-eval: Accelerating the development of large multimodal models (LMMs) with one-click evaluation module. https:\/\/github.com\/EvolvingLMMs-Lab\/lmms-eval"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01315"},{"key":"e_1_3_2_1_18_1","volume-title":"EMMA: Efficient Visual Alignment in Multi-Modal LLMs. arXiv preprint arXiv:2410.02080","author":"Ghazanfari Sara","year":"2024","unstructured":"Sara Ghazanfari, Alexandre Araujo, Prashanth Krishnamurthy, Siddharth Garg, and Farshad Khorrami. 2024. EMMA: Efficient Visual Alignment in Multi-Modal LLMs. 
arXiv preprint arXiv:2410.02080 (2024)."},{"key":"e_1_3_2_1_19_1","unstructured":"Team GLM Aohan Zeng Bin Xu Bowen Wang Chenhui Zhang Da Yin Diego Rojas Guanyu Feng Hanlin Zhao Hanyu Lai et al. 2024. ChatGLM: A Family of Large Language Models from GLM-130B to GLM-4 All Tools."},{"key":"e_1_3_2_1_20_1","unstructured":"Google. 2024. Introducing Gemini 2.0: Our new AI model for the agentic era. https:\/\/blog.google\/technology\/google-deepmind\/google-gemini-ai-update-december-2024\/."},{"key":"e_1_3_2_1_21_1","volume-title":"An open dataset for the evolution of oracle bone characters: EVOBC. arXiv preprint arXiv:2401.12467","author":"Guan Haisu","year":"2024","unstructured":"Haisu Guan, Jinpeng Wan, Yuliang Liu, Pengjie Wang, Kaile Zhang, Zhebin Kuang, Xinyu Wang, Xiang Bai, and Lianwen Jin. 2024. An open dataset for the evolution of oracle bone characters: EVOBC. arXiv preprint arXiv:2401.12467 (2024)."},{"key":"e_1_3_2_1_22_1","unstructured":"Anwen Hu Haiyang Xu Jiabo Ye Ming Yan Liang Zhang Bo Zhang Chen Li Ji Zhang Qin Jin Fei Huang et al. 2024. mPLUG-DocOwl 1.5: Unified structure learning for OCR-free document understanding. arXiv preprint arXiv:2403.12895 (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"LoRA: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. LoRA: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_24_1","volume-title":"HFT: Half Fine-Tuning for Large Language Models. arXiv preprint arXiv:2404.18466","author":"Hui Tingfeng","year":"2024","unstructured":"Tingfeng Hui, Zhenyu Zhang, Shuohuan Wang, Weiran Xu, Yu Sun, and Hua Wu. 2024. HFT: Half Fine-Tuning for Large Language Models. 
arXiv preprint arXiv:2404.18466 (2024)."},{"volume-title":"Proc","author":"Ji Zijing","key":"e_1_3_2_1_25_1","unstructured":"Zijing Ji, Yuxin Shen, Yining Sun, Tian Yu, and Xin Wang. 2021. C-CLUE: A benchmark of classical Chinese based on a crowdsourcing system for knowledge graph construction. In Proc. CCKS. Springer, 295-301."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-44693-1_31"},{"key":"e_1_3_2_1_27_1","first-page":"229","volume-title":"Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA)@ LREC-COLING-2024","author":"Li Bin","year":"2024","unstructured":"Bin Li, Bolin Chang, Zhixing Xu, Minxuan Feng, Chao Xu, Weiguang Qu, Si Shen, and Dongbo Wang. 2024. Overview of EvaHan2024: The First International Evaluation on Ancient Chinese Sentence Segmentation and Punctuation. In Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA)@ LREC-COLING-2024. 229-236."},{"key":"e_1_3_2_1_28_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_29_1","unstructured":"LinkSoul. 2024. Chinese-LLaVA-CLLaMA2. https:\/\/huggingface.co\/LinkSoul\/Chinese-LLaVA-Cllama2."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-023-00952-2"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_2_1_32_1","unstructured":"Haotian Liu Chunyuan Li Yuheng Li Bo Li Yuanhan Zhang Sheng Shen and Yong Jae Lee. 2024b. LLaVA-NeXT: Improved reasoning OCR and world knowledge. 
https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1504\/ijipt.2021.113897"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.515"},{"key":"e_1_3_2_1_35_1","volume-title":"Ovis: Structural Embedding Alignment for Multimodal Large Language Model. arXiv:2405.20797","author":"Lu Shiyin","year":"2024","unstructured":"Shiyin Lu, Yang Li, Qing-Guo Chen, Zhao Xu, Weihua Luo, Kaifu Zhang, and Han-Jia Ye. 2024. Ovis: Structural Embedding Alignment for Multimodal Large Language Model. arXiv:2405.20797 (2024)."},{"key":"e_1_3_2_1_36_1","volume-title":"An empirical study of catastrophic forgetting in large language models during continual fine-tuning. arXiv preprint arXiv:2308.08747","author":"Luo Yun","year":"2023","unstructured":"Yun Luo, Zhen Yang, Fandong Meng, Yafu Li, Jie Zhou, and Yue Zhang. 2023. An empirical study of catastrophic forgetting in large language models during continual fine-tuning. arXiv preprint arXiv:2308.08747 (2023)."},{"key":"e_1_3_2_1_37_1","unstructured":"OpenAI. 2024. Hello GPT-4o. https:\/\/openai.com\/index\/hello-gpt-4o\/."},{"key":"e_1_3_2_1_38_1","unstructured":"OpenGVLab. 2024. InternVL2: Better than the Best Expanding Performance Boundaries of Open-Source Multimodal Models with the Progressive Scaling Strategy. https:\/\/internvl.github.io\/blog\/2024-07-02-InternVL-2.0\/."},{"key":"e_1_3_2_1_39_1","volume-title":"Scalable language model with generalized continual learning. arXiv preprint arXiv:2404.07470","author":"Peng Bohao","year":"2024","unstructured":"Bohao Peng, Zhuotao Tian, Shu Liu, Mingchang Yang, and Jiaya Jia. 2024. Scalable language model with generalized continual learning. arXiv preprint arXiv:2404.07470 (2024)."},{"volume-title":"QVQ: To See the World with Wisdom. https:\/\/qwenlm.github.io\/blog\/qvq-72b-preview\/.","year":"2024","key":"e_1_3_2_1_40_1","unstructured":"Qwen. 2024. 
QVQ: To See the World with Wisdom. https:\/\/qwenlm.github.io\/blog\/qvq-72b-preview\/."},{"key":"e_1_3_2_1_41_1","unstructured":"Qwen. 2025. Qwen2.5-VL. https:\/\/qwenlm.github.io\/blog\/qwen2.5-vl\/"},{"key":"e_1_3_2_1_42_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_43_1","volume-title":"Experience replay for continual learning. Advances in neural information processing systems","author":"Rolnick David","year":"2019","unstructured":"David Rolnick, Arun Ahuja, Jonathan Schwarz, Timothy Lillicrap, and Gregory Wayne. 2019. Experience replay for continual learning. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_44_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Shi Yongxin","year":"2024","unstructured":"Yongxin Shi, Chongyu Liu, Dezhi Peng, Cheng Jian, Jiarong Huang, and Lianwen Jin. 2024. M5HisDoc: A Large-scale Multi-style Chinese Historical Document Analysis Benchmark. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"HisDoc1B: A Large-Scale Dataset for Chinese Historical Document Recognition and Analysis. Scientific Data","author":"Shi Yongxin","year":"2025","unstructured":"Yongxin Shi, Dezhi Peng, Yuyi Zhang, Jiahuan Cao, and Lianwen Jin. 2025. HisDoc1B: A Large-Scale Dataset for Chinese Historical Document Recognition and Analysis. 
Scientific Data (2025)."},{"key":"e_1_3_2_1_46_1","volume-title":"Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al.","author":"Team Gemini","year":"2024","unstructured":"Gemini Team, Petko Georgiev, Ving Ian Lei, Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al., 2024. Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)."},{"key":"e_1_3_2_1_47_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_48_1","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge et al. 2024a. Qwen2-VL: Enhancing vision-language model's perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-024-03807-x"},{"key":"e_1_3_2_1_50_1","volume-title":"TRACE: A Comprehensive Benchmark for Continual Learning in Large Language Models. arXiv preprint arXiv:2310.06762","author":"Wang Xiao","year":"2023","unstructured":"Xiao Wang, Yuansen Zhang, Tianze Chen, Songyang Gao, Senjie Jin, Xianjun Yang, Zhiheng Xi, Rui Zheng, Yicheng Zou, Tao Gui, et al., 2023. TRACE: A Comprehensive Benchmark for Continual Learning in Large Language Models. arXiv preprint arXiv:2310.06762 (2023)."},{"key":"e_1_3_2_1_51_1","unstructured":"Ceng Wei. 2013. ShuGe Library. https:\/\/www.shuge.or-g\/."},{"key":"e_1_3_2_1_52_1","unstructured":"Zhiyu Wu Xiaokang Chen Zizheng Pan Xingchao Liu Wen Liu Damai Dai Huazuo Gao Yiyang Ma Chengyue Wu Bingxuan Wang et al. 2024. 
DeepSeek-VL2: Mixture-of-Experts Vision-Language Models for Advanced Multimodal Understanding. arXiv preprint arXiv:2412.10302 (2024)."},{"key":"e_1_3_2_1_53_1","unstructured":"Xunzi-LLM-of-Chinese-classics. 2023. Xunzi-LLM-of-Chinese-classics\/XunziALLM. https:\/\/github.com\/Xunzi-LLM-of-Chinese-classics\/XunziALLM."},{"key":"e_1_3_2_1_54_1","unstructured":"An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei et al. 2024. Qwen2.5 Technical Report. arXiv preprint arXiv:2412.15115 (2024)."},{"key":"e_1_3_2_1_55_1","unstructured":"Yuan Yao Tianyu Yu Ao Zhang Chongyi Wang Junbo Cui Hongji Zhu Tianchi Cai Haoyu Li Weilin Zhao Zhihui He et al. 2024. MiniCPM-V: A GPT-4V Level MLLM on Your Phone. arXiv preprint arXiv:2408.01800 (2024)."},{"key":"e_1_3_2_1_56_1","unstructured":"Jiabo Ye Anwen Hu Haiyang Xu Qinghao Ye Ming Yan Yuhao Dan Chenlin Zhao Guohai Xu Chenliang Li Junfeng Tian et al. 2023. mPLUG-DocOwl: Modularized multimodal large language model for document understanding. arXiv preprint arXiv:2307.02499 (2023)."},{"key":"e_1_3_2_1_57_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 13040-13051","author":"Ye Qinghao","year":"2024","unstructured":"Qinghao Ye, Haiyang Xu, Jiabo Ye, Ming Yan, Anwen Hu, Haowei Liu, Qi Qian, Ji Zhang, and Fei Huang. 2024. mPLUG-Owl2: Revolutionizing multi-modal large language model with modality collaboration. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 13040-13051."},{"key":"e_1_3_2_1_58_1","unstructured":"Kaining Ying Fanqing Meng Jin Wang Zhiqian Li Han Lin Yue Yang Hao Zhang Wenbo Zhang Yuqi Lin Shuo Liu et al. 2024. MMT-Bench: A comprehensive multimodal benchmark for evaluating large vision-language models towards multitask AGI. 
arXiv preprint arXiv:2404.16006 (2024)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60029-7_27"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00913"},{"key":"e_1_3_2_1_61_1","volume-title":"CMMMU: A Chinese massive multi-discipline multimodal understanding benchmark. arXiv preprint arXiv:2401.11944","author":"Zhang Ge","year":"2024","unstructured":"Ge Zhang, Xinrun Du, Bei Chen, Yiming Liang, Tongxu Luo, Tianyu Zheng, Kang Zhu, Yuyang Cheng, Chunpu Xu, Shuyue Guo, et al., 2024. CMMMU: A Chinese massive multi-discipline multimodal understanding benchmark. arXiv preprint arXiv:2401.11944 (2024)."},{"key":"e_1_3_2_1_62_1","volume-title":"International conference on machine learning. PMLR, 26809-26823","author":"Zhang Qingru","year":"2022","unstructured":"Qingru Zhang, Simiao Zuo, Chen Liang, Alexander Bukharin, Pengcheng He, Weizhu Chen, and Tuo Zhao. 2022. Platon: Pruning large transformer models with upper confidence bound of weight importance. In International conference on machine learning. PMLR, 26809-26823."},{"key":"e_1_3_2_1_63_1","volume-title":"LLaVA-Mini: Efficient Image and Video Large Multimodal Models with One Vision Token. arXiv preprint arXiv:2501.03895","author":"Zhang Shaolei","year":"2025","unstructured":"Shaolei Zhang, Qingkai Fang, Zhe Yang, and Yang Feng. 2025. LLaVA-Mini: Efficient Image and Video Large Multimodal Models with One Vision Token. arXiv preprint arXiv:2501.03895 (2025)."},{"key":"e_1_3_2_1_64_1","first-page":"80","article-title":"Can Large Langauge Model Comprehend Ancient Chinese","author":"Zhang Yixuan","year":"2023","unstructured":"Yixuan Zhang and Haonan Li. 2023. Can Large Langauge Model Comprehend Ancient Chinese? A Preliminary Test on ACLUE. In Proc. ALP. 80-87.","journal-title":"A Preliminary Test on ACLUE. In Proc. ALP."},{"key":"e_1_3_2_1_65_1","volume-title":"Unveiling a core linguistic region in large language models. 
arXiv preprint arXiv:2310.14928","author":"Zhao Jun","year":"2023","unstructured":"Jun Zhao, Zhihao Zhang, Yide Ma, Qi Zhang, Tao Gui, Luhui Gao, and Xuanjing Huang. 2023. Unveiling a core linguistic region in large language models. arXiv preprint arXiv:2310.14928 (2023)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.204"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755769","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:44:35Z","timestamp":1765309475000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755769"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":66,"alternative-id":["10.1145\/3746027.3755769","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755769","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}