{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:13:56Z","timestamp":1765008836634,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","funder":[{"name":"The Natural Science Foundation of Beijing,China","award":["L252035"],"award-info":[{"award-number":["L252035"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1145\/3743093.3771068","type":"proceedings-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:06:16Z","timestamp":1765008376000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Uni-IL: Unified Incremental Learning of Vision-Language Models via Mixture of Attribute-Guided Experts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-3705-746X","authenticated-orcid":false,"given":"Fanyu","family":"Meng","sequence":"first","affiliation":[{"name":"Beijing Institute of Technology, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2940-142X","authenticated-orcid":false,"given":"Yufeng","family":"Zhan","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8073-2118","authenticated-orcid":false,"given":"Jie","family":"Zhang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1890-909X","authenticated-orcid":false,"given":"Zhiyuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5977-4911","authenticated-orcid":false,"given":"Yuanqing","family":"Xia","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Beijing, Beijing, China and Zhongyuan University of Technology, Zhengzhou, Henan, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_1_3_2","unstructured":"Jean-Baptiste Alayrac Jeff Donahue Pauline Luc Antoine Miech Iain Barr Yana Hasson Karel Lenc Arthur Mensch Katherine Millican Malcolm Reynolds et\u00a0al. 2022. Flamingo: a visual language model for few-shot learning. Advances in neural information processing systems 35 (2022) 23716\u201323736."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00938"},{"key":"e_1_3_3_1_5_2","unstructured":"Yuxuan Ding Lingqiao Liu Chunna Tian Jingyuan Yang and Haoxuan Ding. 2022. Don\u2019t stop learning: Towards continual learning for the clip model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.09248 (2022)."},{"key":"e_1_3_3_1_6_2","unstructured":"Alexey Dosovitskiy. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.11929 (2020)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.106"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00907"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Maximilian Gilles Kai Furmans and Rania Rayyes. 2025. MetaMVUC: Active Learning for Sample-Efficient Sim-to-Real Domain Adaptation in Robotic Grasping. IEEE Robotics and Automation Letters (2025).","DOI":"10.1109\/LRA.2025.3544083"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/RAAI56146.2022.10092970"},{"key":"e_1_3_3_1_11_2","first-page":"2790","volume-title":"International conference on machine learning","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De\u00a0Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-efficient transfer learning for NLP. In International conference on machine learning. PMLR, 2790\u20132799."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/IV51971.2022.9827450"},{"key":"e_1_3_3_1_13_2","unstructured":"Edward\u00a0J Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.09685 (2021)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11595"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0A Jacobs Michael\u00a0I Jordan Steven\u00a0J Nowlan and Geoffrey\u00a0E Hinton. 1991. Adaptive mixtures of local experts. Neural computation 3 1 (1991) 79\u201387.","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"James Kirkpatrick Razvan Pascanu Neil Rabinowitz Joel Veness Guillaume Desjardins Andrei\u00a0A Rusu Kieran Milan John Quan Tiago Ramalho Agnieszka Grabska-Barwinska et\u00a0al. 2017. Overcoming catastrophic forgetting in neural networks. Proceedings of the national academy of sciences 114 13 (2017) 3521\u20133526.","DOI":"10.1073\/pnas.1611835114"},{"key":"e_1_3_3_1_17_2","unstructured":"Alex Krizhevsky Geoffrey Hinton et\u00a0al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_3_1_18_2","unstructured":"Ya Le and Xuan Yang. 2015. Tiny imagenet visual recognition challenge. CS 231N 7 7 (2015) 3."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00068"},{"key":"e_1_3_3_1_20_2","unstructured":"Xiang\u00a0Lisa Li and Percy Liang. 2021. Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2101.00190 (2021)."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Zhizhong Li and Derek Hoiem. 2017. Learning without forgetting. IEEE transactions on pattern analysis and machine intelligence 40 12 (2017) 2935\u20132947.","DOI":"10.1109\/TPAMI.2017.2773081"},{"key":"e_1_3_3_1_22_2","unstructured":"Bin Lin Zhenyu Tang Yang Ye Jiaxi Cui Bin Zhu Peng Jin Jinfa Huang Junwu Zhang Yatian Pang Munan Ning et\u00a0al. 2024. Moe-llava: Mixture of experts for large vision-language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.15947 (2024)."},{"key":"e_1_3_3_1_23_2","unstructured":"Qidong Liu Xian Wu Xiangyu Zhao Yuanshao Zhu Derong Xu Feng Tian and Yefeng Zheng. 2023. Moelora: An moe-based parameter efficient fine-tuning method for multi-task medical applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.18339 (2023)."},{"key":"e_1_3_3_1_24_2","unstructured":"I Loshchilov. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1711.05101 (2017)."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00267"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Nicolas\u00a0Y Masse Gregory\u00a0D Grant and David\u00a0J Freedman. 2018. Alleviating catastrophic forgetting using context-dependent gating and synaptic stabilization. Proceedings of the National Academy of Sciences 115 44 (2018) E10467\u2013E10475.","DOI":"10.1073\/pnas.1803839115"},{"key":"e_1_3_3_1_27_2","unstructured":"Sachit Menon and Carl Vondrick. 2022. Visual classification via description from large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.07183 (2022)."},{"key":"e_1_3_3_1_28_2","unstructured":"Bolin Ni Xing Nie Chenghao Zhang Shixiong Xu Xin Zhang Gaofeng Meng and Shiming Xiang. 2024. MoBoo: Memory-Boosted Vision Transformer for Class-Incremental Learning. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_3_1_29_2","first-page":"60","volume-title":"Conference on lifelong learning agents","author":"Ostapenko Oleksiy","year":"2022","unstructured":"Oleksiy Ostapenko, Timothee Lesort, Pau Rodriguez, Md\u00a0Rifat Arefin, Arthur Douillard, Irina Rish, and Laurent Charlin. 2022. Continual learning with foundation models: An empirical study of latent replay. In Conference on lifelong learning agents. PMLR, 60\u201391."},{"key":"e_1_3_3_1_30_2","unstructured":"Pingbo Pan Siddharth Swaroop Alexander Immer Runa Eschenhagen Richard Turner and Mohammad Emtiyaz\u00a0E Khan. 2020. Continual deep learning by functional regularisation of memorable past. Advances in neural information processing systems 33 (2020) 4453\u20134464."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00149"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_31"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01438"},{"key":"e_1_3_3_1_34_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01443"},{"key":"e_1_3_3_1_36_2","unstructured":"Noam Shazeer Azalia Mirhoseini Krzysztof Maziarz Andy Davis Quoc Le Geoffrey Hinton and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1701.06538 (2017)."},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02068"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02700"},{"key":"e_1_3_3_1_39_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"Gido\u00a0M Van\u00a0de Ven Hava\u00a0T Siegelmann and Andreas\u00a0S Tolias. 2020. Brain-inspired replay for continual learning with artificial neural networks. Nature communications 11 1 (2020) 4069.","DOI":"10.1038\/s41467-020-17866-2"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Gido\u00a0M Van\u00a0de Ven Tinne Tuytelaars and Andreas\u00a0S Tolias. 2022. Three types of incremental learning. Nature Machine Intelligence 4 12 (2022) 1185\u20131197.","DOI":"10.1038\/s42256-022-00568-3"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Liyuan Wang Xingxing Zhang Hang Su and Jun Zhu. 2024. A comprehensive survey of continual learning: theory method and application. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024).","DOI":"10.1109\/TPAMI.2024.3367329"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00356"},{"key":"e_1_3_3_1_44_2","unstructured":"Yabin Wang Zhiwu Huang and Xiaopeng Hong. 2022. S-prompts learning with pre-trained transformers: An occam\u2019s razor for domain incremental learning. Advances in Neural Information Processing Systems 35 (2022) 5682\u20135695."},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00024"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00046"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01395"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00287"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00303"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02020"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02191"},{"key":"e_1_3_3_1_52_2","first-page":"3987","volume-title":"International conference on machine learning","author":"Zenke Friedemann","year":"2017","unstructured":"Friedemann Zenke, Ben Poole, and Surya Ganguli. 2017. Continual learning through synaptic intelligence. In International conference on machine learning. PMLR, 3987\u20133995."},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"crossref","unstructured":"Jingyi Zhang Jiaxing Huang Sheng Jin and Shijian Lu. 2024. Vision-language models for vision tasks: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024).","DOI":"10.1109\/TPAMI.2024.3369699"},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00553"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01752"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"crossref","unstructured":"Kaiyang Zhou Jingkang Yang Chen\u00a0Change Loy and Ziwei Liu. 2022. Learning to prompt for vision-language models. International Journal of Computer Vision 130 9 (2022) 2337\u20132348.","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_3_1_58_2","unstructured":"Deyao Zhu Jun Chen Xiaoqian Shen Xiang Li and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.10592 (2023)."}],"event":{"name":"MMAsia '25: ACM Multimedia Asia","location":"Kuala Lumpur Malaysia","acronym":"MMAsia '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 7th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3743093.3771068","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:09:29Z","timestamp":1765008569000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3743093.3771068"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":57,"alternative-id":["10.1145\/3743093.3771068","10.1145\/3743093"],"URL":"https:\/\/doi.org\/10.1145\/3743093.3771068","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}