{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:08:37Z","timestamp":1765357717548,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100017052","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["Grant No. 61977045"],"award-info":[{"award-number":["Grant No. 61977045"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100017052","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680843","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"8835-8844","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["GIST: Improving Parameter Efficient Fine-Tuning via Knowledge Interaction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5469-507X","authenticated-orcid":false,"given":"Jiacheng","family":"Ruan","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6271-0903","authenticated-orcid":false,"given":"Jingsheng","family":"Gao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9826-9806","authenticated-orcid":false,"given":"Mingye","family":"Xie","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9141-6460","authenticated-orcid":false,"given":"Suncheng","family":"Xiang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7198-3664","authenticated-orcid":false,"given":"Zefang","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3489-4578","authenticated-orcid":false,"given":"Ting","family":"Liu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5516-3016","authenticated-orcid":false,"given":"Yuzhuo","family":"Fu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4907-3978","authenticated-orcid":false,"given":"Xiaoye","family":"Qu","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.446"},{"key":"e_1_3_2_1_2_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer normalization. arXiv preprint arXiv:1607.06450 (2016)."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings, Part VI 13","author":"Bossard Lukas","year":"2014","unstructured":"Lukas Bossard, Matthieu Guillaumin, and Luc Van Gool. 2014. Food-101--mining discriminative components with random forests. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part VI 13. Springer, 446--461."},{"key":"e_1_3_2_1_4_1","first-page":"16664","article-title":"Adaptformer: Adapting vision transformers for scalable visual recognition","volume":"35","author":"Chen Shoufa","year":"2022","unstructured":"Shoufa Chen, Chongjian Ge, Zhan Tong, Jiangliu Wang, Yibing Song, Jue Wang, and Ping Luo. 2022. Adaptformer: Adapting vision transformers for scalable visual recognition. Advances in Neural Information Processing Systems, Vol. 35 (2022), 16664--16678.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.461"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00626-4"},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_9_1","volume-title":"Improving Low-resource Prompt-based Relation Representation with Multi-view Decoupling Learning. arXiv preprint arXiv:2312.17267","author":"Fan Chenghao","year":"2023","unstructured":"Chenghao Fan, Wei Wei, Xiaoye Qu, Zhenyi Lu, Wenfeng Xie, Yu Cheng, and Dangyang Chen. 2023. Improving Low-resource Prompt-based Relation Representation with Multi-view Decoupling Learning. arXiv preprint arXiv:2312.17267 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1561\/9781638281337"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.27950"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"e_1_3_2_1_13_1","volume-title":"Knowledge Distillation of Large Language Models. arXiv preprint arXiv:2306.08543","author":"Gu Yuxian","year":"2023","unstructured":"Yuxian Gu, Li Dong, Furu Wei, and Minlie Huang. 2023. Knowledge Distillation of Large Language Models. arXiv preprint arXiv:2306.08543 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"2799","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-Efficient Transfer Learning for NLP. In Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 97), Kamalika Chaudhuri and Ruslan Salakhutdinov (Eds.). PMLR, 2790--2799. https:\/\/proceedings.mlr.press\/v97\/houlsby19a.html"},{"key":"e_1_3_2_1_16_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations.","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al. 2021. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"e_1_3_2_1_18_1","volume-title":"Convolutional bypasses are better vision transformer adapters. arXiv preprint arXiv:2207.07039","author":"Jie Shibo","year":"2022","unstructured":"Shibo Jie and Zhi-Hong Deng. 2022. Convolutional bypasses are better vision transformer adapters. arXiv preprint arXiv:2207.07039 (2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25187"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01579"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_25_1","first-page":"109","article-title":"Scaling & shifting your features: A new baseline for efficient model tuning","volume":"35","author":"Lian Dongze","year":"2022","unstructured":"Dongze Lian, Daquan Zhou, Jiashi Feng, and Xinchao Wang. 2022. Scaling & shifting your features: A new baseline for efficient model tuning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 109--123.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_26_1","volume-title":"A survey of visual transformers","author":"Liu Yang","year":"2023","unstructured":"Yang Liu, Yao Zhang, Yixin Wang, Feng Hou, Jin Yuan, Jiang Tian, Yang Zhang, Zhongchao Shi, Jianping Fan, and Zhiqiang He. 2023. A survey of visual transformers. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_28_1","unstructured":"Yuning Lu Jianzhuang Liu Yonggang Zhang et al. 2022. Prompt distribution learning. In CVPR. 5206--5215."},{"key":"e_1_3_2_1_29_1","volume-title":"Towards efficient visual adaption via structural re-parameterization. arXiv preprint arXiv:2302.08106","author":"Luo Gen","year":"2023","unstructured":"Gen Luo, Minglang Huang, Yiyi Zhou, Xiaoshuai Sun, Guannan Jiang, Zhiyu Wang, and Rongrong Ji. 2023. Towards efficient visual adaption via structural re-parameterization. arXiv preprint arXiv:2302.08106 (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151","author":"Maji Subhransu","year":"2013","unstructured":"Subhransu Maji, Esa Rahtu, Juho Kannala, Matthew Blaschko, and Andrea Vedaldi. 2013. Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)."},{"key":"e_1_3_2_1_31_1","unstructured":"Yuval Netzer Tao Wang Adam Coates Alessandro Bissacco Bo Wu and Andrew Y Ng. 2011. Reading digits in natural images with unsupervised feature learning. (2011)."},{"volume-title":"A visual vocabulary for flower classification. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06)","author":"Nilsback M-E","key":"e_1_3_2_1_32_1","unstructured":"M-E Nilsback and Andrew Zisserman. 2006. A visual vocabulary for flower classification. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06), Vol. 2. IEEE, 1447--1454."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2021.3054609"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"e_1_3_2_1_36_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_37_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_39_1","volume-title":"iDAT: inverse Distillation Adapter-Tuning. arXiv preprint arXiv:2403.15750","author":"Ruan Jiacheng","year":"2024","unstructured":"Jiacheng Ruan, Jingsheng Gao, Mingye Xie, Daize Dong, Suncheng Xiang, Ting Liu, and Yuzhuo Fu. 2024. iDAT: inverse Distillation Adapter-Tuning. arXiv preprint arXiv:2403.15750 (2024)."},{"key":"e_1_3_2_1_40_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)."},{"key":"e_1_3_2_1_41_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_42_1","volume-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461","author":"Wang Alex","year":"2018","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R Bowman. 2018. GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461 (2018)."},{"key":"e_1_3_2_1_43_1","volume-title":"Revisiting the Trade-off between Accuracy and Robustness via Weight Distribution of Filters. arXiv preprint arXiv:2306.03430","author":"Wei Xingxing","year":"2023","unstructured":"Xingxing Wei and Shiji Zhao. 2023. Revisiting the Trade-off between Accuracy and Robustness via Weight Distribution of Filters. arXiv preprint arXiv:2306.03430 (2023)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"volume-title":"Sun database: Large-scale scene recognition from abbey to zoo. In 2010 IEEE computer society conference on computer vision and pattern recognition","author":"Xiao Jianxiong","key":"e_1_3_2_1_45_1","unstructured":"Jianxiong Xiao, James Hays, Krista A Ehinger, Aude Oliva, and Antonio Torralba. 2010. Sun database: Large-scale scene recognition from abbey to zoo. In 2010 IEEE computer society conference on computer vision and pattern recognition. IEEE, 3485--3492."},{"volume-title":"Transfer learning","author":"Yang Qiang","key":"e_1_3_2_1_46_1","unstructured":"Qiang Yang, Yu Zhang, Wenyuan Dai, and Sinno Jialin Pan. 2020. Transfer learning. Cambridge University Press."},{"key":"e_1_3_2_1_47_1","volume-title":"From Knowledge Distillation to Self-Knowledge Distillation: A Unified Approach with Normalized Loss and Customized Soft Labels. arXiv preprint arXiv:2303.13005","author":"Yang Zhendong","year":"2023","unstructured":"Zhendong Yang, Ailing Zeng, Zhe Li, Tianke Zhang, Chun Yuan, and Yu Li. 2023. From Knowledge Distillation to Self-Knowledge Distillation: A Unified Approach with Normalized Loss and Customized Soft Labels. arXiv preprint arXiv:2303.13005 (2023)."},{"key":"e_1_3_2_1_48_1","unstructured":"Bruce XB Yu Jianlong Chang Haixin Wang Lingbo Liu Shijie Wang Zhiyu Wang Junfan Lin Lingxi Xie Haojie Li Zhouchen Lin et al. 2023. Visual Tuning. arXiv preprint arXiv:2305.06061 (2023)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01389"},{"key":"e_1_3_2_1_50_1","volume-title":"Maxim Neumann, Alexey Dosovitskiy, et al.","author":"Zhai Xiaohua","year":"2019","unstructured":"Xiaohua Zhai, Joan Puigcerver, Alexander Kolesnikov, Pierre Ruyssen, Carlos Riquelme, Mario Lucic, Josip Djolonga, Andre Susano Pinto, Maxim Neumann, Alexey Dosovitskiy, et al. 2019. A large-scale study of representation learning with the visual task adaptation benchmark. arXiv preprint arXiv:1910.04867 (2019)."},{"key":"e_1_3_2_1_51_1","volume-title":"Vision-language models for vision tasks: A survey. arXiv preprint arXiv:2304.00685","author":"Zhang Jingyi","year":"2023","unstructured":"Jingyi Zhang, Jiaxing Huang, Sheng Jin, and Shijian Lu. 2023. Vision-language models for vision tasks: A survey. arXiv preprint arXiv:2304.00685 (2023)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01228"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00381"},{"key":"e_1_3_2_1_54_1","volume-title":"Neural prompt search. arXiv preprint arXiv:2206.04673","author":"Zhang Yuanhan","year":"2022","unstructured":"Yuanhan Zhang, Kaiyang Zhou, and Ziwei Liu. 2022. Neural prompt search. arXiv preprint arXiv:2206.04673 (2022)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680843","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680843","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:08Z","timestamp":1750295888000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680843"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":55,"alternative-id":["10.1145\/3664647.3680843","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680843","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}