{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:17:29Z","timestamp":1775578649657,"version":"3.50.1"},"publisher-location":"Cham","reference-count":95,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031727634","type":"print"},{"value":"9783031727641","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T00:00:00Z","timestamp":1729814400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T00:00:00Z","timestamp":1729814400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72764-1_20","type":"book-chapter","created":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T14:03:10Z","timestamp":1729778590000},"page":"346-365","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Mind the\u00a0Interference: Retaining Pre-trained Knowledge in\u00a0Parameter Efficient Continual Learning of\u00a0Vision-Language Models"],"prefix":"10.1007","author":[{"given":"Longxiang","family":"Tang","sequence":"first","affiliation":[]},{"given":"Zhuotao","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Kai","family":"Li","sequence":"additional","affiliation":[]},{"given":"Chunming","family":"He","sequence":"additional","affiliation":[]},{"given":"Hantao","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Hengshuang","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Xiu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jiaya","family":"Jia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,25]]},"reference":[{"key":"20_CR1","unstructured":"Ahn, H., Cha, S., Lee, D., Moon, T.: Uncertainty-based continual learning with adaptive regularization. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"20_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1007\/978-3-030-01219-9_9","volume-title":"Computer Vision \u2013 ECCV 2018","author":"R Aljundi","year":"2018","unstructured":"Aljundi, R., Babiloni, F., Elhoseiny, M., Rohrbach, M., Tuytelaars, T.: Memory aware synapses: learning what (not) to forget. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11207, pp. 144\u2013161. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01219-9_9"},{"key":"20_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1007\/978-3-319-10599-4_29","volume-title":"Computer Vision \u2013 ECCV 2014","author":"L Bossard","year":"2014","unstructured":"Bossard, L., Guillaumin, M., Van Gool, L.: Food-101 \u2013 mining discriminative components with random forests. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 446\u2013461. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_29"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Bowman, B., et al: A-La-Carte Prompt Tuning (APT): combining distinct data via composable prompting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 14984\u201314993 (2023)","DOI":"10.1109\/CVPR52729.2023.01439"},{"key":"20_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"20_CR6","unstructured":"Chen, S., et al.: AdaptFormer: adapting vision transformers for scalable visual recognition. In: Advances in Neural Information Processing Systems, vol. 35, pp. 16664\u201316678 (2022)"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., Vedaldi, A.: Describing textures in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3606\u20133613 (2014)","DOI":"10.1109\/CVPR.2014.461"},{"issue":"7","key":"20_CR8","first-page":"3366","volume":"44","author":"M De Lange","year":"2021","unstructured":"De Lange, M., et al.: A continual learning survey: defying forgetting in classification tasks. IEEE Trans. Pattern Anal. Mach. Intell. 44(7), 3366\u20133385 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"6","key":"20_CR10","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1109\/MSP.2012.2211477","volume":"29","author":"L Deng","year":"2012","unstructured":"Deng, L.: The MNIST database of handwritten digit images for machine learning research [best of the web]. IEEE Signal Process. Mag. 29(6), 141\u2013142 (2012)","journal-title":"IEEE Signal Process. Mag."},{"key":"20_CR11","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"20_CR12","unstructured":"Ding, Y., Liu, L., Tian, C., Yang, J., Ding, H.: Don\u2019t stop learning: towards continual learning for the clip model. arXiv preprint arXiv:2207.09248 (2022)"},{"key":"20_CR13","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Douillard, A., Ram\u00e9, A., Couairon, G., Cord, M.: DyTox: transformers for continual learning with dynamic token expansion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9285\u20139295 (2022)","DOI":"10.1109\/CVPR52688.2022.00907"},{"key":"20_CR15","unstructured":"Fang, C., et al.: Real-world image dehazing with coherence-based label generator and cooperative unfolding network. arXiv preprint arXiv:2406.07966 (2024)"},{"key":"20_CR16","unstructured":"Fei-Fei, L., Fergus, R., Perona, P.: Learning generative visual models from few training examples: an incremental Bayesian approach tested on 101 object categories. In: 2004 Conference on Computer Vision and Pattern Recognition Workshop, pp. 178\u2013178. IEEE (2004)"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Gao, P., et al.: CLIP-adapter: better vision-language models with feature adapters. Int. J. Comput. Vis., 1\u201315 (2023)","DOI":"10.1007\/s11263-023-01891-x"},{"key":"20_CR18","unstructured":"He, C., et al.: Reti-Diff: illumination degradation image restoration with retinex-based latent diffusion model. arXiv preprint arXiv:2311.11638 (2023)"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"He, C., et al.: Camouflaged object detection with feature decomposition and edge reconstruction. In: CVPR, pp. 22046\u201322055 (2023)","DOI":"10.1109\/CVPR52729.2023.02111"},{"key":"20_CR20","unstructured":"He, C., Li, K., Zhang, Y., Xu, G., Tang, L.: Weakly-supervised concealed object segmentation with SAM-based pseudo labeling and multi-scale feature grouping. In: NeurIPS (2024)"},{"key":"20_CR21","unstructured":"He, C., Li, K., Zhang, Y., Zhang, Y., Guo, Z., Li, X.: Strategic preys make acute predators: enhancing camouflaged object detectors by generating camouflaged objects. In: ICLR (2024)"},{"key":"20_CR22","unstructured":"He, C., et al.: Diffusion models in low-level vision: a survey. arXiv preprint arXiv:2406.11138 (2024)"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Hegde, D., Valanarasu, J.M.J., Patel, V.: CLIP goes 3D: leveraging prompt tuning for language grounded 3D recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2028\u20132038 (2023)","DOI":"10.1109\/ICCVW60793.2023.00217"},{"issue":"7","key":"20_CR25","doi-asserted-by":"publisher","first-page":"2217","DOI":"10.1109\/JSTARS.2019.2918242","volume":"12","author":"P Helber","year":"2019","unstructured":"Helber, P., Bischke, B., Dengel, A., Borth, D.: EuroSAT: a novel dataset and deep learning benchmark for land use and land cover classification. IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens. 12(7), 2217\u20132226 (2019)","journal-title":"IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens."},{"key":"20_CR26","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning, pp. 2790\u20132799. PMLR (2019)"},{"key":"20_CR27","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"20_CR28","unstructured":"Hu, Z., Lyu, J., Gao, D., Vasconcelos, N.: Pop: prompt of prompts for continual learning. arXiv preprint arXiv:2306.08200 (2023)"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Isele, D., Cosgun, A.: Selective experience replay for lifelong learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.11595"},{"key":"20_CR30","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"key":"20_CR31","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"709","DOI":"10.1007\/978-3-031-19827-4_41","volume-title":"ECCV 2022","author":"M Jia","year":"2022","unstructured":"Jia, M., et al.: Visual prompt tuning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13693, pp. 709\u2013727. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19827-4_41"},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Jie, S., Deng, Z.H.: FacT: factor-tuning for lightweight adaptation on vision transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 1060\u20131068 (2023)","DOI":"10.1609\/aaai.v37i1.25187"},{"key":"20_CR33","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/978-3-031-19833-5_7","volume-title":"ECCV 2022","author":"C Ju","year":"2022","unstructured":"Ju, C., Han, T., Zheng, K., Zhang, Y., Xie, W.: Prompting visual-language models for efficient video understanding. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13695, pp. 105\u2013124. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_7"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Khan, M.G.Z.A., Naeem, M.F., Van\u00a0Gool, L., Stricker, D., Tombari, F., Afzal, M.Z.: Introducing language guidance in prompt-based continual learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11463\u201311473 (2023)","DOI":"10.1109\/ICCV51070.2023.01053"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Khattak, M.U., Rasheed, H., Maaz, M., Khan, S., Khan, F.S.: MaPLe: multi-modal prompt learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19113\u201319122 (2023)","DOI":"10.1109\/CVPR52729.2023.01832"},{"issue":"13","key":"20_CR36","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"J Kirkpatrick","year":"2017","unstructured":"Kirkpatrick, J., et al.: Overcoming catastrophic forgetting in neural networks. Proc. Nat. Acad. Sci. 114(13), 3521\u20133526 (2017)","journal-title":"Proc. Nat. Acad. Sci."},{"key":"20_CR37","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3D object representations for fine-grained categorization. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 554\u2013561 (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"20_CR38","unstructured":"Krizhevsky, A., Hinton, G., et\u00a0al.: Learning multiple layers of features from tiny images (2009)"},{"key":"20_CR39","doi-asserted-by":"crossref","unstructured":"Lai, X., et al.: LISA: reasoning segmentation via large language model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9579\u20139589 (2024)","DOI":"10.1109\/CVPR52733.2024.00915"},{"key":"20_CR40","doi-asserted-by":"crossref","unstructured":"Lai, X., et al.: Semi-supervised semantic segmentation with directional context-aware consistency. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1205\u20131214 (2021)","DOI":"10.1109\/CVPR46437.2021.00126"},{"key":"20_CR41","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International Conference on Machine Learning, pp. 12888\u201312900. PMLR (2022)"},{"key":"20_CR42","doi-asserted-by":"crossref","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"20_CR43","unstructured":"Li, X., Zhou, Y., Wu, T., Socher, R., Xiong, C.: Learn to grow: a continual structure learning framework for overcoming catastrophic forgetting. In: International Conference on Machine Learning, pp. 3925\u20133934. PMLR (2019)"},{"issue":"12","key":"20_CR44","doi-asserted-by":"publisher","first-page":"2935","DOI":"10.1109\/TPAMI.2017.2773081","volume":"40","author":"Z Li","year":"2017","unstructured":"Li, Z., Hoiem, D.: Learning without forgetting. IEEE Trans. Pattern Anal. Mach. Intell. 40(12), 2935\u20132947 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"9","key":"20_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3560815","volume":"55","author":"P Liu","year":"2023","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., Neubig, G.: Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. ACM Comput. Surv. 55(9), 1\u201335 (2023)","journal-title":"ACM Comput. Surv."},{"key":"20_CR46","unstructured":"Lopez-Paz, D., Ranzato, M.: Gradient episodic memory for continual learning. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR47","unstructured":"Maji, S., Rahtu, E., Kannala, J., Blaschko, M., Vedaldi, A.: Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)"},{"key":"20_CR48","doi-asserted-by":"crossref","unstructured":"Mallya, A., Lazebnik, S.: PackNet: adding multiple tasks to a single network by iterative pruning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7765\u20137773 (2018)","DOI":"10.1109\/CVPR.2018.00810"},{"key":"20_CR49","doi-asserted-by":"crossref","unstructured":"Nilsback, M.E., Zisserman, A.: Automated flower classification over a large number of classes. In: 2008 Sixth Indian Conference on Computer Vision, Graphics & Image Processing, pp. 722\u2013729. IEEE (2008)","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"20_CR50","doi-asserted-by":"crossref","unstructured":"Parkhi, O.M., Vedaldi, A., Zisserman, A., Jawahar, C.: Cats and dogs. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3498\u20133505. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"20_CR51","doi-asserted-by":"crossref","unstructured":"Peng, B., et al..: Hierarchical dense correlation distillation for few-shot segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23641\u201323651 (2023)","DOI":"10.1109\/CVPR52729.2023.02264"},{"key":"20_CR52","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"524","DOI":"10.1007\/978-3-030-58536-5_31","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Prabhu","year":"2020","unstructured":"Prabhu, A., Torr, P.H.S., Dokania, P.K.: GDumb: a simple approach that questions our progress in continual learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 524\u2013540. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_31"},{"key":"20_CR53","doi-asserted-by":"crossref","unstructured":"Pu, Y., Han, Y., Wang, Y., Feng, J., Deng, C., Huang, G.: Fine-grained recognition with learnable semantic data augmentation. IEEE Trans. Image Process. (2024)","DOI":"10.1109\/TIP.2024.3364500"},{"key":"20_CR54","unstructured":"Pu, Y., et al.: Rank-DETR for high quality object detection. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR55","doi-asserted-by":"crossref","unstructured":"Pu, Y., et al.: Adaptive rotated convolution for rotated object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6589\u20136600 (2023)","DOI":"10.1109\/ICCV51070.2023.00606"},{"key":"20_CR56","doi-asserted-by":"crossref","unstructured":"Qian, Z., Wang, X., Duan, X., Qin, P., Li, Y., Zhu, W.: Decouple before interact: multi-modal prompt learning for continual visual question answering. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2953\u20132962 (2023)","DOI":"10.1109\/ICCV51070.2023.00276"},{"key":"20_CR57","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"20_CR58","unstructured":"Rao, D., Visin, F., Rusu, A., Pascanu, R., Teh, Y.W., Hadsell, R.: Continual unsupervised representation learning. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"20_CR59","doi-asserted-by":"crossref","unstructured":"Rebuffi, S.A., Kolesnikov, A., Sperl, G., Lampert, C.H.: iCaRL: incremental classifier and representation learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2001\u20132010 (2017)","DOI":"10.1109\/CVPR.2017.587"},{"key":"20_CR60","unstructured":"Rolnick, D., Ahuja, A., Schwarz, J., Lillicrap, T., Wayne, G.: Experience replay for continual learning. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"20_CR61","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Advances in Neural Information Processing Systems, vol. 35, pp. 36479\u201336494 (2022)"},{"key":"20_CR62","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"20_CR63","unstructured":"Shen, S., et al.: How much can CLIP benefit vision-and-language tasks? arXiv preprint arXiv:2107.06383 (2021)"},{"key":"20_CR64","unstructured":"Shin, H., Lee, J.K., Kim, J., Kim, J.: Continual learning with deep generative replay. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR65","doi-asserted-by":"crossref","unstructured":"Smith, J.S., et al.: Construct-VL: data-free continual structured VL concepts learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14994\u201315004 (2023)","DOI":"10.1109\/CVPR52729.2023.01440"},{"key":"20_CR66","doi-asserted-by":"crossref","unstructured":"Smith, J.S., et al.: CODA-Prompt: continual decomposed attention-based prompting for rehearsal-free continual learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11909\u201311919 (2023)","DOI":"10.1109\/CVPR52729.2023.01146"},{"key":"20_CR67","doi-asserted-by":"crossref","unstructured":"Sohn, K., et al.: Visual prompt tuning for generative transfer learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19840\u201319851 (2023)","DOI":"10.1109\/CVPR52729.2023.01900"},{"key":"20_CR68","doi-asserted-by":"crossref","unstructured":"Tang, L., Li, K., He, C., Zhang, Y., Li, X.: Consistency regularization for generalizable source-free domain adaptation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4323\u20134333 (2023)","DOI":"10.1109\/ICCVW60793.2023.00467"},{"key":"20_CR69","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"684","DOI":"10.1007\/978-3-031-43907-0_65","volume-title":"MICCAI 2023","author":"L Tang","year":"2023","unstructured":"Tang, L., Li, K., He, C., Zhang, Y., Li, X.: Source-free domain adaptive fundus image segmentation with class-balanced mean teacher. In: Greenspan, H., et al. (eds.) MICCAI 2023. LNCS, vol. 14220, pp. 684\u2013694. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-43907-0_65"},{"key":"20_CR70","doi-asserted-by":"crossref","unstructured":"Tian, Z., et al.: Generalized few-shot semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11563\u201311572 (2022)","DOI":"10.1109\/CVPR52688.2022.01127"},{"key":"20_CR71","doi-asserted-by":"crossref","unstructured":"Tian, Z., et al.: Learning shape-aware embedding for scene text detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4234\u20134243 (2019)","DOI":"10.1109\/CVPR.2019.00436"},{"issue":"2","key":"20_CR72","doi-asserted-by":"publisher","first-page":"1050","DOI":"10.1109\/TPAMI.2020.3013717","volume":"44","author":"Z Tian","year":"2020","unstructured":"Tian, Z., Zhao, H., Shu, M., Yang, Z., Li, R., Jia, J.: Prior guided feature enrichment network for few-shot segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 44(2), 1050\u20131065 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"20_CR73","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR74","unstructured":"Van\u00a0de Ven, G.M., Tolias, A.S.: Three scenarios for continual learning. arXiv preprint arXiv:1904.07734 (2019)"},{"key":"20_CR75","unstructured":"Wang, J., Ma, Y., Guo, J., Xiao, Y., Huang, G., Li, X.: COVE: unleashing the diffusion feature correspondence for consistent video editing. arXiv preprint arXiv:2406.08850 (2024)"},{"key":"20_CR76","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: GRA: detecting oriented objects through group-wise rotating and attention. arXiv preprint arXiv:2403.11127 (2024)","DOI":"10.1007\/978-3-031-72643-9_18"},{"key":"20_CR77","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: K-adapter: infusing knowledge into pre-trained models with adapters (2020)","DOI":"10.18653\/v1\/2021.findings-acl.121"},{"key":"20_CR78","unstructured":"Wang, Y., Huang, Z., Hong, X.: S-prompts learning with pre-trained transformers: an Occam\u2019s razor for domain incremental learning. In: Advances in Neural Information Processing Systems, vol. 35, pp. 5682\u20135695 (2022)"},{"key":"20_CR79","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1007\/978-3-031-19809-0_36","volume-title":"ECCV 2022","author":"Z Wang","year":"2022","unstructured":"Wang, Z., et al.: DualPrompt: complementary prompting for rehearsal-free continual learning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13686, pp. 631\u2013648. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19809-0_36"},{"key":"20_CR80","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: Learning to prompt for continual learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 139\u2013149 (2022)","DOI":"10.1109\/CVPR52688.2022.00024"},{"key":"20_CR81","doi-asserted-by":"crossref","unstructured":"Wortsman, M., et\u00a0al.: Robust fine-tuning of zero-shot models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7959\u20137971 (2022)","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"20_CR82","doi-asserted-by":"crossref","unstructured":"Xiao, J., Hays, J., Ehinger, K.A., Oliva, A., Torralba, A.: Sun database: large-scale scene recognition from abbey to zoo. In: 2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 3485\u20133492. IEEE (2010)","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"20_CR83","doi-asserted-by":"crossref","unstructured":"Yang, J., Ding, R., Brown, E., Qi, X., Xie, S.: V-IRL: grounding virtual intelligence in real life. arXiv preprint arXiv:2402.03310 (2024)","DOI":"10.1007\/978-3-031-72995-9_3"},{"key":"20_CR84","doi-asserted-by":"crossref","unstructured":"Yang, S., Tian, Z., Jiang, L., Jia, J.: Unified language-driven zero-shot domain adaptation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23407\u201323415 (2024)","DOI":"10.1109\/CVPR52733.2024.02209"},{"key":"20_CR85","doi-asserted-by":"crossref","unstructured":"Yang, S., et al.: Exploring sparse visual prompt for domain adaptive dense prediction. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 16334\u201316342 (2024)","DOI":"10.1609\/aaai.v38i15.29569"},{"key":"20_CR86","unstructured":"Yao, L., et al.: FILIP: fine-grained interactive language-image pre-training. arXiv preprint arXiv:2111.07783 (2021)"},{"key":"20_CR87","unstructured":"Yoon, J., Yang, E., Lee, J., Hwang, S.J.: Lifelong learning with dynamically expandable networks. arXiv preprint arXiv:1708.01547 (2017)"},{"key":"20_CR88","doi-asserted-by":"crossref","unstructured":"Zhai, X., et al.: LiT: zero-shot transfer with locked-image text tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18123\u201318133 (2022)","DOI":"10.1109\/CVPR52688.2022.01759"},{"key":"20_CR89","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Class-incremental learning via deep model consolidation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1131\u20131140 (2020)","DOI":"10.1109\/WACV45572.2020.9093365"},{"key":"20_CR90","unstructured":"Zhang, R., et al.: LLaMA-adapter: efficient fine-tuning of language models with zero-init attention. arXiv preprint arXiv:2303.16199 (2023)"},{"key":"20_CR91","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Ma, M., Wang, K., Qin, Z., Yue, X., You, Y.: Preventing zero-shot transfer degradation in continual learning of vision-language models. arXiv preprint arXiv:2303.06628 (2023)","DOI":"10.1109\/ICCV51070.2023.01752"},{"key":"20_CR92","unstructured":"Zhou, D.W., Zhang, Y., Ning, J., Ye, H.J., Zhan, D.C., Liu, Z.: Learning without forgetting for vision-language models. arXiv preprint arXiv:2305.19270 (2023)"},{"key":"20_CR93","doi-asserted-by":"crossref","unstructured":"Zhou, H., et al.: UniHead: unifying multi-perception for detection heads. IEEE Trans. Neural Netw. Learn. Syst. (2024)","DOI":"10.1109\/TNNLS.2024.3412947"},{"key":"20_CR94","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16816\u201316825 (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"issue":"9","key":"20_CR95","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vision"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72764-1_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T06:30:36Z","timestamp":1732948236000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72764-1_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,25]]},"ISBN":["9783031727634","9783031727641"],"references-count":95,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72764-1_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,25]]},"assertion":[{"value":"25 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}