{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:17:01Z","timestamp":1775578621100,"version":"3.50.1"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729485","type":"print"},{"value":"9783031729492","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72949-2_7","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:22:17Z","timestamp":1730301737000},"page":"109-125","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Adapt Without Forgetting: Distill Proximity from\u00a0Dual Teachers in\u00a0Vision-Language Models"],"prefix":"10.1007","author":[{"given":"Mengyu","family":"Zheng","sequence":"first","affiliation":[]},{"given":"Yehui","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Zhiwei","family":"Hao","sequence":"additional","affiliation":[]},{"given":"Kai","family":"Han","sequence":"additional","affiliation":[]},{"given":"Yunhe","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Chang","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Aljundi, R., Babiloni, F., Elhoseiny, M., Rohrbach, M., Tuytelaars, T.: Memory aware synapses: learning what (not) to forget. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 139\u2013154 (2018)","DOI":"10.1007\/978-3-030-01219-9_9"},{"key":"7_CR2","unstructured":"Bai, Z., Liu, X., Hu, H., Guo, T., Zhang, Q., Wang, Y.: Data-free distillation of language model by text-to-text transfer. arXiv preprint arXiv:2311.01689 (2023)"},{"key":"7_CR3","unstructured":"Chen, H., et al.: Learning student networks in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6428\u20136437 (2021)"},{"key":"7_CR4","unstructured":"Ding, Y., Liu, L., Tian, C., Yang, J., Ding, H.: Don\u2019t stop learning: towards continual learning for the clip model. arXiv preprint arXiv:2207.09248 (2022)"},{"key":"7_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Douillard, A., Ram\u00e9, A., Couairon, G., Cord, M.: Dytox: transformers for continual learning with dynamic token expansion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9285\u20139295 (2022)","DOI":"10.1109\/CVPR52688.2022.00907"},{"key":"7_CR7","unstructured":"Fernando, C., et al.: Pathnet: evolution channels gradient descent in super neural networks. arXiv preprint arXiv:1701.08734 (2017)"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Gao, P., et al.: Clip-adapter: better vision-language models with feature adapters. Int. J. Comput. Vision 1\u201315 (2023)","DOI":"10.1007\/s11263-023-01891-x"},{"issue":"7","key":"7_CR9","first-page":"2455","volume":"31","author":"T Guo","year":"2019","unstructured":"Guo, T., Xu, C., He, S., Shi, B., Xu, C., Tao, D.: Robust student network learning. IEEE Trans. Neural Netw. Learn. Syst. 31(7), 2455\u20132468 (2019)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"7_CR10","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Hou, S., Pan, X., Loy, C.C., Wang, Z., Lin, D.: Learning a unified classifier incrementally via rebalancing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 831\u2013839 (2019)","DOI":"10.1109\/CVPR.2019.00092"},{"key":"7_CR12","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"issue":"13","key":"7_CR13","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"J Kirkpatrick","year":"2017","unstructured":"Kirkpatrick, J., et al.: Overcoming catastrophic forgetting in neural networks. Proc. Nat. Acad. Sci. 114(13), 3521\u20133526 (2017)","journal-title":"Proc. Nat. Acad. Sci."},{"key":"7_CR14","unstructured":"Krizhevsky, A., Hinton, G., et\u00a0al.: Learning multiple layers of features from tiny images (2009)"},{"key":"7_CR15","unstructured":"Le, Y., Yang, X.: Tiny imagenet visual recognition challenge. CS 231N 7(7), 3 (2015)"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Li, L.H., et al.: Grounded language-image pre-training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10965\u201310975 (2022)","DOI":"10.1109\/CVPR52688.2022.01069"},{"issue":"12","key":"7_CR17","doi-asserted-by":"publisher","first-page":"2935","DOI":"10.1109\/TPAMI.2017.2773081","volume":"40","author":"Z Li","year":"2017","unstructured":"Li, Z., Hoiem, D.: Learning without forgetting. IEEE Trans. Pattern Anal. Mach. Intell. 40(12), 2935\u20132947 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Knowledge distillation via instance relationship graph. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7096\u20137104 (2019)","DOI":"10.1109\/CVPR.2019.00726"},{"key":"7_CR19","unstructured":"Lopes, R.G., Fenu, S., Starner, T.: Data-free knowledge distillation for deep neural networks. arXiv preprint arXiv:1710.07535 (2017)"},{"key":"7_CR20","unstructured":"Lopez-Paz, D., Ranzato, M.: Gradient episodic memory for continual learning. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Mallya, A., Lazebnik, S.: Packnet: adding multiple tasks to a single network by iterative pruning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7765\u20137773 (2018)","DOI":"10.1109\/CVPR.2018.00810"},{"issue":"5","key":"7_CR22","doi-asserted-by":"publisher","first-page":"5513","DOI":"10.1109\/TPAMI.2022.3213473","volume":"45","author":"M Masana","year":"2022","unstructured":"Masana, M., Liu, X., Twardowski, B., Menta, M., Bagdanov, A.D., Van De Weijer, J.: Class-incremental learning: survey and performance evaluation on image classification. IEEE Trans. Pattern Anal. Mach. Intell. 45(5), 5513\u20135533 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR23","unstructured":"Nie, Y., et al.: Lightclip: learning multi-level interaction for lightweight vision-language models. arXiv preprint arXiv:2312.00674 (2023)"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Park, W., Kim, D., Lu, Y., Cho, M.: Relational knowledge distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3967\u20133976 (2019)","DOI":"10.1109\/CVPR.2019.00409"},{"key":"7_CR25","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Rannen, A., Aljundi, R., Blaschko, M.B., Tuytelaars, T.: Encoder based lifelong learning. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1320\u20131328 (2017)","DOI":"10.1109\/ICCV.2017.148"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Rebuffi, S.A., Kolesnikov, A., Sperl, G., Lampert, C.H.: ICARL: incremental classifier and representation learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2001\u20132010 (2017)","DOI":"10.1109\/CVPR.2017.587"},{"key":"7_CR28","unstructured":"Romero, A., Ballas, N., Kahou, S.E., Chassang, A., Gatta, C., Bengio, Y.: Fitnets: hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)"},{"key":"7_CR29","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. In: Advances in Neural Information Processing Systems, vol. 35, pp. 25278\u201325294 (2022)"},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"Shmelkov, K., Schmid, C., Alahari, K.: Incremental learning of object detectors without catastrophic forgetting. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3400\u20133409 (2017)","DOI":"10.1109\/ICCV.2017.368"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Tang, J., Qu, M., Wang, M., Zhang, M., Yan, J., Mei, Q.: Line: large-scale information network embedding. In: Proceedings of the 24th International Conference on World Wide Web, pp. 1067\u20131077 (2015)","DOI":"10.1145\/2736277.2741093"},{"key":"7_CR32","unstructured":"Thengane, V., Khan, S., Hayat, M., Khan, F.: Clip model is an efficient continual learner. arXiv:2210.03114 (2022)"},{"key":"7_CR33","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: Attriclip: a non-incremental learner for incremental knowledge learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3654\u20133663 (2023)","DOI":"10.1109\/CVPR52729.2023.00356"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Wortsman, M., et al.: Robust fine-tuning of zero-shot models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7959\u20137971 (2022)","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"7_CR35","unstructured":"Xing, Y., Wu, Q., Cheng, D., Zhang, S., Liang, G., Zhang, Y.: Class-aware visual prompt tuning for vision-language pre-trained model. arXiv preprint arXiv:2208.08340 (2022)"},{"key":"7_CR36","doi-asserted-by":"crossref","unstructured":"Yan, S., Xie, J., He, X.: Der: dynamically expandable representation for class incremental learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3014\u20133023 (2021)","DOI":"10.1109\/CVPR46437.2021.00303"},{"key":"7_CR37","unstructured":"Yao, L., et al.: Detclip: dictionary-enriched visual-concept paralleled pre-training for open-world detection. In: Advances in Neural Information Processing Systems, vol. 35, pp. 9125\u20139138 (2022)"},{"key":"7_CR38","unstructured":"Yao, L., et al.: Filip: fine-grained interactive language-image pre-training. arXiv preprint arXiv:2111.07783 (2021)"},{"key":"7_CR39","doi-asserted-by":"crossref","unstructured":"Yim, J., Joo, D., Bae, J., Kim, J.: A gift from knowledge distillation: fast optimization, network minimization and transfer learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4133\u20134141 (2017)","DOI":"10.1109\/CVPR.2017.754"},{"key":"7_CR40","unstructured":"Yu, J., Wang, Z., Vasudevan, V., Yeung, L., Seyedhosseini, M., Wu, Y.: Coca: contrastive captioners are image-text foundation models. arXiv preprint arXiv:2205.01917 (2022)"},{"key":"7_CR41","doi-asserted-by":"crossref","unstructured":"Yu, Y.C., et al.: Select and distill: selective dual-teacher knowledge transfer for continual learning on vision-language models. arXiv preprint arXiv:2403.09296 (2024)","DOI":"10.1007\/978-3-031-73347-5_13"},{"key":"7_CR42","unstructured":"Zagoruyko, S., Komodakis, N.: Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer. arXiv preprint arXiv:1612.03928 (2016)"},{"key":"7_CR43","unstructured":"Zang, Y., Li, W., Zhou, K., Huang, C., Loy, C.C.: Unified vision and language prompt learning. arXiv preprint arXiv:2210.07225 (2022)"},{"key":"7_CR44","unstructured":"Zhang, J., Huang, J., Jin, S., Lu, S.: Vision-language models for vision tasks: a survey. arXiv preprint arXiv:2304.00685 (2023)"},{"key":"7_CR45","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Ma, M., Wang, K., Qin, Z., Yue, X., You, Y.: Preventing zero-shot transfer degradation in continual learning of vision-language models. arXiv preprint arXiv:2303.06628 (2023)","DOI":"10.1109\/ICCV51070.2023.01752"},{"issue":"9","key":"7_CR46","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vision"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72949-2_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:40:53Z","timestamp":1730302853000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72949-2_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031729485","9783031729492"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72949-2_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}