{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:10:59Z","timestamp":1778083859699,"version":"3.51.4"},"publisher-location":"Cham","reference-count":77,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726545","type":"print"},{"value":"9783031726552","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T00:00:00Z","timestamp":1733443200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T00:00:00Z","timestamp":1733443200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72655-2_4","type":"book-chapter","created":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T10:11:14Z","timestamp":1733393474000},"page":"54-73","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Quantized Prompt for\u00a0Efficient Generalization of\u00a0Vision-Language Models"],"prefix":"10.1007","author":[{"given":"Tianxiang","family":"Hao","sequence":"first","affiliation":[]},{"given":"Xiaohan","family":"Ding","sequence":"additional","affiliation":[]},{"given":"Juexiao","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Yuhong","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Hui","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Guiguang","family":"Ding","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,6]]},"reference":[{"key":"4_CR1","unstructured":"Bahng, H., Jahanian, A., Sankaranarayanan, S., Isola, P.: Visual prompting: modifying pixel space to adapt pre-trained models. arXiv preprint arXiv:2203.17274 (2022)"},{"key":"4_CR2","unstructured":"Banner, R., Nahshan, Y., Soudry, D.: Post training 4-bit quantization of convolutional networks for rapid-deployment. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Bhalgat, Y., Lee, J., Nagel, M., Blankevoort, T., Kwak, N.: LSQ+: improving low-bit quantization through learnable offsets and better initialization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 696\u2013697 (2020)","DOI":"10.1109\/CVPRW50498.2020.00356"},{"key":"4_CR4","unstructured":"Bolya, D., Fu, C.Y., Dai, X., Zhang, P., Feichtenhofer, C., Hoffman, J.: Token merging: your VIT but faster. In: The Eleventh International Conference on Learning Representations (2023)"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Bulat, A., Tzimiropoulos, G.: LASP: text-to-text optimization for language-aware soft prompting of vision & language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23232\u201323241 (2023)","DOI":"10.1109\/CVPR52729.2023.02225"},{"key":"4_CR6","first-page":"16664","volume":"35","author":"S Chen","year":"2022","unstructured":"Chen, S., et al.: Adaptformer: adapting vision transformers for scalable visual recognition. Adv. Neural. Inf. Process. Syst. 35, 16664\u201316678 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Ding, J., Xue, N., Xia, G.S., Dai, D.: Decoupling zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11583\u201311592 (2022)","DOI":"10.1109\/CVPR52688.2022.01129"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Ding, X., Ding, G., Guo, Y., Han, J.: Centripetal SGD for pruning very deep convolutional networks with complicated structure. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4943\u20134953 (2019)","DOI":"10.1109\/CVPR.2019.00508"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Ding, X., Guo, Y., Ding, G., Han, J.: ACNET: strengthening the kernel skeletons for powerful CNN via asymmetric convolution blocks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1911\u20131920 (2019)","DOI":"10.1109\/ICCV.2019.00200"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Ding, X., Hao, T., Tan, J., Liu, J., Han, J., Guo, Y., Ding, G.: Resrep: lossless CNN pruning via decoupling remembering and forgetting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4510\u20134520 (2021)","DOI":"10.1109\/ICCV48922.2021.00447"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, X., Han, J., Ding, G.: Scaling up your kernels to 31x31: revisiting large kernel design in CNNs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11963\u201311975 (2022)","DOI":"10.1109\/CVPR52688.2022.01166"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, X., Ma, N., Han, J., Ding, G., Sun, J.: Repvgg: making VGG-style convnets great again. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13733\u201313742 (2021)","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"4_CR13","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2020)"},{"key":"4_CR14","unstructured":"Esser, S.K., McKinstry, J.L., Bablani, D., Appuswamy, R., Modha, D.S.: Learned step size quantization. In: International Conference on Learning Representations (2019)"},{"key":"4_CR15","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"701","DOI":"10.1007\/978-3-031-20077-9_41","volume-title":"European Conference on Computer Vision","author":"C Feng","year":"2022","unstructured":"Feng, C., et al.: PromptDet: towards open-vocabulary detection using uncurated images. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13669, pp. 701\u2013717. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_41"},{"key":"4_CR16","unstructured":"Finkelstein, A., Almog, U., Grobman, M.: Fighting quantization bias with bias. arXiv preprint arXiv:1906.03193 (2019)"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Gholami, A., Kim, S., Dong, Z., Yao, Z., Mahoney, M.W., Keutzer, K.: A survey of quantization methods for efficient neural network inference. In: Low-Power Computer Vision, pp. 291\u2013326. Chapman and Hall\/CRC (2022)","DOI":"10.1201\/9781003162810-13"},{"key":"4_CR18","unstructured":"Han, S., Mao, H., Dally, W.J.: Deep compression: compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)"},{"key":"4_CR19","unstructured":"Hao, T., Chen, H., Guo, Y., Ding, G.: Consolidator: mergeable adapter with grouped connections for visual adaptation. arXiv preprint arXiv:2305.00603 (2023)"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Hao, T., Ding, X., Han, J., Guo, Y., Ding, G.: Manipulating identical filter redundancy for efficient pruning on deep and complicated CNN. IEEE Trans. Neural Netw. Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2023.3298263"},{"key":"4_CR21","unstructured":"Hao, T., Lyu, M., Chen, H., Zhao, S., Han, J., Ding, G.: Re-parameterized low-rank prompt: generalize a vision-language model within 0.5 k parameters. arXiv preprint arXiv:2312.10813 (2023)"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4_CR23","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning, pp. 2790\u20132799. PMLR (2019)"},{"key":"4_CR24","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"4_CR25","unstructured":"Hubara, I., Nahshan, Y., Hanani, Y., Banner, R., Soudry, D.: Accurate post training quantization with small calibration sets. In: International Conference on Machine Learning, pp. 4466\u20134475. PMLR (2021)"},{"key":"4_CR26","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"key":"4_CR27","unstructured":"Jia, M., et al.: Visual prompt tuning. arXiv preprint arXiv:2203.12119 (2022)"},{"key":"4_CR28","doi-asserted-by":"publisher","first-page":"962","DOI":"10.1162\/tacl_a_00407","volume":"9","author":"Z Jiang","year":"2021","unstructured":"Jiang, Z., Araki, J., Ding, H., Neubig, G.: How can we know when language models know? On the calibration of language models for question answering. Trans. Assoc. Comput. Linguist. 9, 962\u2013977 (2021)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Jie, S., Wang, H., Deng, Z.H.: Revisiting the parameter efficiency of adapters from the perspective of precision redundancy. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17217\u201317226 (2023)","DOI":"10.1109\/ICCV51070.2023.01579"},{"key":"4_CR30","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/978-3-031-19833-5_7","volume-title":"European Conference on Computer Vision","author":"C Ju","year":"2022","unstructured":"Ju, C., Han, T., Zheng, K., Zhang, Y., Xie, W.: Prompting visual-language models for efficient video understanding. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13695, pp. 105\u2013124. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_7"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Jung, S., et al.: Learning to quantize deep networks by optimizing quantization intervals with task loss. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4350\u20134359 (2019)","DOI":"10.1109\/CVPR.2019.00448"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Khattak, M.U., Rasheed, H., Maaz, M., Khan, S., Khan, F.S.: Maple: multi-modal prompt learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19113\u201319122 (2023)","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Khattak, M.U., Wasim, S.T., Naseer, M., Khan, S., Yang, M.H., Khan, F.S.: Self-regulating prompts: foundational model adaptation without forgetting. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15190\u201315200 (2023)","DOI":"10.1109\/ICCV51070.2023.01394"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Lee, D., Song, S., Suh, J., Choi, J., Lee, S., Kim, H.J.: Read-only prompt optimization for vision-language few-shot learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1401\u20131411 (2023)","DOI":"10.1109\/ICCV51070.2023.00135"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"4_CR36","unstructured":"Li, B., Weinberger, K.Q., Belongie, S., Koltun, V., Ranftl, R.: Language-driven semantic segmentation. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=RriDjddCLN"},{"key":"4_CR37","unstructured":"Li, H., Kadav, A., Durdanovic, I., Samet, H., Graf, H.P.: Pruning filters for efficient convnets. arXiv preprint arXiv:1608.08710 (2016)"},{"key":"4_CR38","doi-asserted-by":"crossref","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"4_CR39","first-page":"34451","volume":"35","author":"Y Li","year":"2022","unstructured":"Li, Y., Xu, S., Zhang, B., Cao, X., Gao, P., Guo, G.: Q-VIT: accurate and fully quantized low-bit vision transformer. Adv. Neural. Inf. Process. Syst. 35, 34451\u201334463 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Li, Z., Xiao, J., Yang, L., Gu, Q.: Repq-vit: scale reparameterization for post-training quantization of vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17227\u201317236 (2023)","DOI":"10.1109\/ICCV51070.2023.01580"},{"key":"4_CR41","unstructured":"Lian, D., Zhou, D., Feng, J., Wang, X.: Scaling & shifting your features: a new baseline for efficient model tuning. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"4_CR42","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., Neubig, G.: Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. arXiv preprint arXiv:2107.13586 (2021)"},{"key":"4_CR43","doi-asserted-by":"crossref","unstructured":"Liu, X., Ji, K., Fu, Y., Du, Z., Yang, Z., Tang, J.: P-tuning v2: prompt tuning can be comparable to fine-tuning universally across scales and tasks. arXiv preprint arXiv:2110.07602 (2021)","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"4_CR44","unstructured":"Liu, X., et al.: GPT understands, too. arXiv preprint arXiv:2103.10385 (2021)"},{"key":"4_CR45","first-page":"28092","volume":"34","author":"Z Liu","year":"2021","unstructured":"Liu, Z., Wang, Y., Han, K., Zhang, W., Ma, S., Gao, W.: Post-training quantization for vision transformer. Adv. Neural. Inf. Process. Syst. 34, 28092\u201328103 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4_CR46","doi-asserted-by":"crossref","unstructured":"Liu, Z., Li, J., Shen, Z., Huang, G., Yan, S., Zhang, C.: Learning efficient convolutional networks through network slimming. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2736\u20132744 (2017)","DOI":"10.1109\/ICCV.2017.298"},{"key":"4_CR47","doi-asserted-by":"crossref","unstructured":"L\u00fcddecke, T., Ecker, A.: Image segmentation using text and image prompts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7086\u20137096 (2022)","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"4_CR48","series-title":"LNCS","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73232-4_13","volume-title":"European Conference on Computer Vision (ECCV)","author":"M Lyu","year":"2024","unstructured":"Lyu, M., Hao, T., Xu, X., Chen, H., Han, J., Ding, G.: Learn from the learnt: source-free active domain adaptation via contrastive sampling and visual persistence. In: Leonardis, A., Ricci, E., Roth, S., Russakovsky, O., Sattler, T., Varol, G. (eds.) ECCV 2024. LNCS, vol. 15059. Springer, Cham (2024). https:\/\/doi.org\/10.1007\/978-3-031-73232-4_13"},{"key":"4_CR49","doi-asserted-by":"crossref","unstructured":"Lyu, M., et al.: One-dimensional adapter to rule them all: Concepts diffusion models and erasing applications. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7559\u20137568 (2024)","DOI":"10.1109\/CVPR52733.2024.00722"},{"key":"4_CR50","doi-asserted-by":"crossref","unstructured":"Lyu, M., et al.: Box-level active detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23766\u201323775 (2023)","DOI":"10.1109\/CVPR52729.2023.02276"},{"key":"4_CR51","series-title":"LNCS","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20080-9_30","volume-title":"The European Conference on Computer Vision","author":"M Maaz","year":"2022","unstructured":"Maaz, M., Rasheed, H., Khan, S., Khan, F.S., Anwer, R.M., Yang, M.H.: Class-agnostic object detection with multi-modal transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13670. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20080-9_30"},{"key":"4_CR52","unstructured":"Meller, E., Finkelstein, A., Almog, U., Grobman, M.: Same, same but different: recovering neural network quantization error through weight factorization. In: International Conference on Machine Learning, pp. 4486\u20134495. PMLR (2019)"},{"key":"4_CR53","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1007\/978-3-031-19809-0_30","volume-title":"European Conference on Computer Vision","author":"N Mu","year":"2022","unstructured":"Mu, N., Kirillov, A., Wagner, D., Xie, S.: Slip: self-supervision meets language-image pre-training. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13686, pp. 529\u2013544. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19809-0_30"},{"key":"4_CR54","doi-asserted-by":"crossref","unstructured":"Nagel, M., Baalen, M.V., Blankevoort, T., Welling, M.: Data-free quantization through weight equalization and bias correction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1325\u20131334 (2019)","DOI":"10.1109\/ICCV.2019.00141"},{"key":"4_CR55","unstructured":"Nagel, M., Fournarakis, M., Amjad, R.A., Bondarenko, Y., Van\u00a0Baalen, M., Blankevoort, T.: A white paper on neural network quantization. arXiv preprint arXiv:2106.08295 (2021)"},{"key":"4_CR56","unstructured":"Nagel, M., Fournarakis, M., Bondarenko, Y., Blankevoort, T.: Overcoming oscillations in quantization-aware training. In: International Conference on Machine Learning, pp. 16318\u201316330. PMLR (2022)"},{"key":"4_CR57","unstructured":"Qian, R., Li, Y., Xu, Z., Yang, M.H., Belongie, S., Cui, Y.: Multimodal open-vocabulary video classification via pre-trained vision and language models. arXiv preprint arXiv:2207.07646 (2022)"},{"key":"4_CR58","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"4_CR59","doi-asserted-by":"crossref","unstructured":"Rao, Y., et al.: Denseclip: language-guided dense prediction with context-aware prompting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18082\u201318091 (2022)","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"4_CR60","unstructured":"Rasheed, H.A., Maaz, M., Khattak, M.U., Khan, S., Khan, F.: Bridging the gap between object and image-level representations for open-vocabulary detection. In: Oh, A.H., Agarwal, A., Belgrave, D., Cho, K. (eds.) Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=aKXBrj0DHm"},{"key":"4_CR61","doi-asserted-by":"crossref","unstructured":"Shin, T., Razeghi, Y., Logan\u00a0IV, R.L., Wallace, E., Singh, S.: Autoprompt: eliciting knowledge from language models with automatically generated prompts. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 4222\u20134235 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.346"},{"issue":"1","key":"4_CR62","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"4_CR63","doi-asserted-by":"crossref","unstructured":"Xian, Y., Schiele, B., Akata, Z.: Zero-shot learning-the good, the bad and the ugly. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.328"},{"key":"4_CR64","doi-asserted-by":"crossref","unstructured":"Xiong, Y., et al.: Pyra: parallel yielding re-activation for training-inference efficient task adaptation. arXiv preprint arXiv:2403.09192 (2024)","DOI":"10.1007\/978-3-031-72673-6_25"},{"key":"4_CR65","doi-asserted-by":"crossref","unstructured":"Xiong, Y., Chen, H., Lin, Z., Zhao, S., Ding, G.: Confidence-based visual dispersal for few-shot unsupervised domain adaptation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11621\u201311631 (2023)","DOI":"10.1109\/ICCV51070.2023.01067"},{"key":"4_CR66","unstructured":"Xiong, Y., et al.: Temporal scaling law for large language models. arXiv preprint arXiv:2404.17785 (2024)"},{"key":"4_CR67","doi-asserted-by":"crossref","unstructured":"Yao, H., Zhang, R., Xu, C.: Visual-language prompt tuning with knowledge-guided context optimization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6757\u20136767 (2023)","DOI":"10.1109\/CVPR52729.2023.00653"},{"key":"4_CR68","unstructured":"Yao, L., et al.: FILIP: fine-grained interactive language-image pre-training. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=cpDhcsEDC2"},{"key":"4_CR69","unstructured":"Yu, S., et al.: Unified visual transformer compression. In: International Conference on Learning Representations (2022)"},{"key":"4_CR70","unstructured":"Yuan, L., et\u00a0al.: Florence: a new foundation model for computer vision. arXiv preprint arXiv:2111.11432 (2021)"},{"key":"4_CR71","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1007\/978-3-031-19775-8_12","volume-title":"European Conference on Computer Vision","author":"Z Yuan","year":"2022","unstructured":"Yuan, Z., Xue, C., Chen, Y., Wu, Q., Sun, G.: PTQ4ViT: post-training quantization for vision transformers with twin uniform quantization. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13672, pp. 191\u2013207. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19775-8_12"},{"key":"4_CR72","doi-asserted-by":"crossref","unstructured":"Zang, Y., Li, W., Zhou, K., Huang, C., Loy, C.C.: Open-vocabulary detr with conditional matching. arXiv preprint arXiv:2203.11876 (2022)","DOI":"10.1007\/978-3-031-20077-9_7"},{"key":"4_CR73","doi-asserted-by":"crossref","unstructured":"Zhai, X., et al.: Lit: zero-shot transfer with locked-image text tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18123\u201318133 (2022)","DOI":"10.1109\/CVPR52688.2022.01759"},{"key":"4_CR74","unstructured":"Zhang, Y., Zhou, K., Liu, Z.: Neural prompt search. arXiv preprint arXiv:2206.04673 (2022)"},{"key":"4_CR75","doi-asserted-by":"crossref","unstructured":"Zheng, K., et al.: Regularized mask tuning: uncovering hidden knowledge in pre-trained vision-language models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11663\u201311673 (2023)","DOI":"10.1109\/ICCV51070.2023.01071"},{"key":"4_CR76","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16816\u201316825 (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"issue":"9","key":"4_CR77","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vision"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72655-2_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T11:27:15Z","timestamp":1733398035000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72655-2_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,6]]},"ISBN":["9783031726545","9783031726552"],"references-count":77,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72655-2_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,6]]},"assertion":[{"value":"6 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}