{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T03:40:34Z","timestamp":1771990834378,"version":"3.50.1"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732225","type":"print"},{"value":"9783031732232","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T00:00:00Z","timestamp":1731024000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T00:00:00Z","timestamp":1731024000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73223-2_17","type":"book-chapter","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T18:48:58Z","timestamp":1731005338000},"page":"291-308","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Introducing Routing Functions to\u00a0Vision-Language Parameter-Efficient Fine-Tuning with\u00a0Low-Rank Bottlenecks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0656-5745","authenticated-orcid":false,"given":"Tingyu","family":"Qu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3307-9723","authenticated-orcid":false,"given":"Tinne","family":"Tuytelaars","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3732-9323","authenticated-orcid":false,"given":"Marie-Francine","family":"Moens","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,8]]},"reference":[{"key":"17_CR1","doi-asserted-by":"publisher","unstructured":"Aghajanyan, A., Gupta, S., Zettlemoyer, L.: Intrinsic dimensionality explains the effectiveness of language model fine-tuning. In: Zong, C., Xia, F., Li, W., Navigli, R. (eds.) Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 7319\u20137328. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.568, https:\/\/aclanthology.org\/2021.acl-long.568","DOI":"10.18653\/v1\/2021.acl-long.568"},{"key":"17_CR2","doi-asserted-by":"publisher","unstructured":"Ben\u00a0Zaken, E., Goldberg, Y., Ravfogel, S.: BitFit: Simple parameter-efficient fine-tuning for transformer-based masked language-models. In: Muresan, S., Nakov, P., Villavicencio, A. (eds.) Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), Dublin, Ireland, pp.\u00a01\u20139. Association for Computational Linguistics (2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-short.1, https:\/\/aclanthology.org\/2022.acl-short.1","DOI":"10.18653\/v1\/2022.acl-short.1"},{"key":"17_CR3","unstructured":"Cho, J., Lei, J., Tan, H., Bansal, M.: Unifying vision-and-language tasks via text generation. In: ICML (2021)"},{"key":"17_CR4","unstructured":"Choromanski, K.M., et al.: Rethinking attention with performers. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=Ua6zuk0WRH"},{"key":"17_CR5","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: Qlora: efficient finetuning of quantized llms. In: Oh, A., Neumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a036, pp. 10088\u201310115. Curran Associates, Inc. (2023). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/1feb87871436031bdc0f2beaa62a049b-Paper-Conference.pdf"},{"key":"17_CR6","unstructured":"Dosovitskiy, A., et al..: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the V in VQA matter: Elevating the role of image understanding in Visual Question Answering. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.670"},{"key":"17_CR8","unstructured":"He, J., Zhou, C., Ma, X., Berg-Kirkpatrick, T., Neubig, G.: Towards a unified view of parameter-efficient transfer learning. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=0RDcd5Axok"},{"key":"17_CR9","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a097, pp. 2790\u20132799. PMLR (2019). https:\/\/proceedings.mlr.press\/v97\/houlsby19a.html"},{"key":"17_CR10","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Hu, Z.Y., Li, Y., Lyu, M.R., Wang, L.: Vl-pet: vision-and-language parameter-efficient tuning via granularity control. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3010\u20133020 (2023)","DOI":"10.1109\/ICCV51070.2023.00281"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Hudson, D.A., Manning, C.D.: GQA: a new dataset for real-world visual reasoning and compositional question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00686"},{"key":"17_CR13","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"709","DOI":"10.1007\/978-3-031-19827-4_41","volume-title":"ECCV 2022","author":"M Jia","year":"2022","unstructured":"Jia, M., et al.: Visual prompt tuning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13693, pp. 709\u2013727. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19827-4_41"},{"key":"17_CR14","doi-asserted-by":"publisher","unstructured":"Karimi\u00a0Mahabadi, R., Ruder, S., Dehghani, M., Henderson, J.: Parameter-efficient multi-task fine-tuning for transformers via shared hypernetworks. In: Zong, C., Xia, F., Li, W., Navigli, R. (eds.) Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers). pp. 565\u2013576. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.47, https:\/\/aclanthology.org\/2021.acl-long.47","DOI":"10.18653\/v1\/2021.acl-long.47"},{"key":"17_CR15","unstructured":"Katharopoulos, A., Vyas, A., Pappas, N., Fleuret, F.: Transformers are RNNs: Fast autoregressive transformers with linear attention. In: III, H.D., Singh, A. (eds.) Proceedings of the 37th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0119, pp. 5156\u20135165. PMLR (2020). https:\/\/proceedings.mlr.press\/v119\/katharopoulos20a.html"},{"key":"17_CR16","unstructured":"Koohpayegani, S.A., L, N.K., Nooralinejad, P., Kolouri, S., Pirsiavash, H.: NOLA: Networks as linear combination of low rank random basis. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=TjfXcDgvzk"},{"key":"17_CR17","unstructured":"Kopiczko, D.J., Blankevoort, T., Asano, Y.M.: VeRA: Vector-based random matrix adaptation. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=NjNfLdxr3A"},{"key":"17_CR18","unstructured":"Lei, T., et al.: Conditional adapters: parameter-efficient transfer learning with fast inference. In: Thirty-seventh Conference on Neural Information Processing Systems (2023). https:\/\/openreview.net\/forum?id=IyYyKov0Aj"},{"key":"17_CR19","doi-asserted-by":"publisher","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. In: Moens, M.F., Huang, X., Specia, L., Yih, S.W.T. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, Punta Cana, Dominican Republic, pp. 3045\u20133059. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.243, https:\/\/aclanthology.org\/2021.emnlp-main.243","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"17_CR20","doi-asserted-by":"publisher","unstructured":"Lewis, M., et al.: BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Jurafsky, D., Chai, J., Schluter, N., Tetreault, J. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7871\u20137880. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.703, https:\/\/aclanthology.org\/2020.acl-main.703","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"17_CR21","unstructured":"Li, C., Farkhoor, H., Liu, R., Yosinski, J.: Measuring the intrinsic dimension of objective landscapes. In: International Conference on Learning Representations (2018). https:\/\/openreview.net\/forum?id=ryup8-WCW"},{"key":"17_CR22","doi-asserted-by":"publisher","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: Optimizing continuous prompts for generation. In: Zong, C., Xia, F., Li, W., Navigli, R. (eds.) Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 4582\u20134597. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.353, https:\/\/aclanthology.org\/2021.acl-long.353","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"17_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"17_CR24","doi-asserted-by":"publisher","unstructured":"Lin, Z., Madotto, A., Fung, P.: Exploring versatile generative language model via parameter-efficient transfer learning. In: Cohn, T., He, Y., Liu, Y. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 441\u2013459. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.41, https:\/\/aclanthology.org\/2020.findings-emnlp.41","DOI":"10.18653\/v1\/2020.findings-emnlp.41"},{"key":"17_CR25","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized BERT pretraining approach. CoRR abs\/1907.11692 (2019). http:\/\/arxiv.org\/abs\/1907.11692"},{"key":"17_CR26","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"17_CR27","doi-asserted-by":"publisher","unstructured":"Luo, Z., Hu, Z., Xi, Y., Zhang, R., Ma, J.: I-tuning: tuning frozen language models with image for lightweight image captioning. In: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135 (2023). https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096424","DOI":"10.1109\/ICASSP49357.2023.10096424"},{"key":"17_CR28","unstructured":"mahabadi, R.K., Henderson, J., Ruder, S.: Compacter: efficient low-rank hypercomplex adapter layers. In: Beygelzimer, A., Dauphin, Y., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems (2021). https:\/\/openreview.net\/forum?id=bqGK5PyI6-N"},{"key":"17_CR29","doi-asserted-by":"publisher","unstructured":"Mao, Y., et al.: UniPELT: a unified framework for parameter-efficient language model tuning. In: Muresan, S., Nakov, P., Villavicencio, A. (eds.) Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Dublin, Ireland, pp. 6253\u20136264. Association for Computational Linguistics (2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.433, https:\/\/aclanthology.org\/2022.acl-long.433","DOI":"10.18653\/v1\/2022.acl-long.433"},{"key":"17_CR30","unstructured":"Qin, Z., et al.: cosformer: Rethinking softmax in attention. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=Bl8CQrx2Up4"},{"key":"17_CR31","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 8748\u20138763. PMLR (2021). https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"17_CR32","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners. In: Preprint (2019)"},{"key":"17_CR33","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020). http:\/\/jmlr.org\/papers\/v21\/20-074.html"},{"key":"17_CR34","doi-asserted-by":"publisher","unstructured":"R\u00fcckl\u00e9, A., et al.: AdapterDrop: on the efficiency of adapters in transformers. In: Moens, M.F., Huang, X., Specia, L., Yih, S.W.T. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing Punta Cana, Dominican Republic, pp. 7930\u20137946. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.626, https:\/\/aclanthology.org\/2021.emnlp-main.626","DOI":"10.18653\/v1\/2021.emnlp-main.626"},{"key":"17_CR35","doi-asserted-by":"publisher","unstructured":"Suhr, A., Zhou, S., Zhang, A., Zhang, I., Bai, H., Artzi, Y.: A corpus for reasoning about natural language grounded in photographs. In: Korhonen, A., Traum, D., M\u00e0rquez, L. (eds.) Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, pp. 6418\u20136428. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1644, https:\/\/aclanthology.org\/P19-1644","DOI":"10.18653\/v1\/P19-1644"},{"key":"17_CR36","unstructured":"Sung, Y.L., Cho, J., Bansal, M.: LST: Ladder side-tuning for parameter and memory efficient transfer learning. In: Oh, A.H., Agarwal, A., Belgrave, D., Cho, K. (eds.) Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=isPnnaTZaP5"},{"key":"17_CR37","doi-asserted-by":"crossref","unstructured":"Sung, Y.L., Cho, J., Bansal, M.: Vl-adapter: parameter-efficient transfer learning for vision-and-language tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5227\u20135237 (2022)","DOI":"10.1109\/CVPR52688.2022.00516"},{"key":"17_CR38","doi-asserted-by":"publisher","unstructured":"Valipour, M., Rezagholizadeh, M., Kobyzev, I., Ghodsi, A.: DyLoRA: parameter-efficient tuning of pre-trained models using dynamic search-free low-rank adaptation. In: Vlachos, A., Augenstein, I. (eds.) Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, Dubrovnik, Croatia, pp. 3274\u20133287. Association for Computational Linguistics (2023).https:\/\/doi.org\/10.18653\/v1\/2023.eacl-main.239, https:\/\/aclanthology.org\/2023.eacl-main.239","DOI":"10.18653\/v1\/2023.eacl-main.239"},{"key":"17_CR39","doi-asserted-by":"publisher","unstructured":"Wang, Y., et al.: AdaMix: mixture-of-adaptations for parameter-efficient model tuning. In: Goldberg, Y., Kozareva, Z., Zhang, Y. (eds.) Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, Abu Dhabi, United Arab Emirates, pp. 5744\u20135760. Association for Computational Linguistics (2022). https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.388, https:\/\/aclanthology.org\/2022.emnlp-main.388","DOI":"10.18653\/v1\/2022.emnlp-main.388"},{"key":"17_CR40","unstructured":"Xu, Y., et al.: QA-loRA: quantization-aware low-rank adaptation of large language models. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=WvFoJccpo8"},{"key":"17_CR41","unstructured":"Zhang, Q., et al.: Adaptive budget allocation for parameter-efficient fine-tuning. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=lq62uWRJjiY"},{"key":"17_CR42","doi-asserted-by":"crossref","unstructured":"Zhou, H., Wan, X., Vuli\u0107, I., Korhonen, A.: Autopeft: automatic configuration search for parameter-efficient fine-tuning. Trans. Assoc. Comput. Linguistics 12 (2024)","DOI":"10.1162\/tacl_a_00662"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73223-2_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:06:18Z","timestamp":1731006378000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73223-2_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,8]]},"ISBN":["9783031732225","9783031732232"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73223-2_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,8]]},"assertion":[{"value":"8 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}