{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T20:56:16Z","timestamp":1780088176706,"version":"3.54.0"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031721137","type":"print"},{"value":"9783031721144","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72114-4_68","type":"book-chapter","created":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T13:01:43Z","timestamp":1727874103000},"page":"712-722","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["VLSM-Adapter: Finetuning Vision-Language Segmentation Efficiently with\u00a0Lightweight Blocks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5019-2205","authenticated-orcid":false,"given":"Manish","family":"Dhakal","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0101-5592","authenticated-orcid":false,"given":"Rabin","family":"Adhikari","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4463-6700","authenticated-orcid":false,"given":"Safal","family":"Thapaliya","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2775-4748","authenticated-orcid":false,"given":"Bishesh","family":"Khanal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,10,3]]},"reference":[{"key":"68_CR1","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/978-3-031-44521-7_9","volume-title":"ASMUS 2023","author":"R Adhikari","year":"2023","unstructured":"Adhikari, R., Dhakal, M., Thapaliya, S., Poudel, K., Bhandari, P., Khanal, B.: Synthetic boost: leveraging synthetic data for enhanced vision-language segmentation in echocardiography. In: Kainz, B., Noble, A., Schnabel, J., Khanal, B., M\u00fcller, J.P., Day, T. (eds.) ASMUS 2023. LNCS, vol. 14337, pp. 89\u201399. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-44521-7_9"},{"key":"68_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.dib.2019.104863","volume":"28","author":"W Al-Dhabyani","year":"2020","unstructured":"Al-Dhabyani, W., Gomaa, M., Khaled, H., Fahmy, A.: Dataset of breast ultrasound images. Data Brief 28, 104863 (2020)","journal-title":"Data Brief"},{"key":"68_CR3","doi-asserted-by":"crossref","unstructured":"Bernal, J., S\u00e1nchez, F.J., Fern\u00e1ndez-Esparrach, G., Gil, D., Rodr\u00edguez, C., Vilari\u00f1o, F.: WM-DOVA maps for accurate polyp highlighting in colonoscopy: validation vs. saliency maps from physicians. Comput. Med. Imaging Graph. 43, 99\u2013111 (2015)","DOI":"10.1016\/j.compmedimag.2015.02.007"},{"key":"68_CR4","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"68_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2020)"},{"key":"68_CR6","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/978-90-481-8847-5_10","volume-title":"Theory and Applications of Ontology: Computer Applications","author":"C Fellbaum","year":"2010","unstructured":"Fellbaum, C.: Wordnet. In: Poli, R., Healy, M., Kameas, A. (eds.) Theory and Applications of Ontology: Computer Applications, pp. 231\u2013243. Springer, Dordrecht (2010). https:\/\/doi.org\/10.1007\/978-90-481-8847-5_10"},{"key":"68_CR7","doi-asserted-by":"crossref","unstructured":"Gao, P., et al.: CLIP-adapter: better vision-language models with feature adapters. Int. J. Comput. Vision 1\u201315 (2023)","DOI":"10.1007\/s11263-023-01891-x"},{"key":"68_CR8","unstructured":"Gutman, D., et al.: Skin Lesion Analysis toward Melanoma Detection: A Challenge at ISBI 2016, hosted by ISIC. arXiv preprint arXiv:1605.01397 (2016)"},{"key":"68_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"68_CR10","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning, pp. 2790\u20132799. PMLR (2019)"},{"key":"68_CR11","doi-asserted-by":"crossref","unstructured":"Howard, J., Ruder, S.: Universal language model fine-tuning for text classification. In: Association for Computational Linguistics, vol.\u00a01, pp. 328\u2013339 (2018)","DOI":"10.18653\/v1\/P18-1031"},{"key":"68_CR12","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations (2022)"},{"key":"68_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1007\/978-3-030-37734-2_37","volume-title":"MultiMedia Modeling","author":"D Jha","year":"2020","unstructured":"Jha, D., et al.: Kvasir-SEG: a segmented polyp dataset. In: Ro, Y.M., et al. (eds.) MMM 2020. LNCS, vol. 11962, pp. 451\u2013462. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-37734-2_37"},{"key":"68_CR14","unstructured":"Kendrick, C., et al.: Translating clinical delineation of diabetic foot ulcers into machine-interpretable segmentation. arXiv preprint arXiv:2204.11618 (2022)"},{"issue":"9","key":"68_CR15","doi-asserted-by":"publisher","first-page":"2198","DOI":"10.1109\/TMI.2019.2900516","volume":"38","author":"S Leclerc","year":"2019","unstructured":"Leclerc, S., et al.: Deep learning for segmentation using an open large-scale dataset in 2D echocardiography. IEEE Trans. Med. Imaging 38(9), 2198\u20132210 (2019)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"68_CR16","unstructured":"Long, M., Cao, Y., Wang, J., Jordan, M.: Learning transferable features with deep adaptation networks. In: International Conference on Machine Learning, pp. 97\u2013105. PMLR (2015)"},{"key":"68_CR17","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2018)"},{"key":"68_CR18","doi-asserted-by":"crossref","unstructured":"L\u00fcddecke, T., Ecker, A.: Image segmentation using text and image prompts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7086\u20137096 (2022)","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"68_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-030-90436-4_2","volume-title":"Advances in Visual Computing","author":"P Ngoc Lan","year":"2021","unstructured":"Ngoc Lan, P., et al.: NeoUNet: towards accurate colon polyp segmentation and\u00a0neoplasm detection. In: Bebis, G., et al. (eds.) ISVC 2021. LNCS, vol. 13018, pp. 15\u201328. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-90436-4_2"},{"key":"68_CR20","unstructured":"Poudel, K., Dhakal, M., Bhandari, P., Adhikari, R., Thapaliya, S., Khanal, B.: Exploring transfer learning in medical image segmentation using vision-language models. arXiv preprint arXiv:2308.07706 (2023)"},{"key":"68_CR21","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"issue":"10","key":"68_CR22","doi-asserted-by":"publisher","first-page":"867","DOI":"10.1038\/s42256-022-00536-x","volume":"4","author":"A Saporta","year":"2022","unstructured":"Saporta, A., et al.: Benchmarking saliency methods for chest x-ray interpretation. Nat. Mach. Intell. 4(10), 867\u2013878 (2022)","journal-title":"Nat. Mach. Intell."},{"key":"68_CR23","unstructured":"Song, L., et al.: Meta-adapter: an online few-shot learner for vision-language model. In: Advances in Neural Information Processing Systems, vol.\u00a036, pp. 55361\u201355374 (2023)"},{"key":"68_CR24","doi-asserted-by":"crossref","unstructured":"Sung, Y.L., Cho, J., Bansal, M.: VL-adapter: parameter-efficient transfer learning for vision-and-language tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5227\u20135237 (2022)","DOI":"10.1109\/CVPR52688.2022.00516"},{"key":"68_CR25","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol.\u00a030, pp. 5998\u20136008 (2017)"},{"key":"68_CR26","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: CRIS: clip-driven referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11686\u201311695 (2022)","DOI":"10.1109\/CVPR52688.2022.01139"},{"key":"68_CR27","doi-asserted-by":"crossref","unstructured":"Xu, M., Zhang, Z., Wei, F., Hu, H., Bai, X.: Side adapter network for open-vocabulary semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2945\u20132954 (2023)","DOI":"10.1109\/CVPR52729.2023.00288"},{"key":"68_CR28","unstructured":"Yosinski, J., Clune, J., Bengio, Y., Lipson, H.: How transferable are features in deep neural networks? In: Advances in Neural Information Processing Systems, vol.\u00a027, pp. 3320\u20133328 (2014)"},{"key":"68_CR29","doi-asserted-by":"crossref","unstructured":"Yu, S., Seo, P.H., Son, J.: Zero-shot referring image segmentation with global-local context features. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19456\u201319465 (2023)","DOI":"10.1109\/CVPR52729.2023.01864"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72114-4_68","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T13:09:10Z","timestamp":1727874550000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72114-4_68"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031721137","9783031721144"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72114-4_68","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"3 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Marrakesh","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Morocco","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2024\/en\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}