{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T19:27:32Z","timestamp":1772825252835,"version":"3.50.1"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031919787","type":"print"},{"value":"9783031919794","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91979-4_14","type":"book-chapter","created":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T19:07:00Z","timestamp":1748718420000},"page":"167-182","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Generalized SAM: Efficient Fine-Tuning of\u00a0SAM for\u00a0Variable Input Image Sizes"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0392-6426","authenticated-orcid":false,"given":"Sota","family":"Kato","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6969-4017","authenticated-orcid":false,"given":"Hinako","family":"Mitsuoka","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5675-8713","authenticated-orcid":false,"given":"Kazuhiro","family":"Hotta","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Alom, M.Z., Hasan, M., Yakopcic, C., Taha, T.M., Asari, V.K.: Recurrent residual convolutional neural network based on u-net (r2u-net) for medical image segmentation. arXiv preprint arXiv:1802.06955 (2018)","DOI":"10.1109\/NAECON.2018.8556686"},{"issue":"2","key":"14_CR2","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1016\/j.patrec.2008.04.005","volume":"30","author":"GJ Brostow","year":"2009","unstructured":"Brostow, G.J., Fauqueur, J., Cipolla, R.: Semantic object classes in video: a high-definition ground truth database. Pattern Recogn. Lett. 30(2), 88\u201397 (2009)","journal-title":"Pattern Recogn. Lett."},{"key":"14_CR3","unstructured":"Bubeck, S., et\u00a0al.: Sparks of artificial general intelligence: Early experiments with gpt-4. arXiv preprint arXiv:2303.12712 (2023)"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Cao, H., et al.: Swin-unet: Unet-like pure transformer for medical image segmentation. In: European Conference on Computer Vision, pp. 205\u2013218. Springer (2022)","DOI":"10.1007\/978-3-031-25066-8_9"},{"key":"14_CR5","unstructured":"Chen, J., et al.: Transunet: transformers make strong encoders for medical image segmentation. arXiv preprint arXiv:2102.04306 (2021)"},{"issue":"4","key":"14_CR6","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"14_CR7","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European conference on computer vision (ECCV). pp. 801\u2013818 (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"14_CR9","first-page":"16664","volume":"35","author":"S Chen","year":"2022","unstructured":"Chen, S., Ge, C., Tong, Z., Wang, J., Song, Y., Wang, J., Luo, P.: Adaptformer: adapting vision transformers for scalable visual recognition. Adv. Neural. Inf. Process. Syst. 35, 16664\u201316678 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR10","unstructured":"Chu, X., Tian, Z., Zhang, B., Wang, X., Shen, C.: Conditional positional encodings for vision transformers. arXiv preprint arXiv:2102.10882 (2021)"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"14_CR12","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"14_CR13","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. CoRR abs\/2010.11929 (2020). https:\/\/arxiv.org\/abs\/2010.11929"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"14_CR15","unstructured":"Segmentation of neuronal structures in em stacks challenge (2012). https:\/\/imagej.net\/events\/isbi-2012-segmentation-challenge"},{"key":"14_CR16","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"14_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1007\/978-3-030-37734-2_37","volume-title":"MultiMedia Modeling","author":"D Jha","year":"2020","unstructured":"Jha, D., et al.: Kvasir-SEG: a segmented polyp dataset. In: Ro, Y.M., et al. (eds.) MMM 2020. LNCS, vol. 11962, pp. 451\u2013462. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-37734-2_37"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4015\u20134026 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"14_CR19","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012)"},{"key":"14_CR20","unstructured":"Lin, X., Xiang, Y., Zhang, L., Yang, X., Yan, Z., Yu, L.: Samus: adapting segment anything model for clinically-friendly and generalizable ultrasound image segmentation. arXiv preprint arXiv:2309.06824 (2023)"},{"key":"14_CR21","unstructured":"Loshchilov, I., Hutter, F.: Sgdr: stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983 (2016)"},{"key":"14_CR22","unstructured":"Mnih, V.: Machine Learning for Aerial Image Labeling. Ph.D. thesis, University of Toronto (2013)"},{"key":"14_CR23","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Medical image computing and computer-assisted intervention\u2013MICCAI 2015: 18th international conference, Munich, Germany, October 5-9, 2015, proceedings, part III 18, pp. 234\u2013241. Springer (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"14_CR26","unstructured":"Multi-atlas labeling beyond the cranial vault - workshop and challenge. https:\/\/www.synapse.org\/#!Synapse:syn3193805\/wiki\/217789 (2015)"},{"key":"14_CR27","unstructured":"Touvron, H., et\u00a0al.: Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"14_CR28","unstructured":"Vaswani, A., et al.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"14_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"696","DOI":"10.1007\/978-3-030-58601-0_41","volume-title":"Computer Vision \u2013 ECCV 2020","author":"E Xie","year":"2020","unstructured":"Xie, E., Wang, W., Wang, W., Ding, M., Shen, C., Luo, P.: Segmenting transparent objects in the wild. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12358, pp. 696\u2013711. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58601-0_41"},{"key":"14_CR30","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"14_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, K., Liu, D.: Customized segment anything model for medical image segmentation. arXiv preprint arXiv:2304.13785 (2023)","DOI":"10.2139\/ssrn.4495221"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"14_CR33","unstructured":"Zhong, Z., Tang, Z., He, T., Fang, H., Yuan, C.: Convolution meets lora: parameter efficient finetuning for segment anything model. arXiv preprint arXiv:2401.17868 (2024)"},{"key":"14_CR34","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Rahman\u00a0Siddiquee, M.M., Tajbakhsh, N., Liang, J.: Unet++: A nested u-net architecture for medical image segmentation. In: Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support: 4th International Workshop, DLMIA 2018, and 8th International Workshop, ML-CDS 2018, Held in Conjunction with MICCAI 2018, Granada, Spain, September 20, 2018, Proceedings 4, pp. 3\u201311. Springer (2018)","DOI":"10.1007\/978-3-030-00889-5_1"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91979-4_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T19:07:10Z","timestamp":1748718430000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91979-4_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031919787","9783031919794"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91979-4_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}