{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T08:36:04Z","timestamp":1768984564137,"version":"3.49.0"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031727535","type":"print"},{"value":"9783031727542","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72754-2_26","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:57:07Z","timestamp":1730300227000},"page":"456-471","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Lite-SAM Is Actually What You Need for\u00a0Segment Everything"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2819-3717","authenticated-orcid":false,"given":"Jianhai","family":"Fu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1245-3316","authenticated-orcid":false,"given":"Yuanjie","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2852-1312","authenticated-orcid":false,"given":"Ningchuan","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5631-5637","authenticated-orcid":false,"given":"Yi","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7580-0068","authenticated-orcid":false,"given":"Qichao","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9564-1860","authenticated-orcid":false,"given":"Jianping","family":"Xiong","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1905-5377","authenticated-orcid":false,"given":"Jun","family":"Yin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3329-7037","authenticated-orcid":false,"given":"Zhiyu","family":"Xiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"issue":"5","key":"26_CR1","doi-asserted-by":"publisher","first-page":"898","DOI":"10.1109\/TPAMI.2010.161","volume":"33","author":"P Arbelaez","year":"2010","unstructured":"Arbelaez, P., Maire, M., Fowlkes, C., Malik, J.: Contour detection and hierarchical image segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 33(5), 898\u2013916 (2010)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"26_CR2","unstructured":"Bommasani, R., et\u00a0al.: On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)"},{"key":"26_CR3","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 
1877\u20131901 (2020)"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Cai, H., Li, J., Hu, M., Gan, C., Han, S.: EfficientViT: multi-scale linear attention for high-resolution dense prediction (2023)","DOI":"10.1109\/ICCV51070.2023.01587"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Canny, J.: A computational approach to edge detection (1986)","DOI":"10.1016\/B978-0-08-051581-6.50024-6"},{"key":"26_CR6","unstructured":"Dongcai, S.: Efficient graph based image segmentation. Image Processing (2004)"},{"key":"26_CR7","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Gupta, A., Dollar, P., Girshick, R.: LVIS: a dataset for large vocabulary instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5356\u20135364 (2019)","DOI":"10.1109\/CVPR.2019.00550"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Howard, A., et\u00a0al.: Searching for MobileNetV3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"26_CR11","unstructured":"Howard, A.G., et al.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"26_CR12","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: YOLO by Ultralytics (2023). https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"26_CR13","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)"},{"key":"26_CR14","doi-asserted-by":"crossref","unstructured":"Law, H., Deng, J.: CornerNet: detecting objects as paired keypoints (2019)","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"26_CR15","unstructured":"Li, F., et al.: Semantic-SAM: segment and recognize anything at any granularity. arXiv preprint arXiv:2307.04767 (2023)"},{"key":"26_CR16","unstructured":"Li, J., et al.: Next-ViT: next generation vision transformer for efficient deployment in realistic industrial scenarios. arXiv preprint arXiv:2207.05501 (2022)"},{"key":"26_CR17","unstructured":"Li, Y., et al.: EfficientFormer: vision transformers at MobileNet speed. In: Advances in Neural Information Processing Systems, vol. 35, pp. 12934\u201312949 (2022)"},{"key":"26_CR18","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection (2018)","DOI":"10.1109\/ICCV.2017.324"},{"key":"26_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). 
https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Liu, X., Peng, H., Zheng, N., Yang, Y., Hu, H., Yuan, Y.: EfficientViT: memory efficient vision transformer with cascaded group attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14420\u201314430 (2023)","DOI":"10.1109\/CVPR52729.2023.01386"},{"key":"26_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1007\/978-3-030-01264-9_8","volume-title":"Computer Vision \u2013 ECCV 2018","author":"N Ma","year":"2018","unstructured":"Ma, N., Zhang, X., Zheng, H.-T., Sun, J.: ShuffleNet V2: practical guidelines for efficient CNN architecture design. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 122\u2013138. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_8"},{"key":"26_CR22","doi-asserted-by":"crossref","unstructured":"Martin, D., Fowlkes, C., Tal, D., Malik, J.: A database of human segmented natural images and its application to evaluating segmentation algorithms and measuring ecological statistics. In: Proceedings Eighth IEEE International Conference on Computer Vision, ICCV 2001. vol.\u00a02, pp. 416\u2013423. IEEE (2001)","DOI":"10.1109\/ICCV.2001.937655"},{"key":"26_CR23","unstructured":"Mehta, S., Rastegari, M.: MobileViT: light-weight, general-purpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178 (2021)"},{"key":"26_CR24","unstructured":"Mehta, S., Rastegari, M.: MobileViT: light-weight, general-purpose, and mobile-friendly vision transformer (2022)"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Pu, M., Huang, Y., Liu, Y., Guan, Q., Ling, H.: EDTER: edge detection with transformer (2022)","DOI":"10.1109\/CVPR52688.2022.00146"},{"key":"26_CR26","unstructured":"Qiao, Y., et al.: MP-FedCL: multi-prototype federated contrastive learning for edge intelligence. arXiv preprint arXiv:2304.01950 (2023)"},{"key":"26_CR27","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I., et\u00a0al.: Improving language understanding by generative pre-training (2018)"},{"key":"26_CR28","unstructured":"Ren, T., et al.: Grounded SAM: assembling open-world models for diverse visual tasks (2024)"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"26_CR30","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"26_CR31","unstructured":"Vasu, P.K.A., Gabriel, J., Zhu, J., Tuzel, O., Ranjan, A.: FastViT: a fast hybrid vision transformer using structural reparameterization. arXiv preprint arXiv:2303.14189 (2023)"},{"key":"26_CR32","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, X., Cao, Y., Wang, W., Shen, C., Huang, T.: SegGPT: segmenting everything in context (2023). 
https:\/\/arxiv.org\/abs\/2304.03284","DOI":"10.1109\/ICCV51070.2023.00110"},{"key":"26_CR33","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1007\/978-3-031-19803-8_5","volume-title":"ECCV 2022","author":"K Wu","year":"2022","unstructured":"Wu, K., et al.: TinyViT: fast pretraining distillation for small vision transformers. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13681, pp. 68\u201385. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19803-8_5"},{"key":"26_CR34","doi-asserted-by":"crossref","unstructured":"Xie, S., Tu, Z.: Holistically-nested edge detection (2015)","DOI":"10.1109\/ICCV.2015.164"},{"key":"26_CR35","doi-asserted-by":"crossref","unstructured":"Xiong, Y., et al.: EfficientSAM: leveraged masked image pretraining for efficient segment anything (2023)","DOI":"10.1109\/CVPR52733.2024.01525"},{"key":"26_CR36","unstructured":"Yang, Y., Wu, X., He, T., Zhao, H., Liu, X.: SAM3D: segment anything in 3D scenes (2023). https:\/\/arxiv.org\/abs\/2306.03908"},{"key":"26_CR37","unstructured":"Yu, T., et al.: Inpaint anything: segment anything meets image inpainting (2023). https:\/\/arxiv.org\/abs\/2304.06790"},{"key":"26_CR38","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: MetaFormer is actually what you need for vision (2022)","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"26_CR39","unstructured":"Zhang, C., et al.: Faster segment anything: towards lightweight SAM for mobile applications. arXiv preprint arXiv:2306.14289 (2023)"},{"key":"26_CR40","unstructured":"Zhang, C., Han, D., Zheng, S., Choi, J., Kim, T.H., Hong, C.S.: MobileSAMv2: faster segment anything to everything (2023)"},{"key":"26_CR41","unstructured":"Zhang, C., et\u00a0al.: One small step for generative AI, one giant leap for AGI: a complete survey on ChatGPT in AIGC era. arXiv preprint arXiv:2304.06488 (2023)"},{"key":"26_CR42","unstructured":"Zhang, C., et\u00a0al.: A complete survey on generative AI (AIGC): is ChatGPT from GPT-4 to GPT-5 all you need? arXiv preprint arXiv:2303.11717 (2023)"},{"key":"26_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, C., et al.: Dual temperature helps contrastive learning without many negative samples: towards understanding and simplifying MOCO. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14441\u201314450 (2022)","DOI":"10.1109\/CVPR52688.2022.01404"},{"key":"26_CR44","unstructured":"Zhang, C., Zhang, K., Zhang, C., Pham, T.X., Yoo, C.D., Kweon, I.S.: How does SimSiam avoid collapse without negative samples? A unified understanding with self-supervised contrastive learning. arXiv preprint arXiv:2203.16262 (2022)"},{"key":"26_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., Sun, J.: ShuffleNet: an extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6848\u20136856 (2018)","DOI":"10.1109\/CVPR.2018.00716"},{"key":"26_CR46","unstructured":"Zhao, X., et al.: Fast segment anything. arXiv preprint arXiv:2306.12156 (2023)"},{"key":"26_CR47","unstructured":"Zhou, C., Li, X., Loy, C.C., Dai, B.: EdgeSAM: prompt-in-the-loop distillation for on-device deployment of SAM (2023)"},{"key":"26_CR48","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. 
arXiv preprint arXiv:1904.07850 (2019)"},{"key":"26_CR49","unstructured":"Zou, X., et al.: Segment everything everywhere all at once (2023). https:\/\/arxiv.org\/abs\/2304.06718"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72754-2_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:14:49Z","timestamp":1730301289000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72754-2_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031727535","9783031727542"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72754-2_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
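The record above is a Crossref REST API work response for DOI 10.1007/978-3-031-72754-2_26. As a minimal sketch of how such a record can be retrieved and read programmatically, the Python snippet below fetches the same work from the public Crossref works endpoint (https://api.crossref.org/works/{doi}, assumed reachable without authentication) and extracts a few fields; the field names ("message", "title", "author", "published", "references-count") are taken directly from the JSON above, and everything else is illustrative only.

# Minimal sketch: fetch this Crossref work record and read a few fields.
# Assumes the public Crossref REST API endpoint https://api.crossref.org/works/{doi}.
import json
import urllib.request

DOI = "10.1007/978-3-031-72754-2_26"
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url) as resp:
    record = json.load(resp)

# The payload mirrors the record above: the metadata lives under "message".
work = record["message"]
title = work["title"][0]                      # "Lite-SAM Is Actually What You Need for Segment Everything"
authors = [f'{a["given"]} {a["family"]}' for a in work["author"]]
year = work["published"]["date-parts"][0][0]  # online publication year
n_refs = work["references-count"]             # 49 in this record

print(title)
print(", ".join(authors))
print(f"Published {year}, {n_refs} references, DOI {work['DOI']}")

The same structure applies to any work record returned by that endpoint: the top-level keys are "status", "message-type", "message-version", and "message", so parsing code can stay generic across DOIs.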