{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T14:56:50Z","timestamp":1776956210062,"version":"3.51.4"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031919787","type":"print"},{"value":"9783031919794","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91979-4_2","type":"book-chapter","created":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T19:06:45Z","timestamp":1748718405000},"page":"12-22","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":161,"title":["LocalMamba: Visual State Space Model with\u00a0Windowed Selective Scan"],"prefix":"10.1007","author":[{"given":"Tao","family":"Huang","sequence":"first","affiliation":[]},{"given":"Xiaohuan","family":"Pei","sequence":"additional","affiliation":[]},{"given":"Shan","family":"You","sequence":"additional","affiliation":[]},{"given":"Fei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Chang","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"2_CR1","first-page":"9355","volume":"34","author":"X Chu","year":"2021","unstructured":"Chu, X., Tian, Z., Wang, Y., Zhang, B., Ren, H., Wei, X., Xia, H., Shen, C.: Twins: revisiting the design of spatial attention in vision transformers. Adv. Neural. Inf. Process. Syst. 34, 9355\u20139366 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2_CR3","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"2_CR4","unstructured":"Gu, A., Dao, T.: Mamba: linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)"},{"key":"2_CR5","unstructured":"Gu, A., Goel, K., Re, C.: Efficiently modeling long sequences with structured state spaces. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=uYLFoz1vlAC"},{"key":"2_CR6","first-page":"572","volume":"34","author":"A Gu","year":"2021","unstructured":"Gu, A., Johnson, I., Goel, K., Saab, K., Dao, T., Rudra, A., R\u00e9, C.: Combining recurrent, convolutional, and continuous-time models with linear state space layers. Adv. Neural. Inf. Process. Syst. 34, 572\u2013585 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Guo, H., Li, J., Dai, T., Ouyang, Z., Ren, X., Xia, S.T.: Mambair: a simple baseline for image restoration with state-space model. arXiv preprint arXiv:2402.15648 (2024)","DOI":"10.1007\/978-3-031-72649-1_13"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Kalman, R.E.: A new approach to linear filtering and prediction problems (1960)","DOI":"10.1115\/1.3662552"},{"key":"2_CR10","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, vol. 25 (2012)"},{"key":"2_CR11","unstructured":"Liu, H., Simonyan, K., Yang, Y.: DARTS: differentiable architecture search. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=S1eYHoC5FX"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Liu, J., et\u00a0al.: Swin-umamba: mamba-based unet with imagenet-based pretraining. arXiv preprint arXiv:2402.03302 (2024)","DOI":"10.1007\/978-3-031-72114-4_59"},{"key":"2_CR13","unstructured":"Liu, Y., et al.: Vmamba: visual state space model. arXiv preprint arXiv:2401.10166 (2024)"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2_CR15","unstructured":"Ma, J., Li, F., Wang, B.: U-mamba: enhancing long-range dependency for biomedical image segmentation. arXiv preprint arXiv:2401.04722 (2024)"},{"key":"2_CR16","unstructured":"Mehta, H., Gupta, A., Cutkosky, A., Neyshabur, B.: Long range language modeling via gated state spaces. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=5MkYIYCbva"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Radosavovic, I., Kosaraju, R.P., Girshick, R., He, K., Doll\u00e1r, P.: Designing network design spaces. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10428\u201310436 (2020)","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"2_CR18","unstructured":"Ruan, J., Xiang, S.: Vm-unet: vision mamba unet for medical image segmentation. arXiv preprint arXiv:2402.02491 (2024)"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"2_CR20","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"2_CR21","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Xiao, T., Liu, Y., Zhou, B., Jiang, Y., Sun, J.: Unified perceptual parsing for scene understanding. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 418\u2013434 (2018)","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1492\u20131500 (2017)","DOI":"10.1109\/CVPR.2017.634"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Xing, Z., Ye, T., Yang, Y., Liu, G., Zhu, L.: Segmamba: long-range sequential modeling mamba for 3d medical image segmentation. arXiv preprint arXiv:2401.13560 (2024)","DOI":"10.1007\/978-3-031-72111-3_54"},{"key":"2_CR25","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., Wang, X.: Vision mamba: efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv:2401.09417 (2024)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91979-4_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T19:07:07Z","timestamp":1748718427000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91979-4_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031919787","9783031919794"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91979-4_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}