{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T15:46:26Z","timestamp":1776440786164,"version":"3.51.2"},"reference-count":288,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012456","name":"National Natural Science Fund of China","doi-asserted-by":"publisher","award":["62271090"],"award-info":[{"award-number":["62271090"]}],"id":[{"id":"10.13039\/501100012456","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012456","name":"National Natural Science Fund of China","doi-asserted-by":"publisher","award":["62221005"],"award-info":[{"award-number":["62221005"]}],"id":[{"id":"10.13039\/501100012456","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Chongqing Natural Science Fund and National Youth Talent Project","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. 
Learning Syst."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tnnls.2025.3610435","type":"journal-article","created":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T17:45:13Z","timestamp":1758563113000},"page":"505-525","source":"Crossref","is-referenced-by-count":55,"title":["Vision Mamba: A Comprehensive Survey and Taxonomy"],"prefix":"10.1109","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-9731-448X","authenticated-orcid":false,"given":"Xiao","family":"Liu","sequence":"first","affiliation":[{"name":"Chongqing Key Laboratory of Bio-Perception and Multimodal Intelligent Information Processing and the School of Microelectronics and Communication Engineering, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7079-7284","authenticated-orcid":false,"given":"Chenxu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Chongqing Key Laboratory of Bio-Perception and Multimodal Intelligent Information Processing and the School of Microelectronics and Communication Engineering, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0399-9932","authenticated-orcid":false,"given":"Fuxiang","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Data Science, Lingnan University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5993-9563","authenticated-orcid":false,"given":"Shuyin","family":"Xia","sequence":"additional","affiliation":[{"name":"Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing University of Posts and Telecommunications, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8521-5232","authenticated-orcid":false,"given":"Guoyin","family":"Wang","sequence":"additional","affiliation":[{"name":"National Center for Applied Mathematics in Chongqing, Chongqing Normal University, Chongqing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5305-8543","authenticated-orcid":false,"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Chongqing Key Laboratory of Bio-Perception and Multimodal Intelligent Information Processing and the School of Microelectronics and Communication Engineering, Chongqing University, Chongqing, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Spatial and spatial\u2013spectral morphological mamba for hyperspectral image classification","author":"Ahmad","year":"2024","journal-title":"arXiv:2408.01372"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2024.3506034"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"ref4","article-title":"I2I-mamba: Multi-modal medical image synthesis via selective state space modeling","author":"Atli","year":"2024","journal-title":"arXiv:2405.14022"},{"key":"ref5","article-title":"SiMBA: Simplified mamba-based architecture for vision and multivariate time series","author":"Patro","year":"2024","journal-title":"arXiv:2403.15360"},{"key":"ref6","article-title":"Retinexmamba: Retinex-based mamba for low-light image enhancement","author":"Bai","year":"2024","journal-title":"arXiv:2405.03349"},{"key":"ref7","article-title":"Vision mamba in remote sensing: A comprehensive survey of techniques, applications and outlook","author":"Bao","year":"2025","journal-title":"arXiv:2505.00630"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2018.2837502"},{"key":"ref9","article-title":"ISIC 2017\u2013skin lesion analysis towards melanoma detection","author":"Berseth","year":"2017","journal-title":"arXiv:1703.00523"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.05095"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-25066-8_9"},{"key":"ref12","article-title":"SR-mamba: Effective surgical phase recognition with state space 
model","author":"Cao","year":"2024","journal-title":"arXiv:2407.08333"},{"key":"ref13","article-title":"DCT-Mamba3D: Spectral decorrelation and spatial\u2013spectral feature extraction for hyperspectral image classification","author":"Cao","year":"2025","journal-title":"arXiv:2502.01986"},{"key":"ref14","article-title":"Remote sensing image segmentation using vision mamba and multi-scale multi-frequency feature fusion","author":"Cao","year":"2024","journal-title":"arXiv:2410.05624"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680905"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72114-4_26"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM62325.2024.10821761"},{"key":"ref18","article-title":"Res-VMamba: Fine-grained food category visual classification using selective state space models with deep residual learning","author":"Chen","year":"2024","journal-title":"arXiv:2402.15761"},{"key":"ref19","article-title":"Video mamba suite: State space model as a versatile alternative for video understanding","author":"Chen","year":"2024","journal-title":"arXiv:2403.09626"},{"key":"ref20","article-title":"DeMamba: AI-generated video detection on million-scale GenVideo benchmark","author":"Chen","year":"2024","journal-title":"arXiv:2405.19707"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3417253"},{"key":"ref22","article-title":"PointABM: Integrating bidirectional state space model with multi-head self-attention for point cloud analysis","author":"Chen","year":"2024","journal-title":"arXiv:2406.06069"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2024.3407111"},{"key":"ref24","article-title":"MxT: Mamba X transformer for image 
inpainting","author":"Chen","year":"2024","journal-title":"arXiv:2407.16126"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3485721"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i2.32227"},{"key":"ref27","article-title":"TokenUnify: Scaling up autoregressive pretraining for neuron segmentation","author":"Chen","year":"2024","journal-title":"arXiv:2405.16847"},{"key":"ref28","article-title":"SurvMamba: State space model with multi-grained multi-modal interaction for survival prediction","author":"Chen","year":"2024","journal-title":"arXiv:2404.08027"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"ref30","article-title":"Activating wider areas in image super-resolution","author":"Cheng","year":"2024","journal-title":"arXiv:2403.08330"},{"key":"ref31","article-title":"PTQ4 VM: Post-training quantization for visual mamba","author":"Cho","year":"2024","journal-title":"arXiv:2412.20386"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1902.03368"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i3.32264"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-0901-7_14"},{"key":"ref35","first-page":"10041","article-title":"Transformers are SSMs: Generalized models and efficient algorithms through structured state space duality","volume-title":"Proc. ICML","author":"Dao"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/MIPR62202.2024.00059"},{"key":"ref37","first-page":"2127","article-title":"Hamba: Single-view 3D hand reconstruction with graph-guided bi-scanning mamba","volume-title":"Proc. 
NIPS","volume":"37","author":"Dong"},{"key":"ref38","article-title":"Dual hyperspectral mamba for efficient spectral compressive imaging","author":"Dong","year":"2024","journal-title":"arXiv:2406.00449"},{"key":"ref39","first-page":"1","article-title":"ECMamba: Consolidating selective state space model with retinex guidance for efficient multiple exposure correction","volume-title":"Proc. NIPS","volume":"37","author":"Dong"},{"key":"ref40","article-title":"Fusion-mamba for cross-modality object detection","author":"Dong","year":"2024","journal-title":"arXiv:2404.09146"},{"key":"ref41","first-page":"1","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. ICLR","author":"Dosovitskiy"},{"key":"ref42","article-title":"Understanding robustness of visual state space models for image classification","author":"Du","year":"2024","journal-title":"arXiv:2403.10935"},{"key":"ref43","article-title":"SliceMamba with neural architecture search for medical image segmentation","author":"Fan","year":"2024","journal-title":"arXiv:2407.08481"},{"key":"ref44","article-title":"GFE-mamba: Mamba-based AD multi-modal progression assessment via generative feature extraction from MCI","author":"Fang","year":"2024","journal-title":"arXiv:2407.15719"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM62325.2024.10822552"},{"key":"ref46","article-title":"Dimba: Transformer-mamba diffusion models","author":"Fei","year":"2024","journal-title":"arXiv:2406.01159"},{"key":"ref47","article-title":"Hungry hungry hippos: Towards language modeling with state space models","author":"Fu","year":"2022","journal-title":"arXiv:2212.14052"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3446812"},{"key":"ref49","article-title":"HDMba: Hyperspectral remote sensing imagery dehazing with state space 
model","author":"Fu","year":"2024","journal-title":"arXiv:2406.05700"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM62325.2024.10822581"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-025-06946-z"},{"key":"ref52","article-title":"OPa-ma: Text guided mamba for 360-degree image out-painting","author":"Gao","year":"2024","journal-title":"arXiv:2407.10923"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-0917-8_24"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC58415.2024.10920115"},{"key":"ref55","article-title":"Matten: Video generation with mamba-attention","author":"Gao","year":"2024","journal-title":"arXiv:2405.03025"},{"key":"ref56","article-title":"A data-scalable transformer for medical image segmentation: Architecture, model efficiency, and benchmark","author":"Gao","year":"2022","journal-title":"arXiv:2203.00131"},{"key":"ref57","article-title":"NnMamba: 3D biomedical image segmentation, classification and landmark detection with state space model","author":"Gong","year":"2024","journal-title":"arXiv:2402.03526"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2025.3542995"},{"key":"ref59","article-title":"MambaDepth: Enhancing long-range dependency for self-supervised fine-structured monocular depth estimation","author":"Grigore","year":"2024","journal-title":"arXiv:2406.04532"},{"key":"ref60","volume-title":"A Visual Guide to Mamba and State Space Models","author":"Grootendorst","year":"2024"},{"key":"ref61","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023","journal-title":"arXiv:2312.00752"},{"key":"ref62","first-page":"1","article-title":"Efficiently modeling long sequences with structured state spaces","volume-title":"Proc. 10th Int. Conf. Learn. 
Represent.","author":"Gu"},{"key":"ref63","first-page":"572","article-title":"Combining recurrent, convolutional, and continuous-time models with linear state space layers","volume-title":"Proc. NeurIPS","author":"Gu"},{"key":"ref64","article-title":"QMamba: On first exploration of vision mamba for image quality assessment","author":"Guan","year":"2024","journal-title":"arXiv:2406.09546"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72649-1_13"},{"key":"ref66","first-page":"55286","article-title":"START: A generalized state space model with saliency-driven token-aware transformation","volume-title":"Proc. NIPS","volume":"37","author":"Guo"},{"key":"ref67","article-title":"MambaMorph: A mamba-based framework for medical MR-CT deformable registration","author":"Guo","year":"2024","journal-title":"arXiv:2401.13934"},{"key":"ref68","article-title":"Diagonal state spaces are as effective as structured state spaces","author":"Gupta","year":"2022","journal-title":"arXiv:2203.14343"},{"key":"ref69","first-page":"127181","article-title":"Demystify mamba in vision: A linear attention perspective","volume-title":"Proc. NIPS","volume":"37","author":"Han"},{"key":"ref70","article-title":"T-Mamba: A unified framework with long-range dependency in dual-domain for 2D & 3D tooth segmentation","author":"Hao","year":"2024","journal-title":"arXiv:2404.01065"},{"key":"ref71","article-title":"MambaVision: A hybrid mamba-transformer vision backbone","author":"Hatamizadeh","year":"2024","journal-title":"arXiv:2407.08083"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-08999-2_22"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00181"},{"key":"ref74","first-page":"71162","article-title":"MambaAD: Exploring state space models for multi-class unsupervised anomaly detection","volume-title":"Proc. 
NIPS","author":"He"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102779"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3502055"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3472091"},{"key":"ref79","article-title":"SUM: Saliency unification through mamba for visual attention modeling","author":"Hosseini","year":"2024","journal-title":"arXiv:2406.17815"},{"key":"ref80","first-page":"7132","article-title":"Squeeze-and-excitation networks","volume-title":"Proc. CVPR","author":"Hu"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72664-4_9"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1561\/116.20250012"},{"key":"ref83","article-title":"MambaMIR: An arbitrary-masked mamba for joint medical image reconstruction and uncertainty estimation","author":"Huang","year":"2024","journal-title":"arXiv:2402.18451"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-97-8858-3_1"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.3390\/rs16132449"},{"key":"ref86","article-title":"LocalMamba: Visual state space model with windowed selective scan","author":"Huang","year":"2024","journal-title":"arXiv:2403.09338"},{"key":"ref87","article-title":"CLIP-mamba: CLIP pretrained mamba models with OOD and Hessian evaluation","author":"Huang","year":"2024","journal-title":"arXiv:2404.19394"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i4.32391"},{"key":"ref89","article-title":"IRSRMamba: Infrared image super-resolution via mamba-based wavelet transform feature modulation model","author":"Huang","year":"2024","journal-title":"arXiv:2405.09873"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i4.32401"},{"key":"ref91","article-title":"A survey on mamba architecture for vision 
applications","author":"Ibrahim","year":"2025","journal-title":"arXiv:2502.07161"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-020-01008-z"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01798"},{"key":"ref94","article-title":"AMOS: A large-scale abdominal multi-organ benchmark for versatile medical image segmentation","author":"Ji","year":"2022","journal-title":"arXiv:2206.08023"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72104-5_24"},{"key":"ref96","article-title":"MHS-VM: Multi-head scanning in parallel subspaces for vision mamba","author":"Ji","year":"2024","journal-title":"arXiv:2406.05992"},{"key":"ref97","article-title":"VM-DDPM: Vision mamba diffusion for medical image synthesis","author":"Ju","year":"2024","journal-title":"arXiv:2405.05667"},{"key":"ref98","article-title":"CAMS: Convolution and attention-free mamba-based cardiac image segmentation","author":"Khan","year":"2024","journal-title":"arXiv:2406.05786"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref100","first-page":"1","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. NeurIPS","author":"Krizhevsky"},{"key":"ref101","article-title":"Meteor: Mamba-based traversal of rationale for large language and vision models","author":"Lee","year":"2024","journal-title":"arXiv:2405.15574"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00653"},{"key":"ref103","first-page":"1","article-title":"Jamba: Hybrid transformer-mamba language models","volume-title":"Proc. 
ICLR","author":"Lenz"},{"key":"ref104","article-title":"FourierMamba: Fourier learning integration with state space models for image deraining","author":"Li","year":"2024","journal-title":"arXiv:2405.19450"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73347-5_14"},{"key":"ref106","article-title":"UV-mamba: A DCN-enhanced state space model for urban village boundary identification in high-resolution remote sensing images","author":"Li","year":"2024","journal-title":"arXiv:2409.03431"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73414-4_5"},{"key":"ref108","article-title":"SpikeMba: Multi-modal spiking saliency mamba for temporal video grounding","author":"Li","year":"2024","journal-title":"arXiv:2404.01174"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00060"},{"key":"ref110","article-title":"Mamba-FSCIL: Dynamic adaptation with selective state space model for few-shot class-incremental learning","author":"Li","year":"2024","journal-title":"arXiv:2407.06136"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3430985"},{"key":"ref112","article-title":"3DMambaComplete: Exploring structured state space model for point cloud completion","author":"Li","year":"2024","journal-title":"arXiv:2404.07106"},{"key":"ref113","article-title":"What makes convolutional models great on long sequence modeling?","author":"Li","year":"2022","journal-title":"arXiv:2210.09298"},{"key":"ref114","article-title":"MambaDFuse: A mamba-based dual-phase model for multi-modality image fusion","author":"Li","year":"2024","journal-title":"arXiv:2404.08406"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i5.32540"},{"key":"ref116","first-page":"32653","article-title":"PointMamba: A simple state space model for point cloud analysis","volume-title":"Proc. 
NIPS","volume":"37","author":"Liang"},{"key":"ref117","article-title":"LightM-UNet: Mamba assists in lightweight UNet for medical image segmentation","author":"Liao","year":"2024","journal-title":"arXiv:2403.05246"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72897-6_18"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-0911-6_11"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2024.3404604"},{"key":"ref122","first-page":"9204","article-title":"Pay attention to MLPs","volume-title":"Proc. NIPS","volume":"34","author":"Liu"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-032-05127-1_42"},{"key":"ref124","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. NeurIPS","volume":"36","author":"Liu"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02738"},{"key":"ref126","first-page":"1","article-title":"RoboMamba: Efficient vision-language-action model for robotic reasoning and manipulation","volume-title":"Proc. 
NIPS","volume":"37","author":"Liu"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72114-4_59"},{"key":"ref128","article-title":"Point mamba: A novel point cloud backbone based on state space model with octree-based ordering strategy","author":"Liu","year":"2024","journal-title":"arXiv:2403.06467"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00826"},{"key":"ref130","article-title":"CM-UNet: Hybrid CNN-mamba UNet for remote sensing image semantic segmentation","author":"Liu","year":"2024","journal-title":"arXiv:2405.10530"},{"key":"ref131","article-title":"Harnessing temporal causality for advanced temporal action detection","author":"Liu","year":"2024","journal-title":"arXiv:2407.17792"},{"key":"ref132","article-title":"HSIDMamba: Exploring bidirectional state-space models for hyperspectral denoising","author":"Liu","year":"2024","journal-title":"arXiv:2404.09697"},{"key":"ref133","first-page":"1","article-title":"VMamba: Visual state space model","volume-title":"Proc. 
NIPS","volume":"37","author":"Liu"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3496895"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681247"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i18.34103"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i6.32618"},{"key":"ref140","article-title":"VideoMambaPro: A leap forward for mamba in video understanding","author":"Lu","year":"2024","journal-title":"arXiv:2406.19006"},{"key":"ref141","article-title":"LFMamba: Light field image super-resolution with state space model","author":"Xia","year":"2024","journal-title":"arXiv:2406.12463"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-1071-6_23"},{"key":"ref143","article-title":"Semi-mamba-UNet: Pixel-level contrastive and pixel-level cross-supervised visual mamba-based UNet for semi-supervised medical image segmentation","author":"Ma","year":"2024","journal-title":"arXiv:2402.07245"},{"key":"ref144","article-title":"VMambaCC: A visual state space model for crowd counting","author":"Ma","year":"2024","journal-title":"arXiv:2405.03978"},{"key":"ref145","article-title":"FER-YOLO-mamba: Facial expression detection and classification based on selective state space","author":"Ma","year":"2024","journal-title":"arXiv:2405.01828"},{"key":"ref146","article-title":"U-mamba: Enhancing long-range dependency for biomedical image segmentation","author":"Ma","year":"2024","journal-title":"arXiv:2401.04722"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-024-02233-6"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2024.3414293"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01459"},{"key":"ref150","article-title":"On the 
low-shot transferability of [V]-Mamba","author":"Misra","year":"2024","journal-title":"arXiv:2403.10696"},{"key":"ref151","article-title":"Efficient 3D shape generation via diffusion mamba with bidirectional SSMs","author":"Mo","year":"2024","journal-title":"arXiv:2406.05038"},{"key":"ref152","article-title":"Scaling diffusion mamba with bidirectional SSMs for efficient image and video generation","author":"Mo","year":"2024","journal-title":"arXiv:2405.15881"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-11726-9_28"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-76197-3_2"},{"key":"ref155","article-title":"Resurrecting recurrent neural networks for long sequences","author":"Orvieto","year":"2023","journal-title":"arXiv:2303.06349"},{"key":"ref156","first-page":"80056","article-title":"MambaSCI: Efficient mamba-UNet for quad-bayer patterned video snapshot compressive imaging","volume-title":"Proc. NIPS","volume":"37","author":"Pan"},{"key":"ref157","article-title":"A mamba-based Siamese network for remote sensing change detection","author":"Paranjape","year":"2024","journal-title":"arXiv:2407.06839"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72698-9_1"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i6.32690"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3496073"},{"key":"ref161","first-page":"1","article-title":"DiMSUM: Diffusion mamba\u2014A scalable and unified spatial-frequency method for image generation","volume-title":"Proc. NIPS","volume":"37","author":"Phung"},{"key":"ref162","article-title":"MoE-mamba: Efficient selective state space models with mixture of experts","author":"Pi\u00f3ro","year":"2024","journal-title":"arXiv:2401.04081"},{"key":"ref163","first-page":"652","article-title":"PointNet: Deep learning on point sets for 3D classification and segmentation","volume-title":"Proc. 
CVPR","author":"Qi"},{"key":"ref164","article-title":"SMCD: High realism motion style transfer via mamba-based diffusion","author":"Qian","year":"2024","journal-title":"arXiv:2405.02844"},{"key":"ref165","article-title":"VL-mamba: Exploring state space models for multimodal learning","author":"Qiao","year":"2024","journal-title":"arXiv:2403.13600"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i6.32707"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/igarss55030.2025.11243832"},{"key":"ref168","first-page":"1","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. ICML","author":"Radford"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"ref170","article-title":"Mamba in vision: A comprehensive survey of techniques and applications","author":"Maklachur Rahman","year":"2024","journal-title":"arXiv:2410.03105"},{"key":"ref171","article-title":"Rethinking efficient and effective point-based networks for event camera classification and regression: EventMamba","author":"Ren","year":"2024","journal-title":"arXiv:2405.06116"},{"key":"ref172","article-title":"RemoteDet-mamba: A hybrid mamba-CNN network for multi-modal object detection in remote sensing images","author":"Ren","year":"2024","journal-title":"arXiv:2410.13532"},{"key":"ref173","article-title":"Autoregressive pretraining with mamba in 
vision","author":"Ren","year":"2024","journal-title":"arXiv:2406.07537"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1145\/3767748"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM55620.2022.9995040"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2025.3571322"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-0188-2_29"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01389"},{"key":"ref180","article-title":"HTD-mamba: Efficient hyperspectral target detection with pyramid state space model","author":"Shen","year":"2024","journal-title":"arXiv:2407.06841"},{"key":"ref181","article-title":"Gamba: Marry Gaussian splatting with mamba for single view 3D reconstruction","author":"Shen","year":"2024","journal-title":"arXiv:2403.18795"},{"key":"ref182","article-title":"OTCE: Hybrid SSM and attention with cross domain mixture of experts to construct observer-thinker-conceiver-expresser","author":"Shi","year":"2024","journal-title":"arXiv:2406.16495"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72390-2_68"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2025.3530090"},{"key":"ref185","first-page":"25687","article-title":"Multi-scale VMamba: Hierarchy in hierarchy visual state space model","volume-title":"Proc. 
NIPS","author":"Shi"},{"key":"ref186","article-title":"NormFormer: Improved transformer pretraining with extra normalization","author":"Shleifer","year":"2021","journal-title":"arXiv:2110.09456"},{"key":"ref187","article-title":"2017 robotic instrument segmentation challenge","author":"Allan","year":"2019","journal-title":"arXiv:1902.06426"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-0911-6_10"},{"key":"ref189","first-page":"6105","article-title":"EfficientNet: Rethinking model scaling for convolutional neural networks","volume-title":"Proc. ICML","author":"Tan"},{"key":"ref190","article-title":"Rotate to scan: UNet-like mamba with triplet SSM module for medical image segmentation","author":"Tang","year":"2024","journal-title":"arXiv:2403.17701"},{"key":"ref191","article-title":"Scalable visual state space model with fractal scanning","author":"Tang","year":"2024","journal-title":"arXiv:2405.14480"},{"key":"ref192","article-title":"DiM: Diffusion mamba for efficient high-resolution image synthesis","author":"Teng","year":"2024","journal-title":"arXiv:2405.14224"},{"key":"ref193","article-title":"Empowering snapshot compressive imaging: Spatial\u2013spectral state space model with across-scanning and local enhancement","author":"Tian","year":"2024","journal-title":"arXiv:2408.00629"},{"key":"ref194","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"Proc. 
ICML","author":"Touvron"},{"key":"ref195","article-title":"SOAR: Advancements in small body object detection for aerial imagery using state space models and programmable gradients","author":"Verma","year":"2024","journal-title":"arXiv:2405.01699"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2025.101298"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.37349\/emed.2024.00250"},{"key":"ref198","article-title":"Sigma: Siamese mamba network for multi-modal semantic segmentation","author":"Wan","year":"2024","journal-title":"arXiv:2404.04256"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01392"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2025.3530993"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i7.32824"},{"key":"ref202","article-title":"SAM-Med3D: Towards general-purpose segmentation models for volumetric medical images","author":"Wang","year":"2023","journal-title":"arXiv:2310.15161"},{"key":"ref203","article-title":"Serp-mamba: Advancing high-resolution retinal vessel segmentation with selective state-space model","author":"Wang","year":"2024","journal-title":"arXiv:2409.04356"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72111-3_34"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3518076"},{"key":"ref206","article-title":"PyramidMamba: Rethinking pyramid feature fusion with selective space state model for semantic segmentation of remote sensing imagery","author":"Wang","year":"2024","journal-title":"arXiv:2406.10828"},{"key":"ref207","article-title":"MemoryMamba: Memory-augmented state space model for defect recognition","author":"Wang","year":"2024","journal-title":"arXiv:2405.03673"},{"key":"ref208","article-title":"InsectMamba: Insect pest classification with state space model","author":"Wang","year":"2024","journal-title":"arXiv:2404.03611"},{"key":"ref209","article-title":"Serialized point 
mamba: A serialized point cloud mamba segmentation model","author":"Wang","year":"2024","journal-title":"arXiv:2407.12319"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-022-0274-8"},{"key":"ref211","article-title":"GMSR: Gradient-guided mamba for spectral reconstruction from RGB images","author":"Wang","year":"2024","journal-title":"arXiv:2405.07777"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i8.32885"},{"key":"ref213","article-title":"Soft masked mamba diffusion model for CT to MRI conversion","author":"Wang","year":"2024","journal-title":"arXiv:2406.15910"},{"key":"ref214","article-title":"PoinTramba: A hybrid transformer-mamba framework for point cloud analysis","author":"Wang","year":"2024","journal-title":"arXiv:2405.15463"},{"key":"ref215","article-title":"Weak-mamba-UNet: Visual mamba makes CNN and ViT work better for scribble-based medical image segmentation","author":"Wang","year":"2024","journal-title":"arXiv:2402.10887"},{"key":"ref216","article-title":"VMambaMorph: A multi-modality deformable image registration framework based on visual state space model with cross-scan module","author":"Wang","year":"2024","journal-title":"arXiv:2404.05105"},{"key":"ref217","article-title":"Mamba-UNet: UNet-like pure visual mamba for medical image segmentation","author":"Wang","year":"2024","journal-title":"arXiv:2402.05079"},{"key":"ref218","first-page":"583","article-title":"OneBEV: Using one panoramic image for bird\u2019s-eye-view semantic mapping","volume-title":"Proc. 
ACCV","author":"Wei"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1807.06521"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2025.129447"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1109\/jstars.2025.3650075"},{"key":"ref222","article-title":"OverlapMamba: Novel shift state space model for LiDAR-based place recognition","author":"Xiang","year":"2024","journal-title":"arXiv:2405.07966"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680944"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3521798"},{"key":"ref226","first-page":"1","article-title":"QuadMamba: Learning quadtree-based selective scan for visual state space model","volume-title":"Proc. NIPS","author":"Xie"},{"key":"ref227","article-title":"ProMamba: Prompt-mamba for polyp segmentation","author":"Xie","year":"2024","journal-title":"arXiv:2403.13660"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1007\/s44267-024-00072-9"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72111-3_54"},{"key":"ref230","article-title":"HC-mamba: Vision MAMBA with hybrid convolutional techniques for medical image segmentation","author":"Xu","year":"2024","journal-title":"arXiv:2405.05007"},{"key":"ref231","first-page":"1","article-title":"Hybrid mamba for few-shot segmentation","volume-title":"Proc. 
NIPS","volume":"37","author":"Xu"},{"key":"ref232","article-title":"Visual mamba: A survey and new outlooks","author":"Xu","year":"2024","journal-title":"arXiv:2404.18861"},{"key":"ref233","article-title":"Sports-traj: A unified trajectory generation model for multi-agent movement in sports","author":"Xu","year":"2024","journal-title":"arXiv:2405.17680"},{"key":"ref234","first-page":"20055","article-title":"MambaTalk: Efficient holistic gesture synthesis with selective state space models","volume-title":"Proc. NIPS","volume":"37","author":"Xu"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3493101"},{"key":"ref236","first-page":"1","article-title":"PlainMamba: Improving non-hierarchical mamba in visual recognition","volume-title":"Proc. BMVC","author":"Yang"},{"key":"ref237","article-title":"CMViM: Contrastive masked vim autoencoder for 3D multi-modal representation learning for AD classification","author":"Yang","year":"2024","journal-title":"arXiv:2403.16520"},{"key":"ref238","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72083-3_28"},{"key":"ref239","article-title":"Vivim: A video vision mamba for medical video segmentation","author":"Yang","year":"2024","journal-title":"arXiv:2401.14168"},{"key":"ref240","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72684-2_7"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72086-4_13"},{"key":"ref242","article-title":"SpectralMamba: Efficient mamba for hyperspectral image classification","author":"Yao","year":"2024","journal-title":"arXiv:2404.08489"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i21.34361"},{"key":"ref244","article-title":"P-mamba: Marrying perona malik diffusion with mamba for efficient pediatric echocardiographic left ventricular segmentation","author":"Ye","year":"2024","journal-title":"arXiv:2402.08506"},{"key":"ref245","article-title":"MambaPEFT: Exploring parameter-efficient fine-tuning for 
mamba","author":"Yoshimura","year":"2024","journal-title":"arXiv:2411.03855"},{"key":"ref246","article-title":"MambaOut: Do we really need mamba for vision?","author":"Yu","year":"2024","journal-title":"arXiv:2405.07992"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2024.106573"},{"key":"ref248","article-title":"ABS-mamba: SAM2-driven bidirectional spiral mamba network for medical image translation","author":"Yuan","year":"2025","journal-title":"arXiv:2505.07687"},{"key":"ref249","article-title":"MedMamba: Vision mamba for medical image classification","author":"Yue","year":"2024","journal-title":"arXiv:2403.03849"},{"key":"ref250","first-page":"1","article-title":"LCM: Locally constrained compact point cloud model for masked point modeling","volume-title":"Proc. NeurIPS","volume":"37","author":"Zha"},{"key":"ref251","first-page":"106582","article-title":"DeMo: Decoupling motion forecasting into directional intentions and dynamic states","volume-title":"Proc. NeurIPS","volume":"37","author":"Zhang"},{"key":"ref252","first-page":"81489","article-title":"Voxel mamba: Group-free state space models for point cloud based 3D object detection","volume-title":"Proc. 
NIPS","volume":"37","author":"Zhang"},{"key":"ref253","doi-asserted-by":"publisher","DOI":"10.3390\/app14135683"},{"key":"ref254","article-title":"DINO: DETR with improved DeNoising anchor boxes for end-to-end object detection","author":"Zhang","year":"2022","journal-title":"arXiv:2203.03605"},{"key":"ref255","article-title":"CDMamba: Incorporating local clues into mamba for remote sensing image binary change detection","author":"Zhang","year":"2024","journal-title":"arXiv:2406.04207"},{"key":"ref256","article-title":"Vim-F: Visual state space model benefiting from learning in the frequency domain","author":"Zhang","year":"2024","journal-title":"arXiv:2405.18679"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-97-5128-0_27"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33098"},{"key":"ref259","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01354"},{"key":"ref260","article-title":"LLEMamba: Low-light enhancement via relighting-guided mamba with deep unfolding network","author":"Zhang","year":"2024","journal-title":"arXiv:2406.01028"},{"key":"ref261","article-title":"Motion-guided dual-camera tracker for endoscope tracking and motion analysis in a mechanical gastric simulator","author":"Zhang","year":"2024","journal-title":"arXiv:2403.05146"},{"key":"ref262","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-032-04927-8_39"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87193-2_2"},{"key":"ref264","article-title":"InfiniMotion: Mamba boosts memory in transformer for arbitrary long motion generation","author":"Zhang","year":"2024","journal-title":"arXiv:2407.10061"},{"key":"ref265","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73232-4_15"},{"key":"ref266","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33131"},{"key":"ref267","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3425540"},{"key":"ref268","article-title":"U-shaped vision mamba for single 
image dehazing","author":"Zheng","year":"2024","journal-title":"arXiv:2402.04139"},{"key":"ref269","article-title":"FD-vision mamba for endoscopic exposure correction","author":"Zheng","year":"2024","journal-title":"arXiv:2402.06378"},{"key":"ref270","article-title":"QueryMamba: A mamba-based encoder\u2013decoder architecture with a statistical verb-noun interaction module for video action forecasting @ Ego4D long-term action anticipation challenge 2024","author":"Zhong","year":"2024","journal-title":"arXiv:2407.04184"},{"key":"ref271","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-1140-0"},{"key":"ref272","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3293771"},{"key":"ref273","article-title":"RSDehamba: Lightweight vision mamba for remote sensing satellite image dehazing","author":"Zhou","year":"2024","journal-title":"arXiv:2405.10030"},{"key":"ref274","article-title":"MGI: Multimodal contrastive pre-training of genomic and medical imaging","author":"Zhou","year":"2024","journal-title":"arXiv:2406.00631"},{"key":"ref275","article-title":"DMM: Disparity-guided multispectral mamba for oriented object detection in remote sensing","author":"Zhou","year":"2024","journal-title":"arXiv:2407.08132"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00824"},{"key":"ref277","article-title":"Mamba-in-mamba: Centralized mamba-cross-scan in tokenized mamba model for hyperspectral image classification","author":"Zhou","year":"2024","journal-title":"arXiv:2405.12003"},{"key":"ref278","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-032-05573-6_2"},{"key":"ref279","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2024.3505193"},{"key":"ref280","first-page":"1","article-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","volume-title":"Proc. 
ICML","author":"Zhu"},{"key":"ref281","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2024.e38495"},{"key":"ref282","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2024.3472296"},{"key":"ref283","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2025.3525673"},{"key":"ref284","article-title":"RhythmMamba: Fast remote physiological measurement with arbitrary length videos","author":"Zou","year":"2024","journal-title":"arXiv:2404.06483"},{"key":"ref285","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2025.103549"},{"key":"ref286","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681580"},{"key":"ref287","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680862"},{"key":"ref288","article-title":"Falcon mamba: The first competitive attention-free 7B language model","author":"Zuo","year":"2024","journal-title":"arXiv:2410.05355"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/11372199\/11175044.pdf?arnumber=11175044","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T21:07:20Z","timestamp":1770671240000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11175044\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":288,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2025.3610435","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}