{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T06:01:11Z","timestamp":1773900071739,"version":"3.50.1"},"reference-count":42,"publisher":"MDPI AG","issue":"14","license":[{"start":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T00:00:00Z","timestamp":1721174400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Key R&amp;D Program of Shandong Province of China","award":["2023CXGC010112"],"award-info":[{"award-number":["2023CXGC010112"]}]},{"name":"Key R&amp;D Program of Shandong Province of China","award":["2022YFB4500602"],"award-info":[{"award-number":["2022YFB4500602"]}]},{"name":"Key R&amp;D Program of Shandong Province of China","award":["BE2021093"],"award-info":[{"award-number":["BE2021093"]}]},{"name":"Key R&amp;D Program of Shandong Province of China","award":["ZR2023JQ025"],"award-info":[{"award-number":["ZR2023JQ025"]}]},{"name":"Key R&amp;D Program of Shandong Province of China","award":["tsqn202211290"],"award-info":[{"award-number":["tsqn202211290"]}]},{"name":"Key R&amp;D Program of Shandong Province of China","award":["ZR2022ZD32"],"award-info":[{"award-number":["ZR2022ZD32"]}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023CXGC010112"],"award-info":[{"award-number":["2023CXGC010112"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFB4500602"],"award-info":[{"award-number":["2022YFB4500602"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["BE2021093"],"award-info":[{"award-number":["BE2021093"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["ZR2023JQ025"],"award-info":[{"award-number":["ZR2023JQ025"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["tsqn202211290"],"award-info":[{"award-number":["tsqn202211290"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["ZR2022ZD32"],"award-info":[{"award-number":["ZR2022ZD32"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research and Development Program of Jiangsu Province","award":["2023CXGC010112"],"award-info":[{"award-number":["2023CXGC010112"]}]},{"name":"Key Research and Development Program of Jiangsu Province","award":["2022YFB4500602"],"award-info":[{"award-number":["2022YFB4500602"]}]},{"name":"Key Research and Development Program of Jiangsu Province","award":["BE2021093"],"award-info":[{"award-number":["BE2021093"]}]},{"name":"Key Research and Development Program of Jiangsu Province","award":["ZR2023JQ025"],"award-info":[{"award-number":["ZR2023JQ025"]}]},{"name":"Key Research and Development Program of Jiangsu Province","award":["tsqn202211290"],"award-info":[{"award-number":["tsqn202211290"]}]},{"name":"Key Research and Development Program of Jiangsu Province","award":["ZR2022ZD32"],"award-info":[{"award-number":["ZR2022ZD32"]}]},{"name":"Distinguished Young Scholar of Shandong Province","award":["2023CXGC010112"],"award-info":[{"award-number":["2023CXGC010112"]}]},{"name":"Distinguished Young Scholar of Shandong Province","award":["2022YFB4500602"],"award-info":[{"award-number":["2022YFB4500602"]}]},{"name":"Distinguished Young Scholar of Shandong Province","award":["BE2021093"],"award-info":[{"award-number":["BE2021093"]}]},{"name":"Distinguished Young Scholar of Shandong Province","award":["ZR2023JQ025"],"award-info":[{"award-number":["ZR2023JQ025"]}]},{"name":"Distinguished Young Scholar of Shandong Province","award":["tsqn202211290"],"award-info":[{"award-number":["tsqn202211290"]}]},{"name":"Distinguished Young Scholar of Shandong Province","award":["ZR2022ZD32"],"award-info":[{"award-number":["ZR2022ZD32"]}]},{"name":"Taishan Scholars Program","award":["2023CXGC010112"],"award-info":[{"award-number":["2023CXGC010112"]}]},{"name":"Taishan Scholars Program","award":["2022YFB4500602"],"award-info":[{"award-number":["2022YFB4500602"]}]},{"name":"Taishan Scholars Program","award":["BE2021093"],"award-info":[{"award-number":["BE2021093"]}]},{"name":"Taishan Scholars Program","award":["ZR2023JQ025"],"award-info":[{"award-number":["ZR2023JQ025"]}]},{"name":"Taishan Scholars Program","award":["tsqn202211290"],"award-info":[{"award-number":["tsqn202211290"]}]},{"name":"Taishan Scholars Program","award":["ZR2022ZD32"],"award-info":[{"award-number":["ZR2022ZD32"]}]},{"name":"Major Basic Research Projects of Shandong Province","award":["2023CXGC010112"],"award-info":[{"award-number":["2023CXGC010112"]}]},{"name":"Major Basic Research Projects of Shandong Province","award":["2022YFB4500602"],"award-info":[{"award-number":["2022YFB4500602"]}]},{"name":"Major Basic Research Projects of Shandong Province","award":["BE2021093"],"award-info":[{"award-number":["BE2021093"]}]},{"name":"Major Basic Research Projects of Shandong Province","award":["ZR2023JQ025"],"award-info":[{"award-number":["ZR2023JQ025"]}]},{"name":"Major Basic Research Projects of Shandong Province","award":["tsqn202211290"],"award-info":[{"award-number":["tsqn202211290"]}]},{"name":"Major Basic Research Projects of Shandong Province","award":["ZR2022ZD32"],"award-info":[{"award-number":["ZR2022ZD32"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Remote Sensing"],"abstract":"<jats:p>Real-time remote sensing segmentation technology is crucial for unmanned aerial vehicles (UAVs) in battlefield surveillance, land characterization observation, earthquake disaster assessment, etc., and can significantly enhance the application value of UAVs in military and civilian fields. To realize this potential, it is essential to develop real-time semantic segmentation methods that can be applied to resource-limited platforms, such as edge devices. The majority of mainstream real-time semantic segmentation methods rely on convolutional neural networks (CNNs) and transformers. However, CNNs cannot effectively capture long-range dependencies, while transformers have high computational complexity. This paper proposes a novel remote sensing Mamba architecture for real-time segmentation tasks in remote sensing, named RTMamba. Specifically, the backbone utilizes a Visual State-Space (VSS) block to extract deep features and maintains linear computational complexity, thereby capturing long-range contextual information. Additionally, a novel Inverted Triangle Pyramid Pooling (ITP) module is incorporated into the decoder. The ITP module can effectively filter redundant feature information and enhance the perception of objects and their boundaries in remote sensing images. Extensive experiments were conducted on three challenging aerial remote sensing segmentation benchmarks, including Vaihingen, Potsdam, and LoveDA. The results show that RTMamba achieves competitive performance advantages in terms of segmentation accuracy and inference speed compared to state-of-the-art CNN and transformer methods. To further validate the deployment potential of the model on embedded devices with limited resources, such as UAVs, we conducted tests on the Jetson AGX Orin edge device. The experimental results demonstrate that RTMamba achieves impressive real-time segmentation performance.<\/jats:p>","DOI":"10.3390\/rs16142620","type":"journal-article","created":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T16:35:58Z","timestamp":1721234158000},"page":"2620","update-policy":"https:\/\/doi.org\/10.3390\/mdpi_crossmark_policy","source":"Crossref","is-referenced-by-count":49,"title":["A Novel Mamba Architecture with a Semantic Transformer for Efficient Real-Time Remote Sensing Semantic Segmentation"],"prefix":"10.3390","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4768-4606","authenticated-orcid":false,"given":"Hao","family":"Ding","sequence":"first","affiliation":[{"name":"School of Information Science and Engineering, Linyi University, Linyi 276000, China"}]},{"given":"Bo","family":"Xia","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Linyi University, Linyi 276000, China"}]},{"given":"Weilin","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Linyi University, Linyi 276000, China"}]},{"given":"Zekai","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Control Science and Engineering, Shandong University, Jinan 250061, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1618-8493","authenticated-orcid":false,"given":"Jinglin","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Linyi University, Linyi 276000, China"},{"name":"Department of Control Science and Engineering, Shandong University, Jinan 250061, China"},{"name":"Department of Information Science and Engineering, Shandong Research Institute of Industrial Technology, Jinan 250100, China"}]},{"given":"Xing","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Linyi University, Linyi 276000, China"}]},{"given":"Sen","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Yancheng Institute of Technology, Yancheng 224051, China"}]}],"member":"1968","published-online":{"date-parts":[[2024,7,17]]},"reference":[{"key":"ref_1","doi-asserted-by":"crossref","unstructured":"Talukdar, S., Singha, P., Mahato, S., Pal, S., Liou, Y.A., and Rahman, A. (2020). Land-use land-cover classification by machine learning classifiers for satellite observations\u2014A review. Remote Sens., 12.","DOI":"10.3390\/rs12071135"},{"key":"ref_2","doi-asserted-by":"crossref","unstructured":"Phan, T.N., Kuch, V., and Lehnert, L.W. (2020). Land cover classification using Google Earth Engine and random forest classifier\u2014The role of image composition. Remote Sens., 12.","DOI":"10.3390\/rs12152411"},{"key":"ref_3","first-page":"84","article-title":"Imagenet classification with deep convolutional neural networks","volume":"25","author":"Krizhevsky","year":"2012","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref_4","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1016\/j.isprsjprs.2016.10.010","article-title":"MRF-based segmentation and unsupervised classification for building and road detection in peri-urban areas of high-resolution satellite images","volume":"122","author":"Grinias","year":"2016","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"key":"ref_5","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., and Darrell, T. (2015, January 7\u201312). Fully convolutional networks for semantic segmentation. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Boston, MA, USA.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref_6","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., and Brox, T. (2015, January 5\u20139). U-net: Convolutional networks for biomedical image segmentation. Proceedings of the Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference (Part III 18), Munich, Germany.","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref_7","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., and Adam, H. (2018, January 8\u201314). Encoder-decoder with atrous separable convolution for semantic image segmentation. Proceedings of the European Conference on Computer Vision (ECCV), Munich, Germany.","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"ref_8","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., and Jia, J. (2017, January 21\u201326). Pyramid scene parsing network. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, USA.","DOI":"10.1109\/CVPR.2017.660"},{"key":"ref_9","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peg, C., Gao, C., Yu, G., and Sang, N. (2018, January 8\u201314). Bisenet: Bilateral segmentation network for real-time semantic segmentation. Proceedings of the European Conference on Computer Vision (ECCV), Munich, Germany.","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"ref_10","doi-asserted-by":"crossref","first-page":"3051","DOI":"10.1007\/s11263-021-01515-2","article-title":"Bisenet v2: Bilateral network with guided aggregation for real-time semantic segmentation","volume":"129","author":"Yu","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"ref_11","first-page":"4506015","article-title":"Graph attention guidance network with knowledge distillation for semantic segmentation of remote sensing images","volume":"61","author":"Zhou","year":"2023","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_12","unstructured":"Hong, Y., Pan, H., Sun, W., and Jia, Y. (2021). Deep dual-resolution networks for real-time and accurate semantic segmentation of road scenes. arXiv."},{"key":"ref_13","doi-asserted-by":"crossref","unstructured":"Shi, W., Meng, Q., Zhang, L., Zhao, M., Su, C., and Jancs\u00f3, T. (2022). DSANet: A deep supervision-based simple attention network for efficient semantic segmentation in remote sensing imagery. Remote Sens., 14.","DOI":"10.3390\/rs14215399"},{"key":"ref_14","unstructured":"Chen, J., Lu, Y., Yu, Q., Luo, X., Adeli, E., Wang, Y., Lu, L., Yuille, A.L., and Zhou, Y. (2021). Transunet: Transformers make strong encoders for medical image segmentation. arXiv."},{"key":"ref_15","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1016\/j.isprsjprs.2022.06.008","article-title":"UNetFormer: A UNet-like transformer for efficient semantic segmentation of remote sensing urban scene imagery","volume":"190","author":"Wang","year":"2022","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"key":"ref_16","doi-asserted-by":"crossref","unstructured":"Xu, J., Xiong, Z., and Bhattacharyya, S.P. (2023, January 17\u201324). PIDNet: A real-time semantic segmentation network inspired by PID controllers. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Vancouver, BC, Canada.","DOI":"10.1109\/CVPR52729.2023.01871"},{"key":"ref_17","first-page":"6378","article-title":"SCTNet: Single-Branch CNN with Transformer Semantic Information for Real-Time Segmentation","volume":"38","author":"Xu","year":"2024","journal-title":"Proc. Aaai Conf. Artif. Intell."},{"key":"ref_18","unstructured":"Gu, A., and Dao, T. (2023). Mamba: Linear-time sequence modeling with selective state spaces. arXiv."},{"key":"ref_19","doi-asserted-by":"crossref","unstructured":"Chen, K., Chen, B., Liu, C., Li, W., Zou, Z., and Shi, Z. (2024). Rsmamba: Remote sensing image classification with state space model. arXiv.","DOI":"10.1109\/LGRS.2024.3407111"},{"key":"ref_20","doi-asserted-by":"crossref","unstructured":"Zhao, S., Chen, H., Zhang, X., Xiao, P., Bai, L., and Ouyang, W. (2024). Rs-mamba for large remote sensing image dense prediction. arXiv.","DOI":"10.1109\/TGRS.2024.3425540"},{"key":"ref_21","doi-asserted-by":"crossref","unstructured":"Ma, X., Zhang, X., and Pun, M.O. (2024). RS3Mamba: Visual State Space Model for Remote Sensing Images Semantic Segmentation. arXiv.","DOI":"10.1109\/LGRS.2024.3414293"},{"key":"ref_22","doi-asserted-by":"crossref","unstructured":"Barnell, M., Raymond, C., Smiley, S., Isereau, D., and Brown, D. (2022, January 19\u201323). Ultra low-power deep learning applications at the edge with Jetson Orin AGX hardware. Proceedings of the 2022 IEEE High Performance Extreme Computing Conference (HPEC), Waltham, MA, USA.","DOI":"10.1109\/HPEC55821.2022.9926369"},{"key":"ref_23","doi-asserted-by":"crossref","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","article-title":"Segnet: A deep convolutional encoder-decoder architecture for image segmentation","volume":"39","author":"Badrinarayanan","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"ref_24","doi-asserted-by":"crossref","first-page":"5874","DOI":"10.1080\/01431161.2022.2073795","article-title":"MANet: A multi-level aggregation network for semantic segmentation of high-resolution remote sensing images","volume":"43","author":"Chen","year":"2022","journal-title":"Int. J. Remote Sens."},{"key":"ref_25","unstructured":"He, W., Han, K., Tang, Y., Wang, C., Yang, Y., Guo, T., and Wang, Y. (2024). Densemamba: State space models with dense hidden connection for efficient large language models. arXiv."},{"key":"ref_26","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., and Wang, X. (2024). Vision mamba: Efficient visual representation learning with bidirectional state space model. arXiv."},{"key":"ref_27","unstructured":"Liu, Y., Tian, Y., Zhao, Y., Yu, H., Xie, L., Wang, Y., Ye, Q., and Liu, Y. (2024). Vmamba: Visual state space model. arXiv."},{"key":"ref_28","unstructured":"Zhang, Z., Liu, A., Reid, I., Hartley, R., Zhuang, B., and Tang, H. (2024). Motion mamba: Efficient and long sequence motion generation with hierarchical and bidirectional selective ssm. arXiv."},{"key":"ref_29","doi-asserted-by":"crossref","unstructured":"Shu, C., Liu, Y., Gao, J., Yan, Z., and Shen, C. (2021, January 10\u201317). Channel-wise knowledge distillation for dense prediction. Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), Montreal, QC, Canada.","DOI":"10.1109\/ICCV48922.2021.00526"},{"key":"ref_30","doi-asserted-by":"crossref","unstructured":"Chen, J., Kao, S.h., He, H., Zhuo, W., Wen, S., Lee, C.H., and Chan, S.H.G. (2023, January 17\u201324). Run, Don\u2019t walk: Chasing higher FLOPS for faster neural networks. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Vancouver, BC, Canada.","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"ref_31","doi-asserted-by":"crossref","unstructured":"Milletari, F., Navab, N., and Ahmadi, S.A. (2016, January 25\u201328). V-net: Fully convolutional neural networks for volumetric medical image segmentation. Proceedings of the 2016 Fourth International Conference on 3D Vision (3DV), Stanford, CA, USA.","DOI":"10.1109\/3DV.2016.79"},{"key":"ref_32","unstructured":"Wang, J., Zheng, Z., Ma, A., Lu, X., and Zhong, Y. (2021). LoveDA: A remote sensing land-cover dataset for domain adaptive semantic segmentation. arXiv."},{"key":"ref_33","unstructured":"(2022, February 08). 2D Semantic Labeling Contest\u2014Vaihingen. Available online: https:\/\/www.isprs.org\/education\/benchmarks\/UrbanSemLab\/2d-semlabel-vaihingen.aspx."},{"key":"ref_34","unstructured":"(2022, February 08). 2D Semantic Labeling Contest\u2014Potsdam. Available online: https:\/\/www.isprs.org\/education\/benchmarks\/UrbanSemLab\/2d-semlabel-potsdam.aspx."},{"key":"ref_35","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref_36","first-page":"5610713","article-title":"LSRFormer: Efficient Transformer Supply Convolutional Neural Networks with Global Information for Aerial Image Segmentation","volume":"62","author":"Zhang","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_37","unstructured":"Loshchilov, I., and Hutter, F. (2017). Decoupled weight decay regularization. arXiv."},{"key":"ref_38","unstructured":"Contributors, M. (2022, February 08). MMSegmentation: OpenMMLab Semantic Segmentation Toolbox and Benchmark. Available online: https:\/\/github.com\/open-mmlab\/mmsegmentation."},{"key":"ref_39","unstructured":"Xue, H., Liu, C., Wan, F., Jiao, J., Ji, X., and Ye, Q. (November, January 27). Danet: Divergent activation for weakly supervised object localization. Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), Seoul, Republic of Korea."},{"key":"ref_40","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1109\/LES.2021.3087707","article-title":"Deep learning inference parallelization on heterogeneous processors with tensorrt","volume":"14","author":"Jeong","year":"2021","journal-title":"IEEE Embed. Syst. Lett."},{"key":"ref_41","unstructured":"Jajal, P., Jiang, W., Tewari, A., Woo, J., Thiruvathukal, G.K., and Davis, J.C. (2023). Analysis of failures and risks in deep learning model converters: A case study in the onnx ecosystem. arXiv."},{"key":"ref_42","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., and Batra, D. (2017, January 22\u201329). Grad-cam: Visual explanations from deep networks via gradient-based localization. Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), Venice, Italy.","DOI":"10.1109\/ICCV.2017.74"}],"container-title":["Remote Sensing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.mdpi.com\/2072-4292\/16\/14\/2620\/pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T15:18:37Z","timestamp":1760109517000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.mdpi.com\/2072-4292\/16\/14\/2620"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,17]]},"references-count":42,"journal-issue":{"issue":"14","published-online":{"date-parts":[[2024,7]]}},"alternative-id":["rs16142620"],"URL":"https:\/\/doi.org\/10.3390\/rs16142620","relation":{},"ISSN":["2072-4292"],"issn-type":[{"value":"2072-4292","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7,17]]}}}