{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T09:50:09Z","timestamp":1774000209458,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,12]]},"DOI":"10.1145\/3788149.3788174","type":"proceedings-article","created":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T06:35:19Z","timestamp":1773988519000},"page":"249-257","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CMTNet: Mamba-Transformer Collaboration for Real-Time Semantic Segmentation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-6559-8633","authenticated-orcid":false,"given":"Shiqi","family":"Zhong","sequence":"first","affiliation":[{"name":"Wuhan University of Science and Technology, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7782-1371","authenticated-orcid":false,"given":"Wenwu","family":"Wang","sequence":"additional","affiliation":[{"name":"Wuhan University of Science and Technology, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7001-5775","authenticated-orcid":false,"given":"Lei","family":"Zhu","sequence":"additional","affiliation":[{"name":"Wuhan University of Science and Technology, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9419-0761","authenticated-orcid":false,"given":"Wenzhuo","family":"Tan","sequence":"additional","affiliation":[{"name":"Wuhan University of Science and Technology, Wuhan, Hubei, China"}]}],"member":"320","published-online":{"date-parts":[[2026,3,19]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Gabriel\u00a0J Brostow Julien Fauqueur and Roberto Cipolla. 2009. Semantic object classes in video: A high-definition ground truth database. Pattern recognition letters 30 2 (2009) 88\u201397.","DOI":"10.1016\/j.patrec.2008.04.005"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM55620.2022.9995440"},{"key":"e_1_3_3_1_4_2","unstructured":"Wuyang Chen Xinyu Gong Xianming Liu Qian Zhang Yuan Li and Zhangyang Wang. 2019. Fasterseg: Searching for faster real-time semantic segmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1912.10917 (2019)."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00326"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Guangwei Gao Guoan Xu Juncheng Li Yi Yu Huimin Lu and Jian Yang. 2022. FBSNet: A fast bilateral symmetrical network for real-time semantic segmentation. IEEE Transactions on Multimedia 25 (2022) 3273\u20133283.","DOI":"10.1109\/TMM.2022.3157995"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Guangwei Gao Guoan Xu Yi Yu Jin Xie Jian Yang and Dong Yue. 2021. MSCFNet: A lightweight network with multi-scale context fusion for real-time semantic segmentation. IEEE Transactions on Intelligent Transportation Systems 23 12 (2021) 25489\u201325499.","DOI":"10.1109\/TITS.2021.3098355"},{"key":"e_1_3_3_1_9_2","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.00752 (2023)."},{"key":"e_1_3_3_1_10_2","unstructured":"Meng-Hao Guo Zheng-Ning Liu Tai-Jiang Mu and Shi-Min Hu. 2022. Beyond self-attention: External attention using two linear layers for visual tasks. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 5 (2022) 5436\u20135447."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00069"},{"key":"e_1_3_3_1_13_2","unstructured":"Gen Li Inyoung Yun Jonghyun Kim and Joongkyu Kim. 2019. Dabnet: Depth-wise asymmetric bottleneck for real-time semantic segmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1907.11357 (2019)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00975"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CAC59555.2023.10450923"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_3_1_17_2","unstructured":"Jun Ma Feifei Li and Bo Wang. 2024. U-mamba: Enhancing long-range dependency for biomedical image segmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.04722 (2024)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"e_1_3_3_1_19_2","unstructured":"Xianping Ma Xiaokang Zhang and Man-On Pun. 2024. RS 3 Mamba: Visual State Space Model for Remote Sensing Image Semantic Segmentation. IEEE Geoscience and Remote Sensing Letters (2024)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00941"},{"key":"e_1_3_3_1_21_2","unstructured":"Adam Paszke Abhishek Chaurasia Sangpil Kim and Eugenio Culurciello. 2016. Enet: A deep neural network architecture for real-time semantic segmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1606.02147 (2016)."},{"key":"e_1_3_3_1_22_2","unstructured":"Rudra\u00a0PK Poudel Stephan Liwicki and Roberto Cipolla. 2019. Fast-scnn: Fast semantic segmentation network. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1902.04502 (2019)."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Eduardo Romera Jos\u00e9\u00a0M Alvarez Luis\u00a0M Bergasa and Roberto Arroyo. 2017. Erfnet: Efficient residual factorized convnet for real-time semantic segmentation. IEEE Transactions on Intelligent Transportation Systems 19 1 (2017) 263\u2013272.","DOI":"10.1109\/TITS.2017.2750080"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_3_1_26_2","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803154"},{"key":"e_1_3_3_1_28_2","unstructured":"Renkai Wu Yinghao Liu Pengchen Liang and Qing Chang. 2024. Ultralight vm-unet: Parallel vision mamba significantly reduces parameters for skin lesion segmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.20035 (2024)."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Changqian Yu Changxin Gao Jingbo Wang Gang Yu Chunhua Shen and Nong Sang. 2021. Bisenet v2: Bilateral network with guided aggregation for real-time semantic segmentation. International journal of computer vision 129 (2021) 3051\u20133068.","DOI":"10.1007\/s11263-021-01515-2"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Tianyi Zhang Guosheng Lin Jianfei Cai Tong Shen Chunhua Shen and Alex\u00a0C Kot. 2019. Decoupled spatial neural attention for weakly supervised semantic segmentation. IEEE Transactions on Multimedia 21 11 (2019) 2930\u20132941.","DOI":"10.1109\/TMM.2019.2914870"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_25"}],"event":{"name":"CSAI 2025: 2025 The 9th International Conference on Computer Science and Artificial Intelligence","location":"Beijing China","acronym":"CSAI 2025"},"container-title":["Proceedings of the 2025 9th International Conference on Computer Science and Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3788149.3788174","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T06:39:56Z","timestamp":1773988796000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3788149.3788174"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,12]]},"references-count":32,"alternative-id":["10.1145\/3788149.3788174","10.1145\/3788149"],"URL":"https:\/\/doi.org\/10.1145\/3788149.3788174","relation":{},"subject":[],"published":{"date-parts":[[2025,12,12]]},"assertion":[{"value":"2026-03-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}