{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T00:57:43Z","timestamp":1769302663309,"version":"3.49.0"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:00:00Z","timestamp":1731888000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:00:00Z","timestamp":1731888000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"the National Key R&D Program of China","award":["2022YFB4702300"],"award-info":[{"award-number":["2022YFB4702300"]}]},{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62273097"],"award-info":[{"award-number":["62273097"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"the Guangdong Basic and Applied Basic Research Foundation","award":["2022A1515140044"],"award-info":[{"award-number":["2022A1515140044"]}]},{"name":"the Research Foundation of Universities of Guangdong Province","award":["2021KCXTD083"],"award-info":[{"award-number":["2021KCXTD083"]}]},{"name":"the Foshan Key Area Technology Research Foundation","award":["2120001011009"],"award-info":[{"award-number":["2120001011009"]}]},{"name":"the Guangdong Philosophy and Social Science Program","award":["GD23XTS03"],"award-info":[{"award-number":["GD23XTS03"]}]},{"name":"the Research project of Guangdong Special Equipment Inspection  and Research Institute","award":["2024JD-2-05"],"award-info":[{"award-number":["2024JD-2-05"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Intell Syst"],"DOI":"10.1007\/s44196-024-00630-5","type":"journal-article","created":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T16:41:47Z","timestamp":1731948107000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Channel2DTransformer: A Multi-level Features Self-attention Fusion Module for Semantic Segmentation"],"prefix":"10.1007","volume":"17","author":[{"given":"Weitao","family":"Liu","sequence":"first","affiliation":[]},{"given":"Junjun","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,18]]},"reference":[{"issue":"4","key":"630_CR1","doi-asserted-by":"publisher","first-page":"90","DOI":"10.3390\/technologies10040090","volume":"10","author":"G Rizzoli","year":"2022","unstructured":"Rizzoli, G., Barbato, F., Zanuttigh, P.: Multimodal semantic segmentation in autonomous driving: a review of current approaches and future perspectives. Technologies 10(4), 90 (2022)","journal-title":"Technologies"},{"issue":"1","key":"630_CR2","doi-asserted-by":"publisher","first-page":"11","DOI":"10.3390\/electronics11010011","volume":"11","author":"X Xie","year":"2021","unstructured":"Xie, X., Bai, L., Huang, X.: Real-time lidar point cloud semantic segmentation for autonomous driving. Electronics 11(1), 11 (2021)","journal-title":"Electronics"},{"issue":"2","key":"630_CR3","doi-asserted-by":"publisher","first-page":"1032","DOI":"10.1109\/LRA.2020.2967313","volume":"5","author":"N Marchal","year":"2020","unstructured":"Marchal, N., Moraldo, C., Blum, H., Siegwart, R., Cadena, C., Gawel, A.: Learning densities in feature space for reliable segmentation of indoor scenes. IEEE Robot. Autom. Lett. 5(2), 1032\u20131038 (2020)","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"1","key":"630_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-019-0212-5","volume":"6","author":"G Sreenu","year":"2019","unstructured":"Sreenu, G., Durai, S.: Intelligent video surveillance: a review through deep learning techniques for crowd analysis. J. Big Data 6(1), 1\u201327 (2019)","journal-title":"J. Big Data"},{"key":"630_CR5","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"630_CR6","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1016\/j.ins.2022.06.071","volume":"608","author":"B Zhang","year":"2022","unstructured":"Zhang, B., Gong, X., Wang, J., Tang, F., Zhang, K., Wu, W.: Nonstationary fuzzy neural network based on FCMnet clustering and a modified CG method with Armijo-type rule. Inf. Sci. 608, 313\u2013338 (2022)","journal-title":"Inf. Sci."},{"key":"630_CR7","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"630_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"630_CR9","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"issue":"10","key":"630_CR10","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2020","unstructured":"Wang, J., Sun, K., Cheng, T., Jiang, B., Deng, C., Zhao, Y., Liu, D., Mu, Y., Tan, M., Wang, X., et al.: Deep high-resolution representation learning for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3349\u20133364 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"630_CR11","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"630_CR12","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Zhang, X., Peng, C., Xue, X., Sun, J.: ExFuse: enhancing feature fusion for semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 269\u2013284 (2018)","DOI":"10.1007\/978-3-030-01249-6_17"},{"key":"630_CR13","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: BiSeNet: bilateral segmentation network for real-time semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341 (2018)","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"630_CR14","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141, Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems 30 (2017)"},{"key":"630_CR15","doi-asserted-by":"crossref","unstructured":"Zheng, S., Lu, J., Zhao, H., Zhu, X., Luo, Z., Wang, Y., Fu, Y., Feng, J., Xiang, T., Torr, P.H., et al.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6881\u20136890 (2021)","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"630_CR16","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"630_CR17","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"630_CR18","doi-asserted-by":"crossref","unstructured":"Zheng, S., Lu, J., Zhao, H., Zhu, X., Luo, Z., Wang, Y., Fu, Y., Feng, J., Xiang, T., Torr, P.H., et al.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6881\u20136890 (2021)","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"630_CR19","unstructured":"Chen, J., Lu, Y., Yu, Q., Luo, X., Adeli, E., Wang, Y., Lu, L., Yuille, A.L., Zhou, Y.: TransUNet: transformers make strong encoders for medical image segmentation. arXiv preprint arXiv:2102.04306 (2021)"},{"key":"630_CR20","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: SegFormer: simple and efficient design for semantic segmentation with transformers. Adv. Neural Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"630_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"630_CR22","doi-asserted-by":"crossref","unstructured":"Liu, Z., Hu, H., Lin, Y., Yao, Z., Xie, Z., Wei, Y., Ning, J., Cao, Y., Zhang, Z., Dong, L., et al.: Swin transformer v2: scaling up capacity and resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12009\u201312019 (2022)","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"630_CR23","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"3","key":"630_CR24","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: PVT v2: improved baselines with pyramid vision transformer. Comput. Vis. Media 8(3), 415\u2013424 (2022)","journal-title":"Comput. Vis. Media"},{"key":"630_CR25","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Girshick, R., He, K., Doll\u00e1r, P.: Panoptic feature pyramid networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6399\u20136408 (2019)","DOI":"10.1109\/CVPR.2019.00656"},{"key":"630_CR26","unstructured":"Li, H., Xiong, P., An, J., Wang, L.: Pyramid attention network for semantic segmentation. arXiv preprint arXiv:1805.10180 (2018)"},{"key":"630_CR27","unstructured":"Qin, Z., Liu, J., Zhang, X., Tian, M., Zhou, A., Yi, S., Li, H.: Pyramid fusion transformer for semantic segmentation. arXiv preprint arXiv:2201.04019 (2022)"},{"key":"630_CR28","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"630_CR29","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"630_CR30","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., Jia, J.: Path aggregation network for instance segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8759\u20138768 (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"630_CR31","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L., ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"10","key":"630_CR32","doi-asserted-by":"publisher","first-page":"2702","DOI":"10.1109\/TPAMI.2019.2926463","volume":"42","author":"X Huang","year":"2019","unstructured":"Huang, X., Wang, P., Cheng, X., Zhou, D., Geng, Q., Yang, R.: The ApolloScape open dataset for autonomous driving and its application. IEEE Trans. Pattern Anal. Mach. Intell. 42(10), 2702\u20132719 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"630_CR33","doi-asserted-by":"crossref","unstructured":"Silberman, N., Hoiem, D., Kohli, P., Fergus, R.: Indoor segmentation and support inference from RGBD images. In: Computer Vision\u2013ECCV 2012: 12th European Conference on Computer Vision, Florence, Italy, October 7\u201313, 2012. Proceedings, Part V 12, pp. 746\u2013760. Springer (2012)","DOI":"10.1007\/978-3-642-33715-4_54"},{"issue":"4","key":"630_CR34","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: DeepLab: semantic image segmentation with deep convolutional nets, Atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"630_CR35","doi-asserted-by":"crossref","unstructured":"Lin, G., Milan, A., Shen, C., Reid, I.: RefineNet: multi-path refinement networks for high-resolution semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1925\u20131934 (2017)","DOI":"10.1109\/CVPR.2017.549"},{"key":"630_CR36","doi-asserted-by":"crossref","unstructured":"Zhang, R., Tang, S., Zhang, Y., Li, J., Yan, S.: Scale-adaptive convolutions for scene parsing. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2031\u20132039 (2017)","DOI":"10.1109\/ICCV.2017.224"},{"key":"630_CR37","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: BiSeNet: bilateral segmentation network for real-time semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341 (2018)","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"630_CR38","doi-asserted-by":"crossref","unstructured":"Xu, D., Ouyang, W., Wang, X., Sebe, N.: PAD-Net: multi-tasks guided prediction-and-distillation network for simultaneous depth estimation and scene parsing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 675\u2013684 (2018)","DOI":"10.1109\/CVPR.2018.00077"},{"key":"630_CR39","doi-asserted-by":"crossref","unstructured":"Ding, H., Jiang, X., Shuai, B., Liu, A.Q., Wang, G.: Semantic correlation promoted shape-variant context for segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8885\u20138894 (2019)","DOI":"10.1109\/CVPR.2019.00909"},{"key":"630_CR40","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Xu, M., Bai, S., Huang, T., Bai, X.: Asymmetric non-local neural networks for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 593\u2013602 (2019)","DOI":"10.1109\/ICCV.2019.00068"},{"key":"630_CR41","doi-asserted-by":"crossref","unstructured":"Fu, J., Liu, J., Tian, H., Li, Y., Bao, Y., Fang, Z., Lu, H.: Dual attention network for scene segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3146\u20133154 (2019)","DOI":"10.1109\/CVPR.2019.00326"},{"key":"630_CR42","doi-asserted-by":"crossref","unstructured":"Qi, X., Liao, R., Jia, J., Fidler, S., Urtasun, R.: 3D graph neural networks for RGBD semantic segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5199\u20135208 (2017)","DOI":"10.1109\/ICCV.2017.556"},{"key":"630_CR43","doi-asserted-by":"crossref","unstructured":"Kong, S., Fowlkes, C.C.: Recurrent scene parsing with perspective understanding in the loop. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 956\u2013965 (2018)","DOI":"10.1109\/CVPR.2018.00106"},{"key":"630_CR44","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Cai, R., Li, Z., Zhao, X., Huang, K.: Locality-sensitive deconvolution networks with gated fusion for RGB-D indoor semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3029\u20133037 (2017)","DOI":"10.1109\/CVPR.2017.161"},{"key":"630_CR45","doi-asserted-by":"crossref","unstructured":"Lin, D., Chen, G., Cohen-Or, D., Heng, P.-A., Huang, H.: Cascaded feature network for semantic segmentation of RGB-D images. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1311\u20131319 (2017)","DOI":"10.1109\/ICCV.2017.147"},{"key":"630_CR46","doi-asserted-by":"crossref","unstructured":"Hu, X., Yang, K., Fei, L., Wang, K.: ACNet: attention based network to exploit complementary features for RGBD semantic segmentation. In: 2019 IEEE International Conference on Image Processing (ICIP). pp. 1440\u20131444. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803025"}],"container-title":["International Journal of Computational Intelligence Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44196-024-00630-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s44196-024-00630-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44196-024-00630-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T17:07:21Z","timestamp":1731949641000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s44196-024-00630-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,18]]},"references-count":46,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["630"],"URL":"https:\/\/doi.org\/10.1007\/s44196-024-00630-5","relation":{},"ISSN":["1875-6883"],"issn-type":[{"value":"1875-6883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,18]]},"assertion":[{"value":"17 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors declare that they have no conflict of interest regarding this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"282"}}