{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T10:57:35Z","timestamp":1778324255901,"version":"3.51.4"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,2,23]],"date-time":"2024-02-23T00:00:00Z","timestamp":1708646400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,23]],"date-time":"2024-02-23T00:00:00Z","timestamp":1708646400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Guizhou Provincial Science and Technology Foundation","award":["No. QKHJC-ZK[2021]Key001"],"award-info":[{"award-number":["No. QKHJC-ZK[2021]Key001"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Evolving Systems"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s12530-024-09567-8","type":"journal-article","created":{"date-parts":[[2024,2,23]],"date-time":"2024-02-23T10:02:39Z","timestamp":1708682559000},"page":"1429-1440","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Rgb-t semantic segmentation based on cross-operational fusion attention in autonomous driving scenario"],"prefix":"10.1007","volume":"15","author":[{"given":"Jiyou","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Rongfen","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Wenhao","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Yuhong","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,23]]},"reference":[{"issue":"12","key":"9567_CR1","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan V, Kendall A, Cipolla R (2017) Segnet: A deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans Pattern Anal Mach Intell 39(12):2481\u20132495","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1928","key":"9567_CR2","doi-asserted-by":"publisher","first-page":"4649","DOI":"10.1098\/rsta.2010.0110","volume":"368","author":"M Campbell","year":"2010","unstructured":"Campbell M, Egerstedt M, How JP, Murray RM (2010) Autonomous driving in urban environments: approaches, lessons and challenges. Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences 368(1928):4649\u20134672","journal-title":"Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences"},{"issue":"4","key":"9567_CR3","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9567_CR4","doi-asserted-by":"crossref","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 801\u2013818 (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"9567_CR5","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"9567_CR6","doi-asserted-by":"crossref","unstructured":"Deng, F., Feng, H., Liang, M., Wang, H., Yang, Y., Gao, Y., Chen, J., Hu, J., Guo, X., Lam, T.L.: Feanet: Feature-enhanced attention network for rgb-thermal real-time semantic segmentation. In: 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4467\u20134473 (2021). IEEE","DOI":"10.1109\/IROS51168.2021.9636084"},{"key":"9567_CR7","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"9567_CR8","doi-asserted-by":"crossref","unstructured":"Fan, D.-P., Zhai, Y., Borji, A., Yang, J., Shao, L.: Bbs-net: Rgb-d salient object detection with a bifurcated backbone strategy network. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XII, pp. 275\u2013292 (2020). Springer","DOI":"10.1007\/978-3-030-58610-2_17"},{"key":"9567_CR9","doi-asserted-by":"crossref","unstructured":"Fan, R., Wang, Z., Zhu, Q.: Egfnet: Efficient guided feature fusion network for skin cancer lesion segmentation. In: 2022 the 6th International Conference on Innovation in Artificial Intelligence (ICIAI), pp. 95\u201399 (2022)","DOI":"10.1145\/3529466.3529482"},{"issue":"3","key":"9567_CR10","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TITS.2020.2972974","volume":"22","author":"D Feng","year":"2020","unstructured":"Feng D, Haase-Sch\u00fctz C, Rosenbaum L, Hertlein H, Glaeser C, Timm F, Wiesbeck W, Dietmayer K (2020) Deep multi-modal object detection and semantic segmentation for autonomous driving: Datasets, methods, and challenges. IEEE Trans Intell Transp Syst 22(3):1341\u20131360","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"1","key":"9567_CR11","first-page":"137","volume":"34","author":"M Firdaus-Nawi","year":"2011","unstructured":"Firdaus-Nawi M, Noraini O, Sabri M, Siti-Zahrah A, Zamri-Saad M, Latifah H (2011) Deeplabv3+ _encoder-decoder with atrous separable convolution for semantic image segmentation. Pertanika J Trop Agric Sci 34(1):137\u2013143","journal-title":"Pertanika J Trop Agric Sci"},{"issue":"3","key":"9567_CR12","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s41095-022-0271-y","volume":"8","author":"M-H Guo","year":"2022","unstructured":"Guo M-H, Xu T-X, Liu J-J, Liu Z-N, Jiang P-T, Mu T-J, Zhang S-H, Martin RR, Cheng M-M, Hu S-M (2022) Attention mechanisms in computer vision: A survey. Computational visual media 8(3):331\u2013368","journal-title":"Computational visual media"},{"key":"9567_CR13","doi-asserted-by":"crossref","unstructured":"Ha, Q., Watanabe, K., Karasawa, T., Ushiku, Y., Harada, T.: Mfnet: Towards real-time semantic segmentation for autonomous vehicles with multi-spectral scenes. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 5108\u20135115 (2017). IEEE","DOI":"10.1109\/IROS.2017.8206396"},{"key":"9567_CR14","doi-asserted-by":"crossref","unstructured":"Hazirbas, C., Ma, L., Domokos, C., Cremers, D.: Fusenet: Incorporating depth into semantic segmentation via fusion-based cnn architecture. In: Computer Vision\u2013ACCV 2016: 13th Asian Conference on Computer Vision, Taipei, Taiwan, November 20-24, 2016, Revised Selected Papers, Part I 13, pp. 213\u2013228 (2017). Springer","DOI":"10.1007\/978-3-319-54181-5_14"},{"key":"9567_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"9567_CR16","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"9567_CR17","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"9567_CR18","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Huang, L., Huang, C., Wei, Y., Liu, W.: Ccnet: Criss-cross attention for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 603\u2013612 (2019)","DOI":"10.1109\/ICCV.2019.00069"},{"issue":"11","key":"9567_CR19","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1109\/34.730558","volume":"20","author":"L Itti","year":"1998","unstructured":"Itti L, Koch C, Niebur E (1998) A model of saliency-based visual attention for rapid scene analysis. IEEE Trans Pattern Anal Mach Intell 20(11):1254\u20131259","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9567_CR20","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al.: Spatial transformer networks. Advances in neural information processing systems 28 (2015)"},{"key":"9567_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2020.3022438","volume":"70","author":"L Jian","year":"2020","unstructured":"Jian L, Yang X, Liu Z, Jeon G, Gao M, Chisholm D (2020) Sedrfuse: A symmetric encoder-decoder with residual block network for infrared and visible image fusion. IEEE Trans Instrum Meas 70:1\u201315","journal-title":"IEEE Trans Instrum Meas"},{"key":"9567_CR22","doi-asserted-by":"crossref","unstructured":"Knyaz, V.A., Vygolov, O., Kniaz, V.V., Vizilter, Y., Gorbatsevich, V., Luhmann, T., Conen, N.: Deep learning of convolutional auto-encoder for image matching and 3d object reconstruction in the infrared range. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 2155\u20132164 (2017)","DOI":"10.1109\/ICCVW.2017.252"},{"issue":"6","key":"9567_CR23","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390","journal-title":"Commun ACM"},{"key":"9567_CR24","doi-asserted-by":"crossref","unstructured":"Li, X., Zhong, Z., Wu, J., Yang, Y., Lin, Z., Liu, H.: Expectation-maximization attention networks for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9167\u20139176 (2019)","DOI":"10.1109\/ICCV.2019.00926"},{"key":"9567_CR25","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"9567_CR26","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"9567_CR27","doi-asserted-by":"crossref","unstructured":"Milletari, F., Navab, N., Ahmadi, S.-A.: V-net: Fully convolutional neural networks for volumetric medical image segmentation. In: 2016 Fourth International Conference on 3D Vision (3DV), pp. 565\u2013571 (2016). Ieee","DOI":"10.1109\/3DV.2016.79"},{"key":"9567_CR28","unstructured":"Mnih, V., Heess, N., Graves, A., et al.: Recurrent models of visual attention. Advances in neural information processing systems 27 (2014)"},{"issue":"4","key":"9567_CR29","doi-asserted-by":"publisher","first-page":"90","DOI":"10.3390\/technologies10040090","volume":"10","author":"G Rizzoli","year":"2022","unstructured":"Rizzoli G, Barbato F, Zanuttigh P (2022) Multimodal semantic segmentation in autonomous driving: A review of current approaches and future perspectives. Technologies 10(4):90","journal-title":"Technologies"},{"key":"9567_CR30","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18, pp. 234\u2013241 (2015). Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"9567_CR31","doi-asserted-by":"crossref","unstructured":"Shivakumar, S.S., Rodrigues, N., Zhou, A., Miller, I.D., Kumar, V., Taylor, C.J.: Pst900: Rgb-thermal calibration, dataset and segmentation network. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 9441\u20139447 (2020). IEEE","DOI":"10.1109\/ICRA40945.2020.9196831"},{"issue":"3","key":"9567_CR32","doi-asserted-by":"publisher","first-page":"2576","DOI":"10.1109\/LRA.2019.2904733","volume":"4","author":"Y Sun","year":"2019","unstructured":"Sun Y, Zuo W, Liu M (2019) Rtfnet: Rgb-thermal fusion network for semantic segmentation of urban scenes. IEEE Robotics and Automation Letters 4(3):2576\u20132583","journal-title":"IEEE Robotics and Automation Letters"},{"issue":"3","key":"9567_CR33","doi-asserted-by":"publisher","first-page":"1000","DOI":"10.1109\/TASE.2020.2993143","volume":"18","author":"Y Sun","year":"2020","unstructured":"Sun Y, Zuo W, Yun P, Wang H, Liu M (2020) Fuseseg: Semantic segmentation of urban scenes based on rgb and thermal data fusion. IEEE Trans Autom Sci Eng 18(3):1000\u20131011","journal-title":"IEEE Trans Autom Sci Eng"},{"issue":"5","key":"9567_CR34","doi-asserted-by":"publisher","first-page":"2949","DOI":"10.1109\/TCSVT.2021.3099120","volume":"32","author":"J Wang","year":"2021","unstructured":"Wang J, Song K, Bao Y, Huang L, Yan Y (2021) Cgfnet: Cross-guided fusion network for rgb-t salient object detection. IEEE Trans Circuits Syst Video Technol 32(5):2949\u20132961","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"9567_CR35","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"9567_CR36","doi-asserted-by":"crossref","unstructured":"Wang, W., Neumann, U.: Depth-aware cnn for rgb-d segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 135\u2013150 (2018)","DOI":"10.1007\/978-3-030-01252-6_9"},{"key":"9567_CR37","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: Cbam: Convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"9567_CR38","doi-asserted-by":"crossref","unstructured":"Wu, X., Wu, Z., Guo, H., Ju, L., Wang, S.: Dannet: A one-stage domain adaptation network for unsupervised nighttime semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15769\u201315778 (2021)","DOI":"10.1109\/CVPR46437.2021.01551"},{"key":"9567_CR39","doi-asserted-by":"crossref","unstructured":"Wu J, Zhou W, Qian X, Lei J, Yu L, Luo T (2023) Mfenet: Multitype fusion and enhancement network for detecting salient objects in rgb-t images. Digital Signal Processing 133:103827","DOI":"10.1016\/j.dsp.2022.103827"},{"key":"9567_CR40","unstructured":"XIONG, H.T.: Research of thermal image semantic segmentation in driving scenes. Master\u2019s thesis, South China University of Technology (2021)"},{"key":"9567_CR41","doi-asserted-by":"crossref","unstructured":"Yang, M., Yu, K., Zhang, C., Li, Z., Yang, K.: Denseaspp for semantic segmentation in street scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3684\u20133692 (2018)","DOI":"10.1109\/CVPR.2018.00388"},{"key":"9567_CR42","doi-asserted-by":"crossref","unstructured":"Yi, K., Wu, J.: Probabilistic end-to-end noise correction for learning with noisy labels. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7017\u20137025 (2019)","DOI":"10.1109\/CVPR.2019.00718"},{"key":"9567_CR43","doi-asserted-by":"crossref","unstructured":"Yuan, L., Chen, Y., Wang, T., Yu, W., Shi, Y., Jiang, Z.-H., Tay, F.E., Feng, J., Yan, S.: Tokens-to-token vit: Training vision transformers from scratch on imagenet. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 558\u2013567 (2021)","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"9567_CR44","unstructured":"Yuan\u00a0H, Z.Y. Zhao\u00a0T: Nested semantic segmentation network fusing deep difference features. Application Research of Computers 39(9), 2850\u201328532860 (2022)"},{"key":"9567_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Zhao, S., Luo, Y., Zhang, D., Huang, N., Han, J.: Abmdrnet: Adaptive-weighted bi-directional modality difference reduction network for rgb-t semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2633\u20132642 (2021)","DOI":"10.1109\/CVPR46437.2021.00266"},{"key":"9567_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Ding, L., Sharma, G.: Hazerd: an outdoor scene dataset and benchmark for single image dehazing. In: 2017 IEEE International Conference on Image Processing (ICIP), pp. 3205\u20133209 (2017). IEEE","DOI":"10.1109\/ICIP.2017.8296874"},{"key":"9567_CR47","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"}],"container-title":["Evolving Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12530-024-09567-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12530-024-09567-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12530-024-09567-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,23]],"date-time":"2024-07-23T10:44:03Z","timestamp":1721731443000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12530-024-09567-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,23]]},"references-count":47,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["9567"],"URL":"https:\/\/doi.org\/10.1007\/s12530-024-09567-8","relation":{},"ISSN":["1868-6478","1868-6486"],"issn-type":[{"value":"1868-6478","type":"print"},{"value":"1868-6486","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,23]]},"assertion":[{"value":"10 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}