{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T12:24:03Z","timestamp":1758630243127},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,2,16]],"date-time":"2024-02-16T00:00:00Z","timestamp":1708041600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,16]],"date-time":"2024-02-16T00:00:00Z","timestamp":1708041600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s11760-024-03030-6","type":"journal-article","created":{"date-parts":[[2024,2,16]],"date-time":"2024-02-16T19:03:12Z","timestamp":1708110192000},"page":"3661-3672","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["TBFormer: three-branch efficient transformer for semantic segmentation"],"prefix":"10.1007","volume":"18","author":[{"given":"Can","family":"Wei","sequence":"first","affiliation":[]},{"given":"Yan","family":"Wei","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,16]]},"reference":[{"key":"3030_CR1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2572683","author":"J Long","year":"2017","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. IEEE Trans. Pattern Anal. Mach. Intell. (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2572683","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3030_CR2","unstructured":"Liang-Chieh, C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.: Semantic image segmentation with deep convolutional nets and fully connected CRFs. In International Conference on Learning Representations (2015)"},{"issue":"4","key":"3030_CR3","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/tpami.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017). https:\/\/doi.org\/10.1109\/tpami.2017.2699184","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3030_CR4","unstructured":"Chen, L.C.: Rethinking atrous convolution for semantic image segmentation. Comput. Res. Reposit. (2017)"},{"key":"3030_CR5","doi-asserted-by":"publisher","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder\u2013decoder with atrous separable convolution for semantic image segmentation. In Proceedings of the European conference on computer vision (ECCV), pp. 801\u2013818 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"3030_CR6","unstructured":"Kolesnikov, A., Dosovitskiy, A., Weissenborn, D., Heigold, G., Uszkoreit, J., Beyer, L., Zhai, X.: An image is worth 16x16 words: transformers for image recognition at scale (2021)"},{"key":"3030_CR7","doi-asserted-by":"publisher","unstructured":"Zheng, S., Lu, J., Zhao, H., Zhu, X., Luo, Z., Wang, Y., Zhang, L.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6881\u20136890 (2021). https:\/\/doi.org\/10.1109\/cvpr46437.2021.00681","DOI":"10.1109\/cvpr46437.2021.00681"},{"key":"3030_CR8","doi-asserted-by":"publisher","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021). https:\/\/doi.org\/10.1109\/iccv48922.2021.00986","DOI":"10.1109\/iccv48922.2021.00986"},{"key":"3030_CR9","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: SegFormer: simple and efficient design for semantic segmentation with transformers. Adv. Neural Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"3030_CR10","doi-asserted-by":"publisher","unstructured":"Wang, W., Xie, E., Li, X., Fan, D. P., Song, K., Liang, D., Shao, L.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021). https:\/\/doi.org\/10.1109\/iccv48922.2021.00061","DOI":"10.1109\/iccv48922.2021.00061"},{"key":"3030_CR11","doi-asserted-by":"publisher","unstructured":"Boykov, Y.Y., Jolly, M.P.: interactive graph cuts for optimal boundary and region segmentation of objects in ND images. In: Proceedings Eighth IEEE International Conference on Computer Vision. ICCV 2001, vol. 1, pp. 105\u2013112. IEEE (2001). https:\/\/doi.org\/10.1109\/iccv.2001.937505","DOI":"10.1109\/iccv.2001.937505"},{"issue":"1","key":"3030_CR12","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1109\/tsmc.1979.4310076","volume":"9","author":"N Otsu","year":"1979","unstructured":"Otsu, N.: A threshold selection method from gray-level histograms. IEEE Trans. Syst. Man Cybern. 9(1), 62\u201366 (1979). https:\/\/doi.org\/10.1109\/tsmc.1979.4310076","journal-title":"IEEE Trans. Syst. Man Cybern."},{"issue":"11","key":"3030_CR13","doi-asserted-by":"publisher","first-page":"2274","DOI":"10.1109\/tpami.2012.120","volume":"34","author":"R Achanta","year":"2012","unstructured":"Achanta, R., Shaji, A., Smith, K., Lucchi, A., Fua, P., S\u00fcsstrunk, S.: SLIC superpixels compared to state-of-the-art superpixel methods. IEEE Trans. Pattern Anal. Mach. Intell. 34(11), 2274\u20132282 (2012). https:\/\/doi.org\/10.1109\/tpami.2012.120","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3030_CR14","doi-asserted-by":"publisher","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017). https:\/\/doi.org\/10.1109\/cvpr.2017.660","DOI":"10.1109\/cvpr.2017.660"},{"key":"3030_CR15","doi-asserted-by":"publisher","unstructured":"Fu, J., Liu, J., Tian, H., Li, Y., Bao, Y., Fang, Z., Lu, H.: Dual attention network for scene segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3146\u20133154 (2019). https:\/\/doi.org\/10.1109\/cvpr.2019.00326","DOI":"10.1109\/cvpr.2019.00326"},{"key":"3030_CR16","doi-asserted-by":"publisher","unstructured":"Huang, Z., Wang, X., Huang, L., Huang, C., Wei, Y., Liu, W.: Ccnet: criss-cross attention for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 603\u2013612 (2019). https:\/\/doi.org\/10.1109\/iccv.2019.00069","DOI":"10.1109\/iccv.2019.00069"},{"key":"3030_CR17","doi-asserted-by":"publisher","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: Bisenet: bilateral segmentation network for real-time semantic segmentation. In Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_20","DOI":"10.1007\/978-3-030-01261-8_20"},{"issue":"3","key":"3030_CR18","doi-asserted-by":"publisher","first-page":"3448","DOI":"10.1109\/tits.2022.3228042","volume":"24","author":"H Pan","year":"2022","unstructured":"Pan, H., Hong, Y., Sun, W., Jia, Y.: Deep dual-resolution networks for real-time and accurate semantic segmentation of traffic scenes. IEEE Trans. Intell. Transp. Syst. 24(3), 3448\u20133460 (2022). https:\/\/doi.org\/10.1109\/tits.2022.3228042","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"3030_CR19","unstructured":"Fu, L., Tian, H., Zhai, X. B., Gao, P., Peng, X.: IncepFormer: efficient inception transformer with pyramid pooling for semantic segmentation (2022). arXiv preprint arXiv:2212.03035"},{"key":"3030_CR20","doi-asserted-by":"publisher","unstructured":"Wu, Y.H., Liu, Y., Zhan, X., Cheng, M.M.: P2T: pyramid pooling transformer for scene understanding. IEEE Trans. Pattern Anal. Mach. Intell. (2022). https:\/\/doi.org\/10.1109\/tpami.2022.3202765","DOI":"10.1109\/tpami.2022.3202765"},{"key":"3030_CR21","doi-asserted-by":"publisher","unstructured":"Peng, C., Zhang, X., Yu, G., Luo, G., Sun, J.: Large kernel matters-improve semantic segmentation by global convolutional network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4353\u20134361 (2017). https:\/\/doi.org\/10.1109\/cvpr.2017.189","DOI":"10.1109\/cvpr.2017.189"},{"key":"3030_CR22","unstructured":"Liu, W., Rabinovich, A., Berg, A.C.: ParseNet: looking wider to see better (2016)"},{"key":"3030_CR23","first-page":"7423","volume":"35","author":"J Wang","year":"2022","unstructured":"Wang, J., Gou, C., Wu, Q., Feng, H., Han, J., Ding, E., Wang, J.: Rtformer: efficient design for real-time semantic segmentation with transformer. Adv. Neural Inf. Process. Syst. 35, 7423\u20137436 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"3030_CR24","unstructured":"Sobel, I., Feldman, G.: A 3x3 isotropic gradient operator for image processing. A talk at the Stanford artificial project, pp. 271\u2013272 (1968)"},{"key":"3030_CR25","doi-asserted-by":"publisher","unstructured":"Liang, T., Jin, Y., Li, Y., Wang, T.: Edcnn: edge enhancement-based densely connected network with compound loss for low-dose ct denoising. In: 2020 15th IEEE International Conference on Signal Processing (ICSP), vol. 1, pp. 193\u2013198. IEEE (2020). https:\/\/doi.org\/10.1109\/icsp48669.2020.9320928","DOI":"10.1109\/icsp48669.2020.9320928"},{"key":"3030_CR26","doi-asserted-by":"publisher","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016). https:\/\/doi.org\/10.1109\/cvpr.2016.350","DOI":"10.1109\/cvpr.2016.350"},{"key":"3030_CR27","doi-asserted-by":"publisher","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ade20k dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 633\u2013641 (2017). https:\/\/doi.org\/10.1109\/cvpr.2017.544","DOI":"10.1109\/cvpr.2017.544"},{"key":"3030_CR28","doi-asserted-by":"publisher","unstructured":"Caesar, H., Uijlings, J., Ferrari, V.: Coco-stuff: thing and stuff classes in context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1209\u20131218 (2018). https:\/\/doi.org\/10.1109\/cvpr.2018.00132","DOI":"10.1109\/cvpr.2018.00132"},{"key":"3030_CR29","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vis. 88, 303\u2013338 (2010). https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int. J. Comput. Vis."},{"key":"3030_CR30","unstructured":"Contributors, M.: MM Segmentation: openmmlab semantic segmentation toolbox and benchmark (2020).https:\/\/github.com\/open-mmlab\/mmsegmentation"},{"key":"3030_CR31","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In International Conference on Machine Learning, pp. 448\u2013456. PMLR (2015)"},{"key":"3030_CR32","unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural networks. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 315\u2013323. JMLR Workshop and Conference Proceedings (2011)"},{"key":"3030_CR33","unstructured":"Yuan, Y., Fu, R., Huang, L., Lin, W., Zhang, C., Chen, X., Wang, J.: Hrformer: high-resolution transformer for dense prediction (2021). arXiv preprint arXiv:2110.09408"},{"key":"3030_CR34","first-page":"17864","volume":"34","author":"B Cheng","year":"2021","unstructured":"Cheng, B., Schwing, A., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. Adv. Neural Inf. Process. Syst. 34, 17864\u201317875 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"3030_CR35","doi-asserted-by":"publisher","unstructured":"Li, X., Zhong, Z., Wu, J., Yang, Y., Lin, Z., Liu, H.: Expectation-maximization attention networks for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9167\u20139176 (2019). https:\/\/doi.org\/10.1109\/iccv.2019.00926","DOI":"10.1109\/iccv.2019.00926"},{"key":"3030_CR36","doi-asserted-by":"publisher","unstructured":"Zhao, H., Zhang, Y., Liu, S., Shi, J., Loy, C. C., Lin, D., Jia, J.: Psanet: point-wise spatial attention network for scene parsing. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 267-283 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01240-3_17","DOI":"10.1007\/978-3-030-01240-3_17"},{"key":"3030_CR37","doi-asserted-by":"publisher","unstructured":"Kirillov, A., Girshick, R., He, K., Doll\u00e1r, P.: Panoptic feature pyramid networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6399\u20136408 (2019). https:\/\/doi.org\/10.1109\/cvpr.2019.00656","DOI":"10.1109\/cvpr.2019.00656"},{"key":"3030_CR38","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Chen, X., Wang, J.: Object-context representations for semantic segmentation. In: Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part VI 16, pp. 173\u2013190. Springer (2020)","DOI":"10.1007\/978-3-030-58539-6_11"},{"key":"3030_CR39","doi-asserted-by":"publisher","unstructured":"Zhang, H., Dana, K., Shi, J., Zhang, Z., Wang, X., Tyagi, A., Agrawal, A.: Context encoding for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7151\u20137160 (2018). https:\/\/doi.org\/10.1109\/cvpr.2018.00747","DOI":"10.1109\/cvpr.2018.00747"},{"key":"3030_CR40","doi-asserted-by":"publisher","unstructured":"Takikawa, T., Acuna, D., Jampani, V., Fidler, S.: Gated-scnn: gated shape cnns for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5229\u20135238 (2019). https:\/\/doi.org\/10.1109\/iccv.2019.00533","DOI":"10.1109\/iccv.2019.00533"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-024-03030-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-024-03030-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-024-03030-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T16:11:33Z","timestamp":1711383093000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-024-03030-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,16]]},"references-count":40,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["3030"],"URL":"https:\/\/doi.org\/10.1007\/s11760-024-03030-6","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,16]]},"assertion":[{"value":"29 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 January 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}