{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:20:49Z","timestamp":1772119249155,"version":"3.50.1"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFA0713503"],"award-info":[{"award-number":["2020YFA0713503"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFA0713503"],"award-info":[{"award-number":["2020YFA0713503"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s00371-024-03590-1","type":"journal-article","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T10:02:56Z","timestamp":1726480976000},"page":"3109-3128","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Lunet: an enhanced upsampling fusion network with efficient self-attention for semantic segmentation"],"prefix":"10.1007","volume":"41","author":[{"given":"Yan","family":"Zhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haibin","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yin","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianxun","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard","family":"Irampaye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongli","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengpeng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,16]]},"reference":[{"key":"3590_CR1","doi-asserted-by":"publisher","unstructured":"Zhou, Y., Zhou, H., Li, N., Li, J., Wang, D.: Lightweight self-attention network for semantic segmentation. In: 2022 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138 (2022). https:\/\/doi.org\/10.1109\/IJCNN55064.2022.9891928","DOI":"10.1109\/IJCNN55064.2022.9891928"},{"issue":"3","key":"3590_CR2","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TITS.2020.2972974","volume":"22","author":"D Feng","year":"2021","unstructured":"Feng, D., Haase-Schutz, C., Rosenbaum, L., Hertlein, H., Glaser, C., Timm, F., Wiesbeck, W., Dietmayer, K.: Deep multi-modal object detection and semantic segmentation for autonomous driving: datasets, methods, and challenges. IEEE Trans. Intell. Transport. Syst. 22(3), 1341\u20131360 (2021). https:\/\/doi.org\/10.1109\/TITS.2020.2972974","journal-title":"IEEE Trans. Intell. Transport. Syst."},{"issue":"2","key":"3590_CR3","doi-asserted-by":"publisher","first-page":"584","DOI":"10.1038\/s41591-023-02702-z","volume":"30","author":"L Dai","year":"2024","unstructured":"Dai, L., Sheng, B., Chen, T., Wu, Q., Liu, R., Cai, C., Wu, L., Yang, D., Hamzah, H., Liu, Y., Wang, X., Guan, Z., Yu, S., Li, T., Tang, Z., Ran, A., Che, H., Chen, H., Zheng, Y., Shu, J., Huang, S., Wu, C., Lin, S., Liu, D., Li, J., Wang, Z., Meng, Z., Shen, J., Hou, X., Deng, C., Ruan, L., Lu, F., Chee, M., Quek, T.C., Srinivasan, R., Raman, R., Sun, X., Wang, Y.X., Wu, J., Jin, H., Dai, R., Shen, D., Yang, X., Guo, M., Zhang, C., Cheung, C.Y., Tan, G.S.W., Tham, Y.-C., Cheng, C.-Y., Li, H., Wong, T.Y., Jia, W.: A deep learning system for predicting time to progression of diabetic retinopathy. Nat. Med. 30(2), 584\u2013594 (2024). https:\/\/doi.org\/10.1038\/s41591-023-02702-z","journal-title":"Nat. Med."},{"key":"3590_CR4","doi-asserted-by":"publisher","unstructured":"Changpinyo, S., Pont-Tuset, J., Ferrari, V., Soricut, R.: Telling the what while pointing to the where: multimodal queries for image retrieval. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 12136\u201312146 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.01192","DOI":"10.1109\/ICCV48922.2021.01192"},{"issue":"4","key":"3590_CR5","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1109\/TPAMI.2016.2572683","volume":"39","author":"E Shelhamer","year":"2017","unstructured":"Shelhamer, E., Long, J., Darrell, T.: Fully convolutional networks for semantic segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(4), 640\u2013651 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2572683","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3590_CR6","doi-asserted-by":"publisher","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6230\u20136239 (2017). https:\/\/doi.org\/10.1109\/cvpr.2017.660","DOI":"10.1109\/cvpr.2017.660"},{"issue":"4","key":"3590_CR7","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2018). https:\/\/doi.org\/10.1109\/TPAMI.2017.2699184","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3590_CR8","doi-asserted-by":"publisher","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder\u2013decoder with atrous separable convolution for semantic image segmentation. In: European Conference on Computer Vision (ECCV), pp. 833\u2013851 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"3590_CR9","doi-asserted-by":"publisher","unstructured":"Peng, C., Zhang, X., Yu, G., Luo, G., Sun, J.: Large kernel matters\u2014improve semantic segmentation by global convolutional network. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1743\u20131751 (2017). https:\/\/doi.org\/10.1109\/cvpr.2017.189","DOI":"10.1109\/cvpr.2017.189"},{"key":"3590_CR10","doi-asserted-by":"publisher","unstructured":"Srinivas, A., Lin, T.-Y., Parmar, N., Shlens, J., Abbeel, P., Vaswani, A.: Bottleneck transformers for visual recognition. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16514\u201316524 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.01625","DOI":"10.1109\/CVPR46437.2021.01625"},{"key":"3590_CR11","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 5998\u20136008 (2017)"},{"key":"3590_CR12","doi-asserted-by":"publisher","first-page":"107448","DOI":"10.1016\/j.patcog.2020.107448","volume":"106","author":"N Wang","year":"2020","unstructured":"Wang, N., Ma, S., Li, J., Zhang, Y., Zhang, L.: Multistage attention network for image inpainting. Pattern Recogn. 106, 107448 (2020). https:\/\/doi.org\/10.1016\/j.patcog.2020.107448","journal-title":"Pattern Recogn."},{"key":"3590_CR13","doi-asserted-by":"publisher","unstructured":"Zhao, H., Jia, J., Koltun, V.: Exploring self-attention for image recognition. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10073\u201310082 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01009","DOI":"10.1109\/CVPR42600.2020.01009"},{"key":"3590_CR14","doi-asserted-by":"publisher","unstructured":"Zhang, H., Dana, K., Shi, J., Zhang, Z., Wang, X., Tyagi, A., Agrawal, A.: Context encoding for semantic segmentation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7151\u20137160 (2018). https:\/\/doi.org\/10.1109\/cvpr.2018.00747","DOI":"10.1109\/cvpr.2018.00747"},{"key":"3590_CR15","doi-asserted-by":"publisher","unstructured":"Fu, J., Liu, J., Tian, H., Li, Y., Bao, Y., Fang, Z., Lu, H.: Dual attention network for scene segmentation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3146\u20133154 (2019). https:\/\/doi.org\/10.1109\/cvpr.2019.00326","DOI":"10.1109\/cvpr.2019.00326"},{"issue":"8","key":"3590_CR16","doi-asserted-by":"publisher","first-page":"2375","DOI":"10.1007\/s11263-021-01465-9","volume":"129","author":"Y Yuan","year":"2021","unstructured":"Yuan, Y., Huang, L., Guo, J., Zhang, C., Chen, X., Wang, J.: OCNet: object context for semantic segmentation. Int. J. Comput. Vis. 129(8), 2375\u20132398 (2021). https:\/\/doi.org\/10.1007\/s11263-021-01465-9","journal-title":"Int. J. Comput. Vis."},{"key":"3590_CR17","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2024.3355388","author":"Y Fu","year":"2024","unstructured":"Fu, Y., Gao, M., Xie, G., Hu, M., Wei, C., Ding, R.: Density-aware U-net for unstructured environment dust segmentation. IEEE Sens. J. (2024). https:\/\/doi.org\/10.1109\/JSEN.2024.3355388. (Accessed 2024-02-06)","journal-title":"IEEE Sens. J."},{"issue":"8","key":"3590_CR18","doi-asserted-by":"publisher","first-page":"2011","DOI":"10.1109\/TPAMI.2019.2913372","volume":"42","author":"J Hu","year":"2020","unstructured":"Hu, J., Shen, L., Albanie, S., Sun, G., Wu, E.: Squeeze-and-excitation networks. IEEE Trans. Pattern Anal. Mach. Intell. 42(8), 2011\u20132023 (2020). https:\/\/doi.org\/10.1109\/TPAMI.2019.2913372","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"3590_CR19","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1007\/s10489-021-02446-8","volume":"52","author":"X Hu","year":"2022","unstructured":"Hu, X., Jing, L., Sehar, U.: Joint pyramid attention network for real-time semantic segmentation of urban scenes. Appl. Intell. 52(1), 580\u2013594 (2022). https:\/\/doi.org\/10.1007\/s10489-021-02446-8","journal-title":"Appl. Intell."},{"key":"3590_CR20","doi-asserted-by":"publisher","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: CBAM: convolutional block attention module. In: European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"3590_CR21","doi-asserted-by":"publisher","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7794\u20137803 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00813","DOI":"10.1109\/CVPR.2018.00813"},{"issue":"12","key":"3590_CR22","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., Cipolla, R.: SegNet: a deep convolutional encoder\u2013decoder architecture for image segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(12), 2481\u20132495 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2644615","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3590_CR23","doi-asserted-by":"publisher","unstructured":"Lin, G., Milan, A., Shen, C., Reid, I.: RefineNet: multi-path refinement networks for high-resolution semantic segmentation. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5168\u20135177 (2017). https:\/\/doi.org\/10.1109\/cvpr.2017.549","DOI":"10.1109\/cvpr.2017.549"},{"key":"3590_CR24","doi-asserted-by":"publisher","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: BiSeNet: bilateral segmentation network for real-time semantic segmentation. In: European Conference on Computer Vision (ECCV), pp. 334\u2013349 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_20","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"3590_CR25","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.neucom.2022.01.044","volume":"480","author":"J Li","year":"2022","unstructured":"Li, J., Liao, G., Sun, W., Sun, J., Sheng, T., Zhu, K., von Deneen, K.M., Zhang, Y.: A 2.5D semantic segmentation of the pancreas using attention guided dual context embedded U-Net. Neurocomputing 480, 14\u201326 (2022). https:\/\/doi.org\/10.1016\/j.neucom.2022.01.044","journal-title":"Neurocomputing"},{"issue":"2","key":"3590_CR26","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K.I., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vis. 88(2), 303\u2013338 (2010). https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int. J. Comput. Vis."},{"key":"3590_CR27","doi-asserted-by":"publisher","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3213\u20133223 (2016). https:\/\/doi.org\/10.1109\/cvpr.2016.350","DOI":"10.1109\/cvpr.2016.350"},{"key":"3590_CR28","doi-asserted-by":"publisher","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: common objects in context. In: European Conference on Computer Vision (ECCV), pp. 740\u2013755 (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3590_CR29","doi-asserted-by":"publisher","first-page":"102677","DOI":"10.1016\/j.media.2022.102677","volume":"83","author":"A Gupta","year":"2023","unstructured":"Gupta, A., Gehlot, S., Goswami, S., Motwani, S., Gupta, R., Faura, \u00c1.G., \u0160tepec, D., Martin\u010di\u010d, T., Azad, R., Merhof, D., Bozorgpour, A., Azad, B., Sulaiman, A., Pandey, D., Gupta, P., Bhattacharya, S., Sinha, A., Agarwal, R., Qiu, X., Zhang, Y., Fan, M., Park, Y., Lee, D., Park, J.S., Lee, K., Ye, J.: SegPC-2021: a challenge & dataset on segmentation of Multiple Myeloma plasma cells from microscopic images. Med. Image Anal. 83, 102677 (2023). https:\/\/doi.org\/10.1016\/j.media.2022.102677","journal-title":"Med. Image Anal."},{"key":"3590_CR30","doi-asserted-by":"publisher","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint (2017). https:\/\/doi.org\/10.48550\/arXiv.1706.05587","DOI":"10.48550\/arXiv.1706.05587"},{"key":"3590_CR31","doi-asserted-by":"publisher","unstructured":"Li, H., Xiong, P., Fan, H., Sun, J.: DFANet: deep feature aggregation for real-time semantic segmentation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9522\u20139531 (2019). https:\/\/doi.org\/10.1109\/cvpr.2019.00975","DOI":"10.1109\/cvpr.2019.00975"},{"issue":"13","key":"3590_CR32","doi-asserted-by":"publisher","first-page":"15617","DOI":"10.1007\/s10489-021-03038-2","volume":"52","author":"S Luo","year":"2022","unstructured":"Luo, S., Zhang, J., Xiao, N., Qiang, Y., Li, K., Zhao, J., Meng, L., Song, P.: DAS-Net: a lung nodule segmentation method based on adaptive dual-branch attention and shadow mapping. Appl. Intell. 52(13), 15617\u201315631 (2022). https:\/\/doi.org\/10.1007\/s10489-021-03038-2","journal-title":"Appl. Intell."},{"key":"3590_CR33","doi-asserted-by":"publisher","unstructured":"Zhao, H., Zhang, Y., Liu, S., Shi, J., Loy, C.C., Lin, D., Jia, J.: PSANet: point-wise spatial attention network for scene parsing. In: European Conference on Computer Vision (ECCV), pp. 270\u2013286 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01240-3_17","DOI":"10.1007\/978-3-030-01240-3_17"},{"key":"3590_CR34","doi-asserted-by":"publisher","unstructured":"Yuan, Y., Chen, X., Wang, J.: Object-contextual representations for semantic segmentation. In: European Conference on Computer Vision (ECCV), pp. 173\u2013190 (2020). https:\/\/doi.org\/10.1007\/978-3-030-58539-6_11","DOI":"10.1007\/978-3-030-58539-6_11"},{"issue":"10","key":"3590_CR35","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2021","unstructured":"Wang, J., Sun, K., Cheng, T., Jiang, B., Deng, C., Zhao, Y., Liu, D., Mu, Y., Tan, M., Wang, X., Liu, W., Xiao, B.: Deep high-resolution representation learning for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3349\u20133364 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2020.2983686","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3590_CR36","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"3590_CR37","doi-asserted-by":"publisher","unstructured":"Huang, Z., Wang, X., Huang, L., Huang, C., Wei, Y., Liu, W.: CCNet: criss-cross attention for semantic segmentation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 603\u2013612 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00069","DOI":"10.1109\/ICCV.2019.00069"},{"key":"3590_CR38","doi-asserted-by":"publisher","unstructured":"Zhu, Z., Xu, M., Bai, S., Huang, T., Bai, X.: Asymmetric non-local neural networks for semantic segmentation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 593\u2013602 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00068","DOI":"10.1109\/ICCV.2019.00068"},{"key":"3590_CR39","doi-asserted-by":"publisher","unstructured":"Yin, M., Yao, Z., Cao, Y., Li, X., Zhang, Z., Lin, S., Hu, H.: Disentangled non-local neural networks. In: European Conference on Computer Vision (ECCV), pp. 191\u2013207 (2020). https:\/\/doi.org\/10.1007\/978-3-030-58555-6_12","DOI":"10.1007\/978-3-030-58555-6_12"},{"key":"3590_CR40","doi-asserted-by":"publisher","unstructured":"Li, X., Zhong, Z., Wu, J., Yang, Y., Lin, Z., Liu, H.: Expectation-maximization attention networks for semantic segmentation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 9166\u20139175 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00926","DOI":"10.1109\/ICCV.2019.00926"},{"key":"3590_CR41","doi-asserted-by":"publisher","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 548\u2013558 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00061","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"5","key":"3590_CR42","doi-asserted-by":"publisher","first-page":"5436","DOI":"10.1109\/TPAMI.2022.3211006","volume":"45","author":"M-H Guo","year":"2023","unstructured":"Guo, M.-H., Liu, Z.-N., Mu, T.-J., Hu, S.-M.: Beyond self-attention: external attention using two linear layers for visual tasks. IEEE Trans. Pattern Anal. Mach. Intell. 45(5), 5436\u20135447 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2022.3211006","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3590_CR43","doi-asserted-by":"publisher","unstructured":"Feng, Y., Jeon, H., Blagojevic, F., Guyot, C., Li, Q., Li, D.: AttMEMO: accelerating transformers with memoization on big memory systems. arXiv (2023). https:\/\/doi.org\/10.48550\/arXiv.2301.09262","DOI":"10.48550\/arXiv.2301.09262"},{"key":"3590_CR44","doi-asserted-by":"publisher","unstructured":"Guo, J., Han, K., Wu, H., Tang, Y., Chen, X., Wang, Y., Xu, C.: CMT: Convolutional neural networks meet vision transformers. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12165\u201312175 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01186","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"3590_CR45","doi-asserted-by":"publisher","unstructured":"Dong, X., Bao, J., Chen, D., Zhang, W., Yu, N., Yuan, L., Chen, D., Guo, B.: CSWin transformer: a general vision transformer backbone with cross-shaped windows. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12114\u201312124 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01181","DOI":"10.1109\/CVPR52688.2022.01181"},{"key":"3590_CR46","doi-asserted-by":"publisher","unstructured":"Gu, J., Kwon, H., Wang, D., Ye, W., Li, M., Chen, Y.-H., Lai, L., Chandra, V., Pan, D.Z.: Multi-scale high-resolution vision transformer for semantic segmentation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12084\u201312093 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01178","DOI":"10.1109\/CVPR52688.2022.01178"},{"key":"3590_CR47","doi-asserted-by":"publisher","unstructured":"Noh, H., Hong, S., Han, B.: Learning deconvolution network for semantic segmentation. In: IEEE International Conference on Computer Vision (ICCV), pp. 1520\u20131528 (2015). https:\/\/doi.org\/10.1109\/iccv.2015.178","DOI":"10.1109\/iccv.2015.178"},{"key":"3590_CR48","doi-asserted-by":"publisher","unstructured":"Wang, P., Chen, P., Yuan, Y., Liu, D., Huang, Z., Hou, X., Cottrell, G.: Understanding convolution for semantic segmentation. In: IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1451\u20131460 (2018). https:\/\/doi.org\/10.1109\/WACV.2018.00163","DOI":"10.1109\/WACV.2018.00163"},{"key":"3590_CR49","doi-asserted-by":"publisher","unstructured":"Tian, Z., He, T., Shen, C., Yan, Y.: Decoders matter for semantic segmentation: data-dependent decoding enables flexible feature aggregation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3121\u20133130 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00324","DOI":"10.1109\/CVPR.2019.00324"},{"key":"3590_CR50","doi-asserted-by":"publisher","unstructured":"Liu, J., He, J., Zhang, J., Ren, J., Li, H.: EfficientFCN: holistically-guided decoding for semantic segmentation. In: European Conference on Computer Vision (ECCV) (2020). https:\/\/doi.org\/10.1007\/978-3-030-58574-7_1","DOI":"10.1007\/978-3-030-58574-7_1"},{"key":"3590_CR51","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/TMM.2021.3120873","volume":"25","author":"X Lin","year":"2023","unstructured":"Lin, X., Sun, S., Huang, W., Sheng, B., Li, P., Feng, D.D.: EAPT: efficient attention pyramid transformer for image processing. IEEE Trans. Multimed. 25, 50\u201361 (2023). https:\/\/doi.org\/10.1109\/TMM.2021.3120873","journal-title":"IEEE Trans. Multimed."},{"key":"3590_CR52","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016). https:\/\/doi.org\/10.1109\/cvpr.2016.90","DOI":"10.1109\/cvpr.2016.90"},{"key":"3590_CR53","doi-asserted-by":"publisher","unstructured":"Shi, W., Caballero, J., Husz\u00e1r, F., Totz, J., Aitken, A.P., Bishop, R., Rueckert, D., Wang, Z.: Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1874\u20131883 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.207","DOI":"10.1109\/CVPR.2016.207"},{"issue":"2","key":"3590_CR54","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1109\/TPAMI.2018.2844175","volume":"42","author":"K He","year":"2020","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. IEEE Trans. Pattern Anal. Mach. Intell. 42(2), 386\u2013397 (2020). https:\/\/doi.org\/10.1109\/TPAMI.2018.2844175","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"5","key":"3590_CR55","doi-asserted-by":"publisher","first-page":"1483","DOI":"10.1109\/TPAMI.2019.2956516","volume":"43","author":"Z Cai","year":"2021","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: high quality object detection and instance segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 43(5), 1483\u20131498 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2019.2956516","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"3590_CR56","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1109\/TPAMI.2022.3145407","volume":"45","author":"Z Tian","year":"2023","unstructured":"Tian, Z., Zhang, B., Chen, H., Shen, C.: Instance and panoptic segmentation using conditional convolutions. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 669\u2013680 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2022.3145407","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3590_CR57","doi-asserted-by":"publisher","unstructured":"Fang, Y., Yang, S., Wang, X., Li, Y., Fang, C., Shan, Y., Feng, B., Liu, W.: Instances as queries. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6890\u20136899 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00683","DOI":"10.1109\/ICCV48922.2021.00683"},{"key":"3590_CR58","doi-asserted-by":"publisher","first-page":"4757","DOI":"10.1109\/TIP.2023.3305090","volume":"32","author":"F Zhang","year":"2023","unstructured":"Zhang, F., Panahi, A., Gao, G.: FsaNet: frequency self-attention for semantic segmentation. IEEE Trans. Image Process. 32, 4757\u20134772 (2023). https:\/\/doi.org\/10.1109\/TIP.2023.3305090","journal-title":"IEEE Trans. Image Process."},{"key":"3590_CR59","doi-asserted-by":"publisher","unstructured":"Xu, G., Zhang, X., He, X., Wu, X.: LeViT-UNet: make faster encoders with\u00a0transformer for\u00a0medical image segmentation. In: Pattern Recognition and Computer Vision, pp. 42\u201353 (2024). https:\/\/doi.org\/10.1007\/978-981-99-8543-2_4","DOI":"10.1007\/978-981-99-8543-2_4"},{"key":"3590_CR60","doi-asserted-by":"publisher","first-page":"106626","DOI":"10.1016\/j.compbiomed.2023.106626","volume":"154","author":"Q Xu","year":"2023","unstructured":"Xu, Q., Ma, Z., He, N., Duan, W.: DCSAU-Net: a deeper and more compact split-attention U-Net for medical image segmentation. Comput. Biol. Med. 154, 106626 (2023). https:\/\/doi.org\/10.1016\/j.compbiomed.2023.106626","journal-title":"Comput. Biol. Med."},{"key":"3590_CR61","doi-asserted-by":"publisher","unstructured":"Wang, H., Cao, P., Wang, J., Zaiane, O.R.: UCTransNet: rethinking the skip connections in U-net from a channel-wise perspective with transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 2441\u20132449 (2022). https:\/\/doi.org\/10.1609\/aaai.v36i3.20144. Number: 3","DOI":"10.1609\/aaai.v36i3.20144"},{"issue":"5","key":"3590_CR62","doi-asserted-by":"publisher","first-page":"1484","DOI":"10.1109\/TMI.2022.3230943","volume":"42","author":"X Huang","year":"2023","unstructured":"Huang, X., Deng, Z., Li, D., Yuan, X., Fu, Y.: MISSFormer: an effective transformer for 2D medical image segmentation. IEEE Trans. Med. Imaging 42(5), 1484\u20131494 (2023). https:\/\/doi.org\/10.1109\/TMI.2022.3230943","journal-title":"IEEE Trans. Med. Imaging"},{"key":"3590_CR63","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.neunet.2019.08.025","volume":"121","author":"N Ibtehaz","year":"2020","unstructured":"Ibtehaz, N., Rahman, M.S.: MultiResUNet: rethinking the U-net architecture for multimodal biomedical image segmentation. Neural Netw. 121, 74\u201387 (2020). https:\/\/doi.org\/10.1016\/j.neunet.2019.08.025","journal-title":"Neural Netw."},{"key":"3590_CR64","doi-asserted-by":"publisher","unstructured":"Chen, J., Lu, Y., Yu, Q., Luo, X., Adeli, E., Wang, Y., Lu, L., Yuille, A.L., Zhou, Y.: TransUNet: transformers make strong encoders for medical image segmentation (2021). arXiv. arXiv:2102.04306 [cs]. https:\/\/doi.org\/10.48550\/arXiv.2102.04306","DOI":"10.48550\/arXiv.2102.04306"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03590-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03590-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03590-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T05:08:54Z","timestamp":1741583334000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03590-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,16]]},"references-count":64,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["3590"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03590-1","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-4255035\/v1","asserted-by":"object"}]},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,16]]},"assertion":[{"value":"22 July 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}