{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T04:50:03Z","timestamp":1778820603386,"version":"3.51.4"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T00:00:00Z","timestamp":1769644800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T00:00:00Z","timestamp":1769644800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Jiangsu Petrochemical Process Key Equipment Digital Twin Technology Engineering Research Center Open Project","award":["DTEC202103"],"award-info":[{"award-number":["DTEC202103"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s13042-025-02891-8","type":"journal-article","created":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T14:41:04Z","timestamp":1769697664000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["SCFI-ESeg: spatial and content feature integration for efficient semantic segmentation"],"prefix":"10.1007","volume":"17","author":[{"given":"Ning","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xudong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gaochao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Baohua","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,29]]},"reference":[{"key":"2891_CR1","first-page":"3213","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"M Cordts","year":"2016","unstructured":"Cordts M, Omran M, Ramos S, Rehfeld T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B (2016) The cityscapes dataset for semantic urban scene understanding. Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, Geneva, pp 3213\u20133223"},{"key":"2891_CR2","first-page":"3354","volume-title":"IEEE conference on computer vision and pattern recognition","author":"A Geiger","year":"2012","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? the kitti vision benchmark suite. IEEE conference on computer vision and pattern recognition. IEEE, Geneva, pp 3354\u20133361"},{"key":"2891_CR3","first-page":"1","volume-title":"IEEE 20th international conference on intelligent transportation systems (ITSC)","author":"M Siam","year":"2017","unstructured":"Siam M, Elkerdawy S, Jagersand M, Yogamani S (2017) Deep semantic segmentation for automated driving: taxonomy roadmap and challenges. IEEE 20th international conference on intelligent transportation systems (ITSC). IEEE, Geneva, pp 1\u20138"},{"issue":"3","key":"2891_CR4","doi-asserted-by":"publisher","first-page":"1197","DOI":"10.1002\/mp.14676","volume":"48","author":"J Ma","year":"2021","unstructured":"Ma J, Wang Y, An X, Ge C, Yu Z, Chen J, Zhu Q, Dong G, He J, He Z et al (2021) Toward data-efficient learning: A benchmark for covid-19 ct lung and infection segmentation. Med Phys 48(3):1197\u20131210","journal-title":"Med Phys"},{"key":"2891_CR5","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.isprsjprs.2018.04.014","volume":"145","author":"R Kemker","year":"2018","unstructured":"Kemker R, Salvaggio C, Kanan C (2018) Algorithms for semantic segmentation of multispectral remote sensing imagery using deep learning. ISPRS J Photogramm Remote Sens 145:60\u201377","journal-title":"ISPRS J Photogramm Remote Sens"},{"key":"2891_CR6","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S (2021) An image is worth 16x16 words. Transformers for image recognition at scale. In: International Conference on Learning Representations"},{"key":"2891_CR7","first-page":"11299","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"H He","year":"2023","unstructured":"He H, Cai J, Pan Z, Liu J, Zhang J, Tao D, Zhuang B (2023) Dynamic focus-aware positional queries for semantic segmentation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 11299\u201311308"},{"key":"2891_CR8","first-page":"7262","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision","author":"R Strudel","year":"2021","unstructured":"Strudel R, Garcia R, Laptev I, Schmid C (2021) Segmenter: transformer for semantic segmentation. Proceedings of the IEEE\/CVF international conference on computer vision. IEEE, Geneva, pp 7262\u20137272"},{"key":"2891_CR9","first-page":"3041","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"F Li","year":"2023","unstructured":"Li F, Zhang H, Xu H, Liu S, Zhang L, Ni LM, Shum H-Y (2023) Mask dino: towards a unified transformer-based framework for object detection and segmentation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 3041\u20133050"},{"key":"2891_CR10","first-page":"1290","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"B Cheng","year":"2022","unstructured":"Cheng B, Misra I, Schwing AG, Kirillov A, Girdhar R (2022) Masked-attention mask transformer for universal image segmentation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 1290\u20131299"},{"key":"2891_CR11","first-page":"3431","volume-title":"Procedings of the IEEE conference on computer vision and pattern recognition","author":"J Long","year":"2015","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. Procedings of the IEEE conference on computer vision and pattern recognition. IEEE, Geneva, pp 3431\u20133440"},{"key":"2891_CR12","first-page":"234","volume-title":"Medical image computing and computer-assisted intervention-MICCAI 2015: 18th international conference, Munich, Germany, October 5\u20139, 2015, proceedings, part III 18","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: convolutional networks for biomedical image segmentation. Medical image computing and computer-assisted intervention-MICCAI 2015: 18th international conference, Munich, Germany, October 5\u20139, 2015, proceedings, part III 18. Springer, Cham, pp 234\u2013241"},{"issue":"12","key":"2891_CR13","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan V, Kendall A, Cipolla R (2017) Segnet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans Pattern Anal Mach Intell 39(12):2481\u20132495","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2891_CR14","first-page":"801","volume-title":"Procedings of the European conference on computer vision (ECCV)","author":"L-C Chen","year":"2018","unstructured":"Chen L-C, Zhu Y, Papandreou G, Schroff F, Adam H (2018) Encoder-decoder with atrous separable convolution for semantic image segmentation. Procedings of the European conference on computer vision (ECCV). Springer, Cham, pp 801\u2013818"},{"key":"2891_CR15","first-page":"3146","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"J Fu","year":"2019","unstructured":"Fu J, Liu J, Tian H, Li Y, Bao Y, Fang Z, Lu H (2019) Dual attention network for scene segmentation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 3146\u20133154"},{"key":"2891_CR16","doi-asserted-by":"publisher","first-page":"11418","DOI":"10.1609\/aaai.v34i07.6805","volume":"34","author":"X Li","year":"2020","unstructured":"Li X, Zhao H, Han L, Tong Y, Tan S, Yang K (2020) Gated fully fusion for semantic segmentation. Proceedings of the AAAI conference on artificial intelligence 34:11418\u201311425","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"issue":"10","key":"2891_CR17","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2020","unstructured":"Wang J, Sun K, Cheng T, Jiang B, Deng C, Zhao Y, Liu D, Mu Y, Tan M, Wang X et al (2020) Deep high-resolution representation learning for visual recognition. IEEE Trans Pattern Anal Mach Intell 43(10):3349\u20133364","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2891_CR18","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/978-3-030-58539-6_11","volume-title":"Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, proceedings, Part VI 16","author":"Y Yuan","year":"2020","unstructured":"Yuan Y, Chen X, Wang J (2020) Object-contextual representations for semantic segmentation. Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, proceedings, Part VI 16. Springer, Cham, pp 173\u2013190"},{"key":"2891_CR19","first-page":"1016","volume":"36","author":"Y Huang","year":"2022","unstructured":"Huang Y, Kang D, Jia W, Liu L, He X (2022) Channelized axial attention-considering channel relation within spatial attention for semantic segmentation. Proc AAAI Conf Artif Intell 36:1016\u20131025","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"2891_CR20","first-page":"1140","volume":"35","author":"M-H Guo","year":"2022","unstructured":"Guo M-H, Lu C-Z, Hou Q, Liu Z, Cheng M-M, Hu S-M (2022) Segnext: Rethinking convolutional attention design for semantic segmentation. Adv Neural Inf Process Syst 35:1140\u20131156","journal-title":"Adv Neural Inf Process Syst"},{"issue":"6","key":"2891_CR21","doi-asserted-by":"publisher","first-page":"3957","DOI":"10.1007\/s00371-024-03640-8","volume":"41","author":"G Yue","year":"2025","unstructured":"Yue G, Jiao G, Li C, Xiang J (2025) When CNN meet with vit: decision-level feature fusion for camouflaged object detection. Vis Comput 41(6):3957\u20133972","journal-title":"Vis Comput"},{"key":"2891_CR22","first-page":"1","volume-title":"ICASSP 2025\u20132025 IEEE international conference on acoustics, speech and signal processing (ICASSP)","author":"G Yue","year":"2025","unstructured":"Yue G, Jiao G, Xiang J (2025) Semi-supervised iterative learning network for camouflaged object detection. ICASSP 2025\u20132025 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, Geneva, pp 1\u20135"},{"key":"2891_CR23","first-page":"130433","volume-title":"Neurocomputing","author":"F Wang","year":"2025","unstructured":"Wang F, Jiao G, Yue G (2025) More observation leads to more clarity: multi-view collaboration network for camouflaged object detection. Neurocomputing. Elsevier, Amsterdam, p 130433"},{"key":"2891_CR24","first-page":"6881","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"S Zheng","year":"2021","unstructured":"Zheng S, Lu J, Zhao H, Zhu X, Luo Z, Wang Y, Fu Y, Feng J, Xiang T, Torr PH et al (2021) Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 6881\u20136890"},{"key":"2891_CR25","first-page":"4971","volume":"35","author":"B Zhang","year":"2022","unstructured":"Zhang B, Tian Z, Tang Q, Chu X, Wei X, Shen C et al (2022) Segvit: semantic segmentation with plain vision transformers. Adv Neural Inf Process Syst 35:4971\u20134982","journal-title":"Adv Neural Inf Process Syst"},{"key":"2891_CR26","first-page":"17864","volume":"34","author":"B Cheng","year":"2021","unstructured":"Cheng B, Schwing A, Kirillov A (2021) Per-pixel classification is not all you need for semantic segmentation. Adv Neural Inf Process Syst 34:17864\u201317875","journal-title":"Adv Neural Inf Process Syst"},{"key":"2891_CR27","first-page":"3051","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"H Shi","year":"2023","unstructured":"Shi H, Hayat M, Cai J (2023) Transformer scale gate for semantic segmentation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 3051\u20133060"},{"key":"2891_CR28","first-page":"51606","volume":"36","author":"Z Jin","year":"2023","unstructured":"Jin Z, Hu X, Zhu L, Song L, Yuan L, Yu L (2023) Idrnet: intervention-driven relation network for semantic segmentation. Adv Neural Inf Process Syst 36:51606\u201351620","journal-title":"Adv Neural Inf Process Syst"},{"key":"2891_CR29","first-page":"752","volume-title":"Proeedings of the IEEE\/CVF international conference on computer vision","author":"J Jain","year":"2023","unstructured":"Jain J, Singh A, Orlov N, Huang Z, Li J, Walton S, Shi H (2023) Semask: semantically masked transformers for semantic segmentation. Proeedings of the IEEE\/CVF international conference on computer vision. IEEE, Geneva, pp 752\u2013761"},{"key":"2891_CR30","first-page":"15804","volume-title":"Proeedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"N Cavagnero","year":"2024","unstructured":"Cavagnero N, Rosi G, Cuttano C, Pistilli F, Ciccone M, Averta G, Cermelli F (2024) Pem: prototype-based efficient maskformer for image segmentation. Proeedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 15804\u201315813"},{"key":"2891_CR31","first-page":"3679","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"J Wang","year":"2024","unstructured":"Wang J, Li C (2024) Contextseg: sketch semantic segmentation by querying the context with attention. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 3679\u20133688"},{"key":"2891_CR32","first-page":"3732","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"C Sung","year":"2024","unstructured":"Sung C, Kim W, An J, Lee W, Lim H, Myung H (2024) Contextrast: contextual contrastive learning for semantic segmentation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Geneva, pp 3732\u20133742"},{"key":"2891_CR33","unstructured":"Sun K, Zhao Y, Jiang B, Cheng T, Wang J (2019) High-resolution representations for labeling pixels and regions"},{"key":"2891_CR34","first-page":"1","volume-title":"Computer vision-ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, proceedings, Part XXVI 16","author":"J Liu","year":"2020","unstructured":"Liu J, He J, Zhang J, Ren JS, Li H (2020) Efficientfcn: holistically-guided decoding for semantic segmentation. Computer vision-ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, proceedings, Part XXVI 16. Springer, Cham, pp 1\u201317"},{"key":"2891_CR35","unstructured":"Yuan Y, Huang L, Guo J, Zhang C, Chen X, Wang J (2018) Ocnet: Object context network for scene parsing arXiv preprint arXiv:1809.00916"},{"key":"2891_CR36","first-page":"12179","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision","author":"R Ranftl","year":"2021","unstructured":"Ranftl R, Bochkovskiy A, Koltun V (2021) Vision transformers for dense prediction. Proceedings of the IEEE\/CVF international conference on computer vision. IEEE, Geneva, pp 12179\u201312188"},{"key":"2891_CR37","unstructured":"Bousselham W, Thibault G, Pagano L, Machireddy A, Gray J, Chang YH, Song X (2021) Efficient self-ensemble for semantic segmentation, arXiv preprint arXiv:2111.13280"},{"issue":"2","key":"2891_CR38","doi-asserted-by":"publisher","first-page":"1159","DOI":"10.1109\/TCSVT.2023.3292156","volume":"34","author":"G Zhu","year":"2023","unstructured":"Zhu G, Wang R, Liu Y, Zhu Z, Gao C, Liu L, Sang N (2023) An adaptive post-processing network with the global-local aggregation for semantic segmentation. IEEE Trans Circuits Syst Video Technol 34(2):1159\u20131173","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"2891_CR39","volume-title":"IEEE transactions on circuits and systems for video technology","author":"Y Huang","year":"2024","unstructured":"Huang Y, Kang D, Gao S, Li W, Duan L (2024) High-level feature guided decoding for semantic segmentation. IEEE transactions on circuits and systems for video technology. IEEE, Geneva"},{"key":"2891_CR40","volume-title":"IEEE transactions on multimedia","author":"Z Qin","year":"2024","unstructured":"Qin Z, Liu J, Zhang X, Tian M, Zhou A, Yi S, Li H (2024) Pyramid fusion transformer for semantic segmentation. In: Step A (ed) IEEE transactions on multimedia. IEEE, Geneva"},{"key":"2891_CR41","first-page":"38","volume-title":"European conference on computer vision","author":"MF Naeem","year":"2024","unstructured":"Naeem MF, Xian Y, Zhai X, Hoyer L, Van Gool L, Tombari F (2024) Silc: improving vision language pretraining with self-distillation. European conference on computer vision. Springer, Cham, pp 38\u201355"},{"key":"2891_CR42","first-page":"633","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"B Zhou","year":"2017","unstructured":"Zhou B, Zhao H, Puig X, Fidler S, Barriuso A, Torralba A (2017) Scene parsing through ade20k dataset. Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, Geneva, pp 633\u2013641"},{"key":"2891_CR43","first-page":"891","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"R Mottaghi","year":"2014","unstructured":"Mottaghi R, Chen X, Liu X, Cho N-G, Lee S-W, Fidler S, Urtasun R, Yuille A (2014) The role of context for object detection and semantic segmentation in the wild. Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, Geneva, pp 891\u2013898"},{"issue":"1","key":"2891_CR44","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham M, Eslami SMA, Van Gool L, Williams CKI, Winn J, Zisserman A (2015) The pascal visual object classes challenge: a retrospective. Int J Comput Vision 111(1):98\u2013136","journal-title":"Int J Comput Vision"},{"key":"2891_CR45","unstructured":"Steiner A, Kolesnikov A, Zhai X, Wightman R, Uszkoreit J, Beyer L (2021) How to train your vit? data, augmentation, and regularization in vision transformers"},{"key":"2891_CR46","volume-title":"Pyramid scene parsing network","author":"H Zhao","year":"2016","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2016) Pyramid scene parsing network. IEEE Computer Society, Washington, D. C."},{"key":"2891_CR47","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie E, Wang W, Yu Z, Anandkumar A, Alvarez JM, Luo P (2021) Segformer: simple and efficient design for semantic segmentation with transformers. Adv Neural Inf Process Syst 34:12077\u201312090","journal-title":"Adv Neural Inf Process Syst"},{"key":"2891_CR48","first-page":"2263","volume":"37","author":"Jh Shim","year":"2023","unstructured":"Shim Jh, Yu H, Kong K, Kang SJ (2023) Feedformer: revisiting transformer decoder for efficient semantic segmentation. Proc AAAI Conf Artif Intell 37:2263\u20132271","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"2891_CR49","first-page":"418","volume-title":"Proceedings of the European conference on computer vision (ECCV)","author":"T Xiao","year":"2018","unstructured":"Xiao T, Liu Y, Zhou B, Jiang Y, Sun J (2018) Unified perceptual parsing for scene understanding. Proceedings of the European conference on computer vision (ECCV). Springer, Cham, pp 418\u2013434"},{"key":"2891_CR50","volume-title":"Vision transformers for dense prediction: a survey, knowledge-based systems","author":"S Zuo","year":"2022","unstructured":"Zuo S, Xiao Y, Chang X, Wang X (2022) Vision transformers for dense prediction: a survey, knowledge-based systems. Elsevier, Amsterdam"},{"issue":"5","key":"2891_CR51","doi-asserted-by":"publisher","first-page":"5988","DOI":"10.1109\/TPAMI.2022.3206106","volume":"45","author":"Z Jin","year":"2023","unstructured":"Jin Z, Yu D, Yu YL (2023) Mcibi++: soft mining contextual information beyond image for semantic segmentation. IEEE Trans Pattern Anal Mach Intell 45(5):5988\u20136005","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"10","key":"2891_CR52","doi-asserted-by":"publisher","first-page":"5655","DOI":"10.1109\/TCSVT.2023.3252807","volume":"33","author":"F Lin","year":"2023","unstructured":"Lin F, Liang Z, Tian CS (2023) Structtoken: rethinking semantic segmentation with structural prior. IEEE Trans Circuits Syst Video Technol 33(10):5655\u20135663","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"2891_CR53","first-page":"132267","volume":"37","author":"X Zhu","year":"2024","unstructured":"Zhu X, Yang X, Wang Z, Li H, Dou W, Ge J, Lu L, Qiao Y, Dai J (2024) Parameter-inverted image pyramid networks. Adv Neural Inf Process Syst 37:132267\u2013132288","journal-title":"Adv Neural Inf Process Syst"},{"key":"2891_CR54","unstructured":"Contributors M (2020) Mmsegmentation: Openmmlab semantic segmentation toolbox and benchmark"},{"key":"2891_CR55","first-page":"10012","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision","author":"Z Liu","year":"2021","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. Proceedings of the IEEE\/CVF international conference on computer vision. IEEE, Geneva, pp 10012\u201310022"},{"key":"2891_CR56","doi-asserted-by":"crossref","unstructured":"Robbins H, Monro S (1951) A stochastic approximation method, the annals of mathematical statistics. pp 400\u2013407","DOI":"10.1214\/aoms\/1177729586"},{"key":"2891_CR57","unstructured":"Chen LC, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2014) Semantic image segmentation with deep convolutional nets and fully connected crfs, arXiv preprint arXiv:1412.7062"},{"key":"2891_CR58","first-page":"12094","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"J Gu","year":"2022","unstructured":"Gu J, Kwon H, Wang D, Ye W, Li M, Chen Y-H, Lai L, Chandra V, Pan DZ (2022) Multi-scale high-resolution vision transformer for semantic segmentation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE Computer Society, Washington, pp 12094\u201312103"},{"key":"2891_CR59","doi-asserted-by":"publisher","first-page":"5585","DOI":"10.1007\/978-3-030-96530-3","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision","author":"B Zhao","year":"2023","unstructured":"Zhao B, Yu Z, Lan S, Cheng Y, Anandkumar A, Lao Y, Alvarez JM (2023) Fully attentional networks with self-emerging token labeling. Proceedings of the IEEE\/CVF international conference on computer vision. IEEE, Geneva, pp 5585\u20135595"},{"issue":"10","key":"2891_CR60","doi-asserted-by":"publisher","first-page":"9024","DOI":"10.1109\/TCSVT.2024.3395132","volume":"34","author":"Y Huang","year":"2024","unstructured":"Huang Y, Kang D, Chen L, Jia W, He X, Duan L, Zhe X, Bao L (2024) Card: Semantic segmentation with efficient class-aware regularized decoder. IEEE Trans Circuits Syst Video Technol 34(10):9024\u20139038","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"2891_CR61","first-page":"7281","volume":"34","author":"Y Yuan","year":"2021","unstructured":"Yuan Y, Fu R, Huang L, Lin W, Zhang C, Chen X, Wang J (2021) Hrformer: High-resolution vision transformer for dense predict. Adv Neural Inf Process Syst 34:7281\u20137293","journal-title":"Adv Neural Inf Process Syst"},{"key":"2891_CR62","unstructured":"Park J (2018) Bam: Bottleneck attention module, arXiv preprint arXiv:1807.06514"},{"key":"2891_CR63","first-page":"3","volume-title":"Proceedings of the European conference on computer vision (ECCV)","author":"S Woo","year":"2018","unstructured":"Woo S, Park J, Lee J-Y, Kweon IS (2018) Cbam: convolutional block attention module. Proceedings of the European conference on computer vision (ECCV). ACM International, New York, pp 3\u201319"},{"key":"2891_CR64","first-page":"7132","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"J Hu","year":"2018","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, Geneva, pp 7132\u20137141"},{"key":"2891_CR65","doi-asserted-by":"crossref","unstructured":"Liu H, Liu F, Fan X, Huang D (2021) Polarized self-attention: towards high-quality pixel-wise regression, arXiv preprint arXiv:2107.00782","DOI":"10.1016\/j.neucom.2022.07.054"},{"key":"2891_CR66","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need, Advances in neural information processing systems 30"},{"key":"2891_CR67","doi-asserted-by":"crossref","unstructured":"Xie M, Peng H, Li P, Zeng G, Wang S, Wu J, Li P, Yu PS (2025) Hierarchical superpixel segmentation via structural information theory. In: Proceedings of the 2025 SIAM international conference on data mining (SDM) pp 242\u2013251","DOI":"10.1137\/1.9781611978520.23"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02891-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-025-02891-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02891-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T11:05:26Z","timestamp":1773659126000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-025-02891-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,29]]},"references-count":67,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["2891"],"URL":"https:\/\/doi.org\/10.1007\/s13042-025-02891-8","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,29]]},"assertion":[{"value":"15 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"44"}}