{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T16:36:45Z","timestamp":1771519005296,"version":"3.50.1"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100014718","name":"Innovative Research Group Project of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61472220"],"award-info":[{"award-number":["61472220"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014718","name":"Innovative Research Group Project of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61572286"],"award-info":[{"award-number":["61572286"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s40747-024-01650-6","type":"journal-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T13:24:35Z","timestamp":1731590675000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Mix-layers semantic extraction and multi-scale aggregation transformer for semantic segmentation"],"prefix":"10.1007","volume":"11","author":[{"given":"Tianping","family":"Li","sequence":"first","affiliation":[]},{"given":"Xiaolong","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zhenyi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhaotong","family":"Cui","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3951-2359","authenticated-orcid":false,"given":"Zhou","family":"Maoxia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"1650_CR1","doi-asserted-by":"publisher","unstructured":"Wang W, Dai J, Chen Z et al (2023) InternImage: exploring large-scale vision foundation models with deformable convolutions. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Vancouver, BC, Canada, pp 14408\u201314419. https:\/\/doi.org\/10.1109\/CVPR52729.2023.01385","DOI":"10.1109\/CVPR52729.2023.01385"},{"key":"1650_CR2","unstructured":"Wang P, Wang S, Lin J et al (2023) ONE-PEACE: exploring one general representation model toward unlimited modalities. arXiv:2305.11172"},{"key":"1650_CR3","doi-asserted-by":"publisher","unstructured":"Zheng H, Lin Z, Lu J et al (2022) Image inpainting with cascaded modulation gan and object-aware training. In: European conference on computer vision, pp 277\u2013296. https:\/\/doi.org\/10.1007\/978-3-031-19787-1_16","DOI":"10.1007\/978-3-031-19787-1_16"},{"key":"1650_CR4","unstructured":"Cui Y, Jiang C, Wang L, Wu G (2021) Target transformed regression for accurate tracking. arXiv:2104.00403"},{"key":"1650_CR5","doi-asserted-by":"publisher","unstructured":"Zhu Z, Hou J, Wu DO (2023) Cross-modal orthogonal high-rank augmentation for RGB-event transformer-trackers. In: 2023 IEEE\/CVF international conference on computer vision (ICCV), Paris, France, pp 21988\u201321998. https:\/\/doi.org\/10.1109\/ICCV51070.2023.02015","DOI":"10.1109\/ICCV51070.2023.02015"},{"key":"1650_CR6","doi-asserted-by":"publisher","unstructured":"Feng C-M, Yan Y, Fu H et al (2021) Task transformer network for joint MRI reconstruction and super-resolution. In: medical image computing and computer assisted intervention\u2014MICCAI 2021: 24th international conference, Strasbourg, France, September 27\u2013October 1, 2021, Proceedings, Part VI 24, pp 307\u2013317. https:\/\/doi.org\/10.1007\/978-3-030-87231-1_30","DOI":"10.1007\/978-3-030-87231-1_30"},{"key":"1650_CR7","doi-asserted-by":"publisher","unstructured":"Ledig C, Theis L, Huszar F et al (2017) Photo-realistic single image super-resolution using a generative adversarial network. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), Honolulu, HI, pp 105\u2013114. https:\/\/doi.org\/10.1109\/CVPR.2017.19","DOI":"10.1109\/CVPR.2017.19"},{"key":"1650_CR8","doi-asserted-by":"publisher","unstructured":"Zong Z, Song G, Liu Y (2023) DETRs with collaborative hybrid assignments training. In: 2023 IEEE\/CVF international conference on computer vision (ICCV), Paris, France, pp 6725\u20136735. https:\/\/doi.org\/10.1109\/ICCV51070.2023.00621","DOI":"10.1109\/ICCV51070.2023.00621"},{"key":"1650_CR9","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G et al (2020) End-to-end object detection with transformers. In: Computer vision\u2014ECCV 2020, vol 12346. Springer International Publishing, Cham, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1650_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107426","author":"Q Zhang","year":"2021","unstructured":"Zhang Q, Lee F, Wang Y et al (2021) An joint end-to-end framework for learning with noisy labels. Appl Soft Comput. https:\/\/doi.org\/10.1016\/j.asoc.2021.107426","journal-title":"Appl Soft Comput"},{"key":"1650_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.123846","author":"Q Zhang","year":"2024","unstructured":"Zhang Q, Zhu Y, Yang M et al (2024) Cross-to-merge training with class balance strategy for learning with noisy labels. Expert Syst Appl. https:\/\/doi.org\/10.1016\/j.eswa.2024.123846","journal-title":"Expert Syst Appl"},{"key":"1650_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.104779","author":"J Fan","year":"2022","unstructured":"Fan J, Yu Y, Wang Z (2022) Partial label learning with competitive learning graph neural network. Eng Appl Artif Intell. https:\/\/doi.org\/10.1016\/j.engappai.2022.104779","journal-title":"Eng Appl Artif Intell"},{"key":"1650_CR13","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3347792","author":"J Fan","year":"2024","unstructured":"Fan J, Huang L, Gong C et al (2024) KMT-PLL: K-means cross-attention transformer for partial label learning. IEEE Trans Neural Netw Learn Syst. https:\/\/doi.org\/10.1109\/TNNLS.2023.3347792","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1650_CR14","doi-asserted-by":"publisher","unstructured":"Shao H, Wang L, Chen R et al (2023) ReasonNet: end-to-end driving with temporal and global reasoning. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Vancouver, BC, Canada, pp 13723\u201313733. https:\/\/doi.org\/10.1109\/CVPR52729.2023.01319","DOI":"10.1109\/CVPR52729.2023.01319"},{"key":"1650_CR15","doi-asserted-by":"publisher","unstructured":"Siam M, Elkerdawy S, Jagersand M, Yogamani S (2017) Deep semantic segmentation for automated driving: taxonomy, roadmap and challenges. In: 2017 IEEE 20th international conference on intelligent transportation systems (ITSC), pp 1\u20138. https:\/\/doi.org\/10.1109\/itsc.2017.8317714","DOI":"10.1109\/itsc.2017.8317714"},{"key":"1650_CR16","doi-asserted-by":"publisher","unstructured":"Zhou Y, Li Z, Bai S et al (2019) Prior-aware neural network for partially-supervised multi-organ segmentation. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), Seoul, Korea (South), pp 10671\u201310680. https:\/\/doi.org\/10.1109\/ICCV.2019.01077","DOI":"10.1109\/ICCV.2019.01077"},{"key":"1650_CR17","doi-asserted-by":"publisher","unstructured":"Van Aken B, Papaioannou J-M, Mayrdorfer M et al (2021) Clinical outcome prediction from admission notes using self-supervised knowledge integration. In: Proceedings of the 16th conference of the European chapter of the association for computational linguistics: main volume, online, pp 881\u2013893. https:\/\/doi.org\/10.18653\/v1\/2021.eacl-main.75","DOI":"10.18653\/v1\/2021.eacl-main.75"},{"key":"1650_CR18","doi-asserted-by":"publisher","unstructured":"Shen S, Seneviratne S, Wanyan X, Kirley M (2023) Firerisk: a remote sensing dataset for fire risk assessment with benchmarks using supervised and self-supervised learning. In: 2023 international conference on digital image computing: techniques and applications (DICTA), pp 189\u2013196. https:\/\/doi.org\/10.1109\/dicta60407.2023.00034","DOI":"10.1109\/dicta60407.2023.00034"},{"key":"1650_CR19","doi-asserted-by":"publisher","DOI":"10.3390\/rs12091432","author":"J Rabbi","year":"2020","unstructured":"Rabbi J, Ray N, Schubert M et al (2020) Small-object detection in remote sensing images with end-to-end edge-enhanced GAN and object detector network. Remote Sens. https:\/\/doi.org\/10.3390\/rs12091432","journal-title":"Remote Sens"},{"key":"1650_CR20","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1650_CR21","unstructured":"Vaswani A, Shazeer N, Parmar N et al (2017) Attention is all you need. In: Advances in neural information processing systems"},{"key":"1650_CR22","unstructured":"Bahdanau D, Cho K, Bengio Y (2016) Neural machine translation by jointly learning to align and translate. arXiv:1409.0473"},{"key":"1650_CR23","doi-asserted-by":"publisher","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A et al (2021) An image is worth 16X16 words: transformers for image recognition at scale. https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"key":"1650_CR24","doi-asserted-by":"publisher","unstructured":"Deng J, Dong W, Socher R, et al (2009) ImageNet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, Miami, FL, pp 248\u2013255. https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1650_CR25","doi-asserted-by":"publisher","unstructured":"Zheng S, Lu J, Zhao H et al (2021) Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Nashville, TN, USA, pp 6877\u20136886. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00681","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"1650_CR26","doi-asserted-by":"publisher","unstructured":"Xie E, Wang W, Yu Z et al (2021) SegFormer: simple and efficient design for semantic segmentation with transformers. In: Advances in neural information processing systems, pp 12077\u201312090. https:\/\/doi.org\/10.48550\/arXiv.2105.15203","DOI":"10.48550\/arXiv.2105.15203"},{"key":"1650_CR27","doi-asserted-by":"publisher","unstructured":"Li J, Hassani A, Walton S, Shi H (2023) ConvMLP: hierarchical convolutional MLPs for vision. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition workshops (CVPRW), Vancouver, BC, Canada, pp 6307\u20136316. https:\/\/doi.org\/10.1109\/CVPRW59228.2023.00671","DOI":"10.1109\/CVPRW59228.2023.00671"},{"key":"1650_CR28","doi-asserted-by":"publisher","unstructured":"Zhu Z, Xu M, Bai S et al (2019) Asymmetric non-local neural networks for semantic segmentation. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), Seoul, Korea (South), pp 593\u2013602. https:\/\/doi.org\/10.1109\/ICCV.2019.00068","DOI":"10.1109\/ICCV.2019.00068"},{"key":"1650_CR29","doi-asserted-by":"publisher","unstructured":"Zhou B, Zhao H, Puig X et al (2017) Scene parsing through ADE20K dataset. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), Honolulu, HI, pp 5122\u20135130. https:\/\/doi.org\/10.1109\/CVPR.2017.544","DOI":"10.1109\/CVPR.2017.544"},{"key":"1650_CR30","doi-asserted-by":"publisher","unstructured":"Cordts M, Omran M, Ramos S et al (2016) The cityscapes dataset for semantic urban scene understanding. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), Las Vegas, NV, USA, pp 3213\u20133223. https:\/\/doi.org\/10.1109\/CVPR.2016.350","DOI":"10.1109\/CVPR.2016.350"},{"key":"1650_CR31","doi-asserted-by":"publisher","unstructured":"Caesar H, Uijlings J, Ferrari V (2018) COCO-stuff: thing and stuff classes in context. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, Salt Lake City, UT, USA, pp 1209\u20131218. https:\/\/doi.org\/10.1109\/CVPR.2018.00132","DOI":"10.1109\/CVPR.2018.00132"},{"key":"1650_CR32","doi-asserted-by":"publisher","unstructured":"Liu Z, Lin Y, Cao Y et al (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: 2021 IEEE\/CVF international conference on computer vision (ICCV), Montreal, QC, Canada, pp 9992\u201310002. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1650_CR33","doi-asserted-by":"publisher","unstructured":"Chen L, Zhang H, Xiao J et al (2017) SCA-CNN: spatial and channel-wise attention in convolutional networks for image captioning. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), Honolulu, HI, pp 6298\u20136306. https:\/\/doi.org\/10.1109\/CVPR.2017.667","DOI":"10.1109\/CVPR.2017.667"},{"key":"1650_CR34","doi-asserted-by":"publisher","unstructured":"Huang Z, Shi X, Zhang C et al (2022) Flowformer: a transformer architecture for optical flow. In: European conference on computer vision, pp 668\u2013685. https:\/\/doi.org\/10.1007\/978-3-031-19790-1_40","DOI":"10.1007\/978-3-031-19790-1_40"},{"key":"1650_CR35","doi-asserted-by":"publisher","unstructured":"Fu J, Liu J, Tian H, et al (2019) Dual attention network for scene segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3146\u20133154. https:\/\/doi.org\/10.1109\/cvpr.2019.00326","DOI":"10.1109\/cvpr.2019.00326"},{"key":"1650_CR36","doi-asserted-by":"publisher","unstructured":"Hu H, Gu J, Zhang Z, et al (2018) Relation networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3588\u20133597. https:\/\/doi.org\/10.1109\/cvpr.2018.00378","DOI":"10.1109\/cvpr.2018.00378"},{"key":"1650_CR37","doi-asserted-by":"publisher","unstructured":"Wang X, Girshick R, Gupta A, He K (2018) Non-local Neural Networks. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, Salt Lake City, UT, USA, pp 7794\u20137803. https:\/\/doi.org\/10.1109\/CVPR.2018.00813","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1650_CR38","unstructured":"Yuan Y, Huang L, Guo J et al (2021) OCNet: object context network for scene parsing. arXiv:1809.00916"},{"key":"1650_CR39","doi-asserted-by":"crossref","unstructured":"Zhao H, Zhang Y, Liu S et al (2018) PSANet: point-wise spatial attention network for scene parsing. In: Computer vision\u2014ECCV 2018, vol 11213. Springer International Publishing, Cham, pp 270\u2013286","DOI":"10.1007\/978-3-030-01240-3_17"},{"key":"1650_CR40","doi-asserted-by":"crossref","unstructured":"Shehzadi T, Hashmi KA, Stricker D, Afzal MZ (2024) Sparse semi-DETR: sparse learnable queries for semi-supervised object detection. arXiv:2404.01819","DOI":"10.1109\/CVPR52733.2024.00558"},{"key":"1650_CR41","doi-asserted-by":"publisher","unstructured":"Xu Z, Wu D, Yu C et al (2024) SCTNet: single-branch CNN with transformer semantic information for real-time segmentation. In: Proceedings of the AAAI conference on artificial intelligence, pp 6378\u20136386. https:\/\/doi.org\/10.1609\/aaai.v38i6.28457","DOI":"10.1609\/aaai.v38i6.28457"},{"key":"1650_CR42","doi-asserted-by":"publisher","unstructured":"Wang H, Zhu Y, Adam H et al (2021) Max-deeplab: end-to-end panoptic segmentation with mask transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5463\u20135474. https:\/\/doi.org\/10.1109\/cvpr46437.2021.00542","DOI":"10.1109\/cvpr46437.2021.00542"},{"key":"1650_CR43","doi-asserted-by":"publisher","unstructured":"Su J, Yin R, Zhang S, Luo J (2023) Motion-state alignment for video semantic segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3570\u20133579. https:\/\/doi.org\/10.1109\/cvprw59228.2023.00365","DOI":"10.1109\/cvprw59228.2023.00365"},{"key":"1650_CR44","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3344294","author":"Z Xiao","year":"2023","unstructured":"Xiao Z, Tong H, Qu R et al (2023) CapMatch: semi-supervised contrastive transformer capsule with feature-based knowledge distillation for human activity recognition. IEEE Trans Neural Netw Learn Syst. https:\/\/doi.org\/10.1109\/TNNLS.2023.3344294","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1650_CR45","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2023.3304948","author":"Z Xiao","year":"2024","unstructured":"Xiao Z, Xing H, Zhao B et al (2024) Deep contrastive representation learning with self-distillation. IEEE Trans Emerg Top Comput Intell. https:\/\/doi.org\/10.1109\/TETCI.2023.3304948","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"key":"1650_CR46","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2023.3342640","author":"Z Xiao","year":"2024","unstructured":"Xiao Z, Xing H, Qu R et al (2024) Densely knowledge-aware network for multivariate time series classification. IEEE Trans Syst Man Cybern Syst. https:\/\/doi.org\/10.1109\/TSMC.2023.3342640","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"1650_CR47","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2024.3370219","author":"Z Xiao","year":"2024","unstructured":"Xiao Z, Xu X, Xing H et al (2024) DTCM: deep transformer capsule mutual distillation for multivariate time series classification. IEEE Trans Cogn Dev Syst. https:\/\/doi.org\/10.1109\/TCDS.2024.3370219","journal-title":"IEEE Trans Cogn Dev Syst"},{"key":"1650_CR48","doi-asserted-by":"publisher","unstructured":"Touvron H, Cord M, Douze M et al (2021) Training data-efficient image transformers & distillation through attention. In: International conference on machine learning, pp 10347\u201310357. https:\/\/doi.org\/10.48550\/arXiv.2012.12877","DOI":"10.48550\/arXiv.2012.12877"},{"key":"1650_CR49","doi-asserted-by":"publisher","unstructured":"Arnab A, Dehghani M, Heigold G et al (2021) ViViT: a video vision transformer. In: 2021 IEEE\/CVF international conference on computer vision (ICCV), Montreal, QC, Canada, pp 6816\u20136826. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00676","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"1650_CR50","doi-asserted-by":"publisher","unstructured":"Wang W, Xie E, Li X et al (2021) Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: 2021 IEEE\/CVF international conference on computer vision (ICCV), Montreal, QC, Canada, pp 548\u2013558. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00061","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"1650_CR51","doi-asserted-by":"publisher","unstructured":"Guo M-H, Lu C-Z, Hou Q, et al (2022) Segnext: rethinking convolutional attention design for semantic segmentation. In: Advances in neural information processing systems, pp 1140\u20131156. https:\/\/doi.org\/10.48550\/arXiv.2209.08575","DOI":"10.48550\/arXiv.2209.08575"},{"key":"1650_CR52","doi-asserted-by":"publisher","unstructured":"Yang C, Wang Y, Zhang J et al (2022) Lite vision transformer with enhanced self-attention. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), New Orleans, LA, USA, pp 11988\u201311998. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01169","DOI":"10.1109\/CVPR52688.2022.01169"},{"key":"1650_CR53","doi-asserted-by":"publisher","unstructured":"Zhao H, Shi J, Qi X et al (2017) Pyramid scene parsing network. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), Honolulu, HI, pp 6230\u20136239. https:\/\/doi.org\/10.1109\/CVPR.2017.660","DOI":"10.1109\/CVPR.2017.660"},{"key":"1650_CR54","doi-asserted-by":"publisher","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), Honolulu, HI, pp 2261\u20132269. https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"1650_CR55","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3127553","author":"L Rosas-Arias","year":"2022","unstructured":"Rosas-Arias L, Benitez-Garcia G, Portillo-Portillo J et al (2022) FASSD-Net: fast and accurate real-time semantic segmentation for embedded systems. IEEE Trans Intell Transp Syst. https:\/\/doi.org\/10.1109\/TITS.2021.3127553","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"1650_CR56","unstructured":"Peng J, Liu Y, Tang S et al (2022) PP-LiteSeg: a superior real-time semantic segmentation model. arXiv:2204.02681"},{"key":"1650_CR57","doi-asserted-by":"publisher","DOI":"10.1109\/tits.2022.3228042","author":"H Pan","year":"2022","unstructured":"Pan H, Hong Y, Sun W, Jia Y (2022) Deep dual-resolution networks for real-time and accurate semantic segmentation of traffic scenes. IEEE Trans Intell Transp Syst. https:\/\/doi.org\/10.1109\/tits.2022.3228042","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"1650_CR58","doi-asserted-by":"publisher","unstructured":"Wang Y, Chen S, Bian H et al (2023) Deep multi-resolution network for real- time semantic segmentation in street scenes. In: 2023 international joint conference on neural networks (IJCNN), Gold Coast, Australia, pp 01\u201308. https:\/\/doi.org\/10.1109\/IJCNN54540.2023.10191758","DOI":"10.1109\/IJCNN54540.2023.10191758"},{"key":"1650_CR59","doi-asserted-by":"publisher","unstructured":"Xu J, Xiong Z, Bhattacharyya SP (2023) PIDNet: a real-time semantic segmentation network inspired by PID controllers. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Vancouver, BC, Canada, pp 19529\u201319539. https:\/\/doi.org\/10.1109\/CVPR52729.2023.01871","DOI":"10.1109\/CVPR52729.2023.01871"},{"key":"1650_CR60","doi-asserted-by":"publisher","unstructured":"Strudel R, Garcia R, Laptev I, Schmid C (2021) Segmenter: transformer for semantic segmentation. In: 2021 IEEE\/CVF international conference on computer vision (ICCV), Montreal, QC, Canada, pp 7242\u20137252. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00717","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"1650_CR61","doi-asserted-by":"publisher","unstructured":"Chen L-C, Zhu Y, Papandreou G et al (2018) Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European conference on computer vision (ECCV), pp 801\u2013818. https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"1650_CR62","doi-asserted-by":"publisher","unstructured":"Orsic M, Kreso I, Bevandic P, Segvic S (2019) In defense of pre-trained ImageNet architectures for real-time semantic segmentation of road-driving images. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Long Beach, CA, USA, pp 12599\u201312608. https:\/\/doi.org\/10.1109\/CVPR.2019.01289","DOI":"10.1109\/CVPR.2019.01289"},{"key":"1650_CR63","doi-asserted-by":"publisher","unstructured":"Kirillov A, Girshick R, He K, Dollar P (2019) Panoptic feature pyramid networks. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Long Beach, CA, USA, pp 6392\u20136401. https:\/\/doi.org\/10.1109\/CVPR.2019.00656","DOI":"10.1109\/CVPR.2019.00656"},{"key":"1650_CR64","doi-asserted-by":"publisher","unstructured":"Shim J, Yu H, Kong K, Kang S-J (2023) FeedFormer: revisiting transformer decoder for efficient semantic segmentation. In: Proceedings of the AAAI conference on artificial intelligence, pp 2263\u20132271. https:\/\/doi.org\/10.1609\/aaai.v37i2.25321","DOI":"10.1609\/aaai.v37i2.25321"},{"key":"1650_CR65","unstructured":"Yan H, Wu M, Zhang C (2024) Multi-scale representations by varying window attention for semantic segmentation. arXiv:2404.16573"},{"key":"1650_CR66","unstructured":"Yeom S-K, von Klitzing J (2023) U-MixFormer: UNet-like transformer with mix-attention for efficient semantic segmentation. arXiv:2312.06272"},{"key":"1650_CR67","unstructured":"Yuan Y, Fu R, Huang L et al (2021) HRFormer: high-resolution transformer for dense prediction. arxiv:2110.09408"},{"key":"1650_CR68","unstructured":"Cheng B, Schwing AG, Kirillov (2021) A per-pixel classification is not all you need for semantic segmentation. In: Advances in neural information processing systems, pp 17864\u201317875"},{"key":"1650_CR69","doi-asserted-by":"publisher","unstructured":"Cheng B, Misra I, Schwing AG et al (2022) Masked-attention mask transformer for universal image segmentation. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), New Orleans, LA, USA, pp 1280\u20131289. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00135","DOI":"10.1109\/CVPR52688.2022.00135"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01650-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01650-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01650-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,30]],"date-time":"2025-01-30T20:20:56Z","timestamp":1738268456000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01650-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":69,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["1650"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01650-6","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,14]]},"assertion":[{"value":"27 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest in the publication of this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"36"}}