{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T15:49:39Z","timestamp":1762530579517,"version":"build-2065373602"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Natural Science Foundation for Outstanding Young Scholars of Fujian Province","award":["2022J06023"],"award-info":[{"award-number":["2022J06023"]}]},{"name":"High-level Talent Innovation and Entrepreneurship Project of Quanzhou City","award":["2023C013R"],"award-info":[{"award-number":["2023C013R"]}]},{"name":"Fujian Province Science and Technology Empowering Police Research Initiative","award":["2024Y0064"],"award-info":[{"award-number":["2024Y0064"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s10489-025-06883-7","type":"journal-article","created":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T12:14:14Z","timestamp":1760098454000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Semantic segmentation in power grid scenarios using scale-transforming transformer"],"prefix":"10.1007","volume":"55","author":[{"given":"Wenjie","family":"Pan","sequence":"first","affiliation":[]},{"given":"Linhan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Yutao","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yuqing","family":"Fu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8840-3629","authenticated-orcid":false,"given":"Jianqing","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Yibing","family":"Zhan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,10]]},"reference":[{"issue":"12","key":"6883_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3664598","volume":"56","author":"J Leng","year":"2024","unstructured":"Leng J, Ye Y, Mo M, Gao C, Gan J, Xiao B, Gao X (2024) Recent advances for aerial object detection: A survey. ACM Comput Surv 56(12):1\u201336","journal-title":"ACM Comput Surv"},{"key":"6883_CR2","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Zhang K, Tao D (2019) Geometry-consistent generative adversarial networks for one-sided unsupervised domain mapping. In: Proceedings of the IEEE conference on computer vision and pattern recognition, California, USA, pp 2427\u20132436","DOI":"10.1109\/CVPR.2019.00253"},{"issue":"4","key":"6883_CR3","doi-asserted-by":"publisher","first-page":"6913","DOI":"10.1109\/TII.2024.3353874","volume":"20","author":"Y Wang","year":"2024","unstructured":"Wang Y, Zhang J, Chen Y, Yuan H, Wu C (2024) An automated learning method of semantic segmentation for train autonomous driving environment understanding. IEEE Trans Industr Inf 20(4):6913\u20136922","journal-title":"IEEE Trans Industr Inf"},{"issue":"5","key":"6883_CR4","doi-asserted-by":"publisher","first-page":"7440","DOI":"10.1109\/TII.2024.3361021","volume":"20","author":"H Hu","year":"2024","unstructured":"Hu H, Fang B, Ran Y, Wei X, Xian W, Zhou M, Kwong S (2024) Deep aual-stream convolutional neural networks for cardiac image semantic segmentation. IEEE Trans Industr Inf 20(5):7440\u20137448","journal-title":"IEEE Trans Industr Inf"},{"key":"6883_CR5","doi-asserted-by":"crossref","unstructured":"Abdelfattah R, Wang X, Wang S (2020) Ttpla: An aerial-image dataset for detection and segmentation of transmission towers and power lines. In: Proceedings of the asian conference on computer vision, Kansai, Japan, pp 601\u2013618","DOI":"10.1007\/978-3-030-69544-6_36"},{"key":"6883_CR6","doi-asserted-by":"publisher","first-page":"6248","DOI":"10.1109\/TIP.2023.3321465","volume":"32","author":"R Abdelfattah","year":"2023","unstructured":"Abdelfattah R, Wang X, Wang S (2023) Plgan: Generative adversarial networks for power-line segmentation in aerial images. IEEE Trans Image Process 32:6248\u20136259","journal-title":"IEEE Trans Image Process"},{"key":"6883_CR7","doi-asserted-by":"crossref","unstructured":"Sharma P, Saurav S, Singh S (2024) Object detection in power line infrastructure: A review of the challenges and solutions. Eng Appl Artif Intell 130:107781","DOI":"10.1016\/j.engappai.2023.107781"},{"issue":"3","key":"6883_CR8","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M (2015) Imagenet large scale visual recognition challenge. Int J Comput Vision 115(3):211\u2013252","journal-title":"Int J Comput Vision"},{"key":"6883_CR9","doi-asserted-by":"publisher","first-page":"626","DOI":"10.1016\/j.neucom.2022.01.005","volume":"493","author":"Y Mo","year":"2022","unstructured":"Mo Y, Wu Y, Yang X, Liu F, Liao Y (2022) Review the state-of-the-art technologies of semantic segmentation based on deep learning. Neurocomputing 493:626\u2013646","journal-title":"Neurocomputing"},{"key":"6883_CR10","doi-asserted-by":"crossref","unstructured":"Liu S, Ma Y, Zhang X, Wang H, Ji J, Sun X, Ji R (2024) Rotated multi-scale interaction network for referring remote sensing image segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, Washington, USA, pp 26658\u201326668","DOI":"10.1109\/CVPR52733.2024.02517"},{"key":"6883_CR11","doi-asserted-by":"crossref","unstructured":"Rahman MM, Munir M, Marculescu R (2024) Emcad: Efficient multi-scale convolutional attention decoding for medical image segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, Washington, USA, pp 11769\u201311779","DOI":"10.1109\/CVPR52733.2024.01118"},{"issue":"3","key":"6883_CR12","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang W, Xie E, Li X, Fan DP, Song K, Liang D, Lu T, Luo P, Shao L (2022) Pvt v2: Improved baselines with pyramid vision transformer. Comput Vis Media 8(3):415\u2013424","journal-title":"Comput Vis Media"},{"issue":"3","key":"6883_CR13","doi-asserted-by":"publisher","first-page":"2831","DOI":"10.1007\/s10489-024-05324-1","volume":"54","author":"L Xia","year":"2024","unstructured":"Xia L, Ding X (2024) Human-object interaction detection based on cascade multi-scale transformer. Appl Intell 54(3):2831\u20132850","journal-title":"Appl Intell"},{"key":"6883_CR14","doi-asserted-by":"crossref","unstructured":"Sun H, Wang Y, Wang X, Zhang B, Xin Y, Zhang B, Cao X, Ding E, Han S (2024) Maformer: A transformer network with multi-scale attention fusion for visual recognition. Neurocomputing, 127828","DOI":"10.1016\/j.neucom.2024.127828"},{"key":"6883_CR15","doi-asserted-by":"crossref","unstructured":"Liu Z, Hu H, Lin Y, Yao Z, Xie Z, Wei Y, Ning J, Cao Y, Zhang Z, Dong L (2022) Swin transformer v2: Scaling up capacity and resolution. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Louisiana, USA, pp 12009\u201312019","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"6883_CR16","doi-asserted-by":"crossref","unstructured":"Cao H, Wang Y, Chen J, Jiang D, Zhang X, Tian Q, Wang M (2022) Swin-unet: Unet-like pure transformer for medical image segmentation. In: European conference on computer vision, Tel Aviv, Israel, pp 205\u2013218","DOI":"10.1007\/978-3-031-25066-8_9"},{"issue":"6","key":"6883_CR17","first-page":"1","volume":"55","author":"H Zhang","year":"2025","unstructured":"Zhang H, Li L, Xie X, He Y, Ren J, Xie G (2025) Entropy guidance hierarchical rich-scale feature network for remote sensing image semantic segmentation of high resolution. Appl Intell 55(6):1\u201322","journal-title":"Appl Intell"},{"key":"6883_CR18","doi-asserted-by":"publisher","first-page":"7887","DOI":"10.1109\/TMM.2024.3372835","volume":"26","author":"Q Zhou","year":"2024","unstructured":"Zhou Q, Wang L, Gao G, Kang B, Ou W, Lu H (2024) Boundary-guided lightweight semantic segmentation with multi-scale semantic context. IEEE Trans Multimed 26:7887\u20137900","journal-title":"IEEE Trans Multimed"},{"key":"6883_CR19","doi-asserted-by":"crossref","unstructured":"Xie Y, Zhang H, Xu X, Zhu J, He S (2023) Towards a smaller student: Capacity dynamic distillation for efficient image retrieval. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Vancouver, Canada, pp 16006\u201316015","DOI":"10.1109\/CVPR52729.2023.01536"},{"issue":"2","key":"6883_CR20","doi-asserted-by":"publisher","first-page":"2084","DOI":"10.1007\/s10489-023-05259-z","volume":"54","author":"J Zhang","year":"2024","unstructured":"Zhang J, Tsai P, Tsai M (2024) Semantic2graph: graph-based multi-modal feature fusion for action segmentation in videos. Appl Intell 54(2):2084\u20132099","journal-title":"Appl Intell"},{"key":"6883_CR21","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie E, Wang W, Yu Z, Anandkumar A, Alvarez JM, Luo P (2021) Segformer: Simple and efficient design for semantic segmentation with transformers. Adv Neural Inf Process Syst 34:12077\u201312090","journal-title":"Adv Neural Inf Process Syst"},{"key":"6883_CR22","doi-asserted-by":"crossref","unstructured":"Yu W, Luo M, Zhou P, Si C, Zhou Y, Wang X, Feng J, Yan S (2022) Metaformer is actually what you need for vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Louisiana, USA, pp. 10819\u201310829","DOI":"10.1109\/CVPR52688.2022.01055"},{"issue":"4","key":"6883_CR23","first-page":"733","volume":"9","author":"M Guo","year":"2023","unstructured":"Guo M, Lu C, Liu Z, Cheng M, Hu S (2023) Visual attention network. Computational Visual. Media 9(4):733\u2013752","journal-title":"Media"},{"issue":"10","key":"6883_CR24","doi-asserted-by":"publisher","first-page":"3008","DOI":"10.1109\/TMI.2020.2983721","volume":"39","author":"S Feng","year":"2020","unstructured":"Feng S, Zhao H, Shi F, Cheng X, Wang M, Ma Y, Xiang D, Zhu W, Chen X (2020) Cpfnet: Context pyramid fusion network for medical image segmentation. IEEE Trans Med Imaging 39(10):3008\u20133018","journal-title":"IEEE Trans Med Imaging"},{"key":"6883_CR25","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Massachusetts, USA, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"6883_CR26","doi-asserted-by":"crossref","unstructured":"Zhang W, Huang Z, Luo G, Chen T, Wang X, Liu W, Yu G, Shen C (2022) Topformer: Token pyramid transformer for mobile semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Louisiana, USA, pp 12083\u201312093","DOI":"10.1109\/CVPR52688.2022.01177"},{"key":"6883_CR27","doi-asserted-by":"crossref","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2017) Pyramid scene parsing network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Hawaii, USA, pp 2881\u20132890","DOI":"10.1109\/CVPR.2017.660"},{"key":"6883_CR28","doi-asserted-by":"crossref","unstructured":"Hou Q, Zhang L, Cheng MM, Feng J (2020) Strip pooling: Rethinking spatial pooling for scene parsing. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Seattle, USA, pp 4003\u20134012","DOI":"10.1109\/CVPR42600.2020.00406"},{"key":"6883_CR29","first-page":"1","volume":"71","author":"Q Zhou","year":"2022","unstructured":"Zhou Q, Qu Z, Li Y-X, Ju F-R (2022) Tunnel crack detection with linear seam based on mixed attention and multiscale feature fusion. IEEE Trans Instrum Meas 71:1\u201311","journal-title":"IEEE Trans Instrum Meas"},{"issue":"2","key":"6883_CR30","doi-asserted-by":"publisher","first-page":"1385","DOI":"10.1109\/TII.2023.3271441","volume":"20","author":"T Jing","year":"2023","unstructured":"Jing T, Meng Q-H, Hou H-R (2023) Smokeseger: a transformer-cnn coupled model for urban scene smoke segmentation. IEEE Trans Industr Inf 20(2):1385\u20131396","journal-title":"IEEE Trans Industr Inf"},{"key":"6883_CR31","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Li X, Fan D, Song K, Liang D, Lu T, Luo P, Shao L (2021) Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE international conference on computer vision, Quebec, Canada, pp 568\u2013578","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"11","key":"6883_CR32","doi-asserted-by":"publisher","first-page":"12760","DOI":"10.1109\/TPAMI.2022.3202765","volume":"45","author":"Y Wu","year":"2023","unstructured":"Wu Y, Liu Y, Zhan X, Cheng M (2023) P2t: Pyramid pooling transformer for scene understanding. IEEE Trans Pattern Anal Mach Intell 45(11):12760\u201312771","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6883_CR33","doi-asserted-by":"crossref","unstructured":"He J, Deng Z, Zhou L, Wang Y, Qiao Y (2019) Adaptive pyramid context network for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, California, USA, pp 7519\u20137528","DOI":"10.1109\/CVPR.2019.00770"},{"key":"6883_CR34","first-page":"9355","volume":"34","author":"X Chu","year":"2021","unstructured":"Chu X, Tian Z, Wang Y, Zhang B, Ren H, Wei X, Xia H, Shen C (2021) Twins: Revisiting the design of spatial attention in vision transformers. Adv Neural Inf Process Syst 34:9355\u20139366","journal-title":"Adv Neural Inf Process Syst"},{"key":"6883_CR35","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE international conference on computer vision, British Columbia, Canada, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"6883_CR36","doi-asserted-by":"crossref","unstructured":"Ren S, Zhou D, He S, Feng J, Wang X (2022) Shunted self-attention via multi-scale token aggregation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Louisiana, USA, pp 10853\u201310862","DOI":"10.1109\/CVPR52688.2022.01058"},{"issue":"10","key":"6883_CR37","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2020","unstructured":"Wang J, Sun K, Cheng T, Jiang B, Deng C, Zhao Y, Liu D, Mu Y, Tan M, Wang X (2020) Deep high-resolution representation learning for visual recognition. IEEE Trans Pattern Anal Mach Intell 43(10):3349\u20133364","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6883_CR38","doi-asserted-by":"crossref","unstructured":"Zheng S, Lu J, Zhao H, Zhu X, Luo Z, Wang Y, Fu Y, Feng J, Xiang T, Torr PH (2021) Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Tennessee, USA, pp 6881\u20136890","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"6883_CR39","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: Convolutional networks for biomedical image segmentation. In: Medical image computing and computer-assisted intervention, Munich, Germany, pp 234\u2013241","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"6883_CR40","doi-asserted-by":"crossref","unstructured":"Chen LC, Yang Y, Wang J, Xu W, Yuille AL (2016) Attention to scale: Scale-aware semantic image segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Nevada, USA, pp 3640\u20133649","DOI":"10.1109\/CVPR.2016.396"},{"key":"6883_CR41","unstructured":"Tao A, Sapra K, Catanzaro B (2020) Hierarchical multi-scale attention for semantic segmentation. arXiv preprint arXiv:2005.10821"},{"key":"6883_CR42","doi-asserted-by":"crossref","unstructured":"Zhang H, Dana K, Shi J, Zhang Z, Wang X, Tyagi A, Agrawal A (2018) Context encoding for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Utah, USA, pp 7151\u20137160","DOI":"10.1109\/CVPR.2018.00747"},{"key":"6883_CR43","doi-asserted-by":"crossref","unstructured":"Huang Z, Wang X, Huang L, Huang C, Wei Y, Liu W (2019) Ccnet: Criss-cross attention for semantic segmentation. In: Proceedings of the IEEE international conference on computer vision, Seoul, Korea, pp. 603\u2013612","DOI":"10.1109\/ICCV.2019.00069"},{"key":"6883_CR44","doi-asserted-by":"crossref","unstructured":"Yuan Y, Chen X, Wang J (2020) Object-contextual representations for semantic segmentation. In: European conference on computer vision, Scotland, England, pp 173\u2013190","DOI":"10.1007\/978-3-030-58539-6_11"},{"key":"6883_CR45","doi-asserted-by":"crossref","unstructured":"Pan W, Zhu J, Zeng H (2025) Fair training with zero inputs. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 39. Pennsylvania, USA, pp 6317\u20136325","DOI":"10.1609\/aaai.v39i6.32676"},{"key":"6883_CR46","first-page":"1","volume":"71","author":"A Lin","year":"2022","unstructured":"Lin A, Chen B, Xu J, Zhang Z, Lu G, Zhang D (2022) Ds-transunet: Dual swin transformer u-net for medical image segmentation. IEEE Trans Instrum Meas 71:1\u201315","journal-title":"IEEE Trans Instrum Meas"},{"key":"6883_CR47","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2022.3216413","volume":"71","author":"Z Wang","year":"2022","unstructured":"Wang Z, Chen Y, Shao W, Li H, Zhang L (2022) Swinfuse: A residual swin transformer fusion network for infrared and visible images. IEEE Trans Instrum Meas 71:1\u201312","journal-title":"IEEE Trans Instrum Meas"},{"key":"6883_CR48","doi-asserted-by":"crossref","unstructured":"Gu J, Kwon H, Wang D, Ye W, Li M, Chen YH, Lai L, Chandra V, Pan DZ (2022) Multi-scale high-resolution vision transformer for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Louisiana, USA, pp 12094\u201312103","DOI":"10.1109\/CVPR52688.2022.01178"},{"key":"6883_CR49","first-page":"1140","volume":"35","author":"M Guo","year":"2022","unstructured":"Guo M, Lu C, Hou Q, Liu Z, Cheng M, Hu S (2022) Segnext: Rethinking convolutional attention design for semantic segmentation. Adv Neural Inf Process Syst 35:1140\u20131156","journal-title":"Adv Neural Inf Process Syst"},{"key":"6883_CR50","doi-asserted-by":"crossref","unstructured":"Zhang J, Zhang Y, Xu X (2021) Pyramid u-net for retinal vessel segmentation. In: IEEE International conference on acoustics, speech and signal processing, Toronto, Canada, pp 1125\u20131129","DOI":"10.1109\/ICASSP39728.2021.9414164"},{"key":"6883_CR51","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: Accelerating deep network training by reducing internal covariate shift. In: International conference on machine learning, Nord, France, pp 448\u2013456"},{"key":"6883_CR52","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Nevada, USA, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"6883_CR53","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi AA (2017) Inception-v4, inception-resnet and the impact of residual connections on learning. In: AAAI Conference on artificial intelligence, California, USA","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"6883_CR54","doi-asserted-by":"crossref","unstructured":"Li X, Yang Y, Zhao Q, Shen T, Lin Z, Liu H (2020) Spatial pyramid based graph reasoning for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Seattle, USA, pp 8950\u20138959","DOI":"10.1109\/CVPR42600.2020.00897"},{"key":"6883_CR55","doi-asserted-by":"crossref","unstructured":"Lei J, Hu X, Wang Y, Liu D (2023) Pyramidflow: High-resolution defect contrastive localization using pyramid normalizing flow. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Vancouver, Canada, pp 14143\u201314152","DOI":"10.1109\/CVPR52729.2023.01359"},{"key":"6883_CR56","doi-asserted-by":"publisher","first-page":"9900","DOI":"10.1109\/JSTARS.2022.3221860","volume":"15","author":"J Li","year":"2022","unstructured":"Li J, Liu Y, Liu J, Song R, Liu W, Han K, Du Q (2022) Feature guide network with context aggregation pyramid for remote sensing image segmentation. IEEE Journal of selected topics in applied earth observations and remote sensing 15:9900\u20139912","journal-title":"IEEE Journal of selected topics in applied earth observations and remote sensing"},{"key":"6883_CR57","doi-asserted-by":"crossref","unstructured":"Li D, Yao A, Chen Q (2020) Psconv: Squeezing feature pyramid into one compact poly-scale convolutional layer. In: European conference on computer vision, Scotland, UK, pp 615\u2013632","DOI":"10.1007\/978-3-030-58589-1_37"},{"key":"6883_CR58","doi-asserted-by":"crossref","unstructured":"Vijay S, Guhan T, Srinivasan K, Vincent P, Chang CY (2023) Mri brain tumor segmentation using residual spatial pyramid pooling-powered 3d u-net. Front. Public Health 11:1091850","DOI":"10.3389\/fpubh.2023.1091850"},{"key":"6883_CR59","doi-asserted-by":"crossref","unstructured":"Ahmad I, Qayyum A, Gupta BB, Alassafi MO, AlGhamdi RA (2022) Ensemble of 2d residual neural networks integrated with atrous spatial pyramid pooling module for myocardium segmentation of left ventricle cardiac mri. Mathematics 10(4):627","DOI":"10.3390\/math10040627"},{"key":"6883_CR60","unstructured":"Hendrycks D, Gimpel K (2016) Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415"},{"key":"6883_CR61","unstructured":"Hinton GE, Srivastava N, Krizhevsky A, Sutskever I, Salakhutdinov RR (2012) Improving neural networks by preventing co-adaptation of feature detectors. arXiv preprint arXiv:1207.0580"},{"key":"6883_CR62","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S (2021) An image is worth 16x16 words: transformers for image recognition at scale. In: International conference on learning representations, Vienna, Austria"},{"key":"6883_CR63","doi-asserted-by":"crossref","unstructured":"Kirillov A, Girshick R, He K, Doll\u00e1r P (2019) Panoptic feature pyramid networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, California, USA, pp 6399\u20136408","DOI":"10.1109\/CVPR.2019.00656"},{"key":"6883_CR64","unstructured":"Contributors M (2020) Mmsegmentation: OpenMMLab Semantic Segmentation Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmsegmentation"},{"key":"6883_CR65","unstructured":"Loshchilov I, Hutter F (2019) Decoupled weight decay regularization. In: International conference on learning representations, Louisiana, USA"},{"key":"6883_CR66","doi-asserted-by":"crossref","unstructured":"Xiao T, Liu Y, Zhou B, Jiang Y, Sun J (2018) Unified perceptual parsing for scene understanding. In: Proceedings of the European conference on computer vision, Munich, Germany, pp 418\u2013434","DOI":"10.1007\/978-3-030-01228-1_26"},{"issue":"4","key":"6883_CR67","doi-asserted-by":"publisher","first-page":"1486","DOI":"10.1109\/TSMC.2018.2871750","volume":"50","author":"X Tao","year":"2018","unstructured":"Tao X, Zhang D, Wang Z, Liu X, Zhang H, Xu D (2018) Detection of power line insulator defects using aerial images analyzed with convolutional neural networks. IEEE Trans Syst Man Cybern Syst 50(4):1486\u20131498","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"6883_CR68","doi-asserted-by":"crossref","unstructured":"Xu M, Zhang Z, Hu H, Wang J, Wang L, Wei F, Bai X, Liu Z (2021) End-to-end semi-supervised object detection with soft teacher. In: Proceedings of the IEEE international conference on computer vision, British Columbia, Canada, pp 3060\u20133069","DOI":"10.1109\/ICCV48922.2021.00305"},{"key":"6883_CR69","doi-asserted-by":"crossref","unstructured":"Zhou B, Zhao H, Puig X, Fidler S, Barriuso A, Torralba A (2017) Scene parsing through ade20k dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition, Hawaii, USA, pp 633\u2013641","DOI":"10.1109\/CVPR.2017.544"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06883-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06883-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06883-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T15:42:59Z","timestamp":1762530179000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06883-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":69,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["6883"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06883-7","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2025,10]]},"assertion":[{"value":"22 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Authors have no conflict of interest to declare.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest"}}],"article-number":"1012"}}