{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T11:04:44Z","timestamp":1773486284288,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T00:00:00Z","timestamp":1767571200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T00:00:00Z","timestamp":1767571200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Pattern Anal Applic"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10044-025-01601-y","type":"journal-article","created":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T07:06:35Z","timestamp":1767596795000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Cross-scale feature alignment and feature enhancement for small object detection"],"prefix":"10.1007","volume":"29","author":[{"given":"Binglin","family":"Wu","sequence":"first","affiliation":[]},{"given":"Yi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xiaxu","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Guoliang","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,5]]},"reference":[{"issue":"6","key":"1601_CR1","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1016\/j.vrih.2023.06.006","volume":"5","author":"Z Li","year":"2023","unstructured":"Li Z, Pang X, Jiang Y, Wang Y (2023) RealFuVSR: feature enhanced real-world video Super-Resolution. Virtual Real Intell Hardw 5(6):523\u2013537","journal-title":"Virtual Real Intell Hardw"},{"issue":"5","key":"1601_CR2","doi-asserted-by":"publisher","first-page":"432","DOI":"10.1016\/j.vrih.2022.08.007","volume":"4","author":"Y Zhao","year":"2022","unstructured":"Zhao Y, Zhang H, Lu P et al (2022) DSD-MatchingNet: deformable sparse-to-dense feature matching for learning accurate correspondences. Virtual Real Intell Hardw 4(5):432\u2013443","journal-title":"Virtual Real Intell Hardw"},{"key":"1601_CR3","doi-asserted-by":"publisher","unstructured":"Xu Y, Yan W, Yang G, Luo J et al (2020) CenterFace: joint face detection and alignment using face as point. Scient Program. https:\/\/doi.org\/10.1155\/2020\/7845384","DOI":"10.1155\/2020\/7845384"},{"key":"1601_CR4","doi-asserted-by":"crossref","unstructured":"Liu Y, Huang E et al (2024) 3D facial attractiveness prediction based on deep feature fusion. Comput Animat Virtual Worlds. 35(1):e2203","DOI":"10.1002\/cav.2203"},{"key":"1601_CR5","doi-asserted-by":"publisher","first-page":"107139","DOI":"10.1016\/j.engappai.2023.107139","volume":"126","author":"G Ates","year":"2023","unstructured":"Ates G et al (2023) Dual Cross-Attention for medical image segmentation. Eng Appl Artif Intell 126:107139","journal-title":"Eng Appl Artif Intell"},{"issue":"12","key":"1601_CR6","doi-asserted-by":"publisher","first-page":"3446","DOI":"10.1109\/TMI.2021.3087857","volume":"40","author":"R Liu","year":"2021","unstructured":"Liu R, Liu M et al (2021) NHBS-Net: a feature fusion attention network for ultrasound neonatal hip bone segmentation. IEEE Trans Med Imaging 40(12):3446\u20133458","journal-title":"IEEE Trans Med Imaging"},{"key":"1601_CR7","doi-asserted-by":"publisher","unstructured":"Masood S et al (2022) A deep learning-based semantic segmentation architecture for autonomous driving applications. Wireless Commun Mobile Comput. https:\/\/doi.org\/10.1155\/2022\/8684138","DOI":"10.1155\/2022\/8684138"},{"issue":"20","key":"1601_CR8","doi-asserted-by":"publisher","first-page":"4396","DOI":"10.3390\/app9204396","volume":"9","author":"H Liu","year":"2019","unstructured":"Liu H, Lang B (2019) Machine learning and deep learning methods for intrusion detection systems: a survey. Appl Sci 9(20):4396","journal-title":"Appl Sci"},{"key":"1601_CR9","doi-asserted-by":"publisher","first-page":"105985","DOI":"10.1016\/j.engappai.2023.105985","volume":"121","author":"B Prasath","year":"2023","unstructured":"Prasath B, Akila M (2023) IoT-based pest detection and classification using deep features with enhanced deep learning strategies. Eng Appl Artif Intell 121:105985","journal-title":"Eng Appl Artif Intell"},{"key":"1601_CR10","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1007\/s10044-024-01323-7","volume":"27","author":"T Ning","year":"2024","unstructured":"Ning T, Wu W (2024) Small object detection based on YOLOv8 in UAV perspective. Pattern Anal Appl 27:103","journal-title":"Pattern Anal Appl"},{"key":"1601_CR11","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1007\/s10044-024-01324-6","volume":"27","author":"J Wang","year":"2024","unstructured":"Wang J, Yang H, Wu M et al (2024) UR-YOLO: an urban road small object detection algorithm. Pattern Anal Appl 27:121","journal-title":"Pattern Anal Appl"},{"key":"1601_CR12","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1016\/j.neunet.2022.08.029","volume":"155","author":"K Min","year":"2022","unstructured":"Min K, Lee G-H, Lee S-W (2022) Attentional feature pyramid network for small object detection. Neural Netw 155:439\u2013450","journal-title":"Neural Netw"},{"issue":"2","key":"1601_CR13","doi-asserted-by":"publisher","first-page":"936","DOI":"10.1109\/TSMC.2020.3005231","volume":"52","author":"G Chen","year":"2022","unstructured":"Chen G et al (2022) A survey of the four pillars for small object detection: multiscale representation, contextual information, super-resolution, and region proposal. IEEE Trans Syst Man Cybernetics: Syst 52(2):936\u2013953","journal-title":"IEEE Trans Syst Man Cybernetics: Syst"},{"key":"1601_CR14","doi-asserted-by":"crossref","unstructured":"Li C, Yang T, Zhu S, Chen C, Guan S (2020) Density map guided object detection in aerial images. In: proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 190\u2013191","DOI":"10.1109\/CVPRW50498.2020.00103"},{"key":"1601_CR15","doi-asserted-by":"crossref","unstructured":"Varga LA, Zell A (2021) Tackling the background bias in sparse object detection via cropped windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2768\u20132777","DOI":"10.1109\/ICCVW54120.2021.00311"},{"issue":"6","key":"1601_CR16","doi-asserted-by":"publisher","first-page":"3311","DOI":"10.1007\/s10489-020-01949-0","volume":"51","author":"C Sun","year":"2021","unstructured":"Sun C, Ai Y, Wang S, Zhang W (2021) Mask-guided SSD for Small-object detection. Appl Intell 51(6):3311\u20133322","journal-title":"Appl Intell"},{"issue":"2","key":"1601_CR17","doi-asserted-by":"publisher","first-page":"936","DOI":"10.1109\/TSMC.2020.3005231","volume":"52","author":"C Guang","year":"2022","unstructured":"Guang C, Haitao W et al (2022) A survey of the four pillars for small object detection: multiscale Representation, contextual Information, Super-Resolution, and region proposal. IEEE Trans Syst Man Cybernetics: Syst 52(2):936\u2013953","journal-title":"IEEE Trans Syst Man Cybernetics: Syst"},{"key":"1601_CR18","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"issue":"9","key":"1601_CR19","doi-asserted-by":"publisher","first-page":"4508","DOI":"10.3390\/s23094508","volume":"23","author":"J Dang","year":"2023","unstructured":"Dang J, Tang X, Li S (2023) HA-FPN: hierarchical attention feature pyramid network for object detection. Sensors 23(9):4508","journal-title":"Sensors"},{"issue":"9","key":"1601_CR20","doi-asserted-by":"publisher","first-page":"4432","DOI":"10.3390\/s23094432","volume":"23","author":"H-J Park","year":"2023","unstructured":"Park H-J, Kang J-W, Kim B-G (2023) SsFPN: scale sequence (S2) feature-Based feature pyramid network for object detection. Sensors 23(9):4432","journal-title":"Sensors"},{"key":"1601_CR21","doi-asserted-by":"publisher","first-page":"16023","DOI":"10.1007\/s00521-024-09917-2","volume":"36","author":"D Feng","year":"2024","unstructured":"Feng D, Zhuang X et al (2024) Position information encoding FPN for small object detection in aerial images. Neural Comput Appl 36:16023\u201316035","journal-title":"Neural Comput Appl"},{"key":"1601_CR22","doi-asserted-by":"publisher","first-page":"127809","DOI":"10.1016\/j.neucom.2024.127809","volume":"593","author":"B Han","year":"2024","unstructured":"Han B, He L, Ke J et al (2024) Weighted parallel decoupled feature pyramid network for object detection. Neurocomputing 593:127809","journal-title":"Neurocomputing"},{"key":"1601_CR23","doi-asserted-by":"publisher","first-page":"17438","DOI":"10.1038\/s41598-024-68255-4","volume":"14","author":"Z Guo","year":"2024","unstructured":"Guo Z, Hu X et al (2024) A duplex transform heterogeneous feature fusion network for road segmentation. Sci Rep 14:17438","journal-title":"Sci Rep"},{"key":"1601_CR24","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1007\/s10489-024-05951-8","volume":"55","author":"H Guo","year":"2025","unstructured":"Guo H, Wang T et al (2025) Multilingual natural scene text detection via global feature fusion. Appl Intell 55:80","journal-title":"Appl Intell"},{"key":"1601_CR25","first-page":"5998","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30:5998\u20136008","journal-title":"Adv Neural Inf Process Syst"},{"key":"1601_CR26","doi-asserted-by":"crossref","unstructured":"Sun K, Xiao B, Liu D et al (2019) Deep high-resolution representation learning for human pose estimation. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5686\u20135696","DOI":"10.1109\/CVPR.2019.00584"},{"key":"1601_CR27","doi-asserted-by":"crossref","unstructured":"Varghese R (2024) YOLOv8: a novel object detection algorithm with enhanced performance and robustness. In: Proceedings of the International Conference on Advances in Data Engineering and Intelligent Computing Systems, pp. 1\u20136","DOI":"10.1109\/ADICS58448.2024.10533619"},{"key":"1601_CR28","doi-asserted-by":"crossref","unstructured":"Xiao Z, Chen Y et al (2024) Human action recognition in immersive virtual reality based on multi-scale spatio-temporal attention network. Comput Animat Virtual Worlds. 35(5):e2293","DOI":"10.1002\/cav.2293"},{"key":"1601_CR29","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee JY et al (2018) CBAM: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision, pp. 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1601_CR30","unstructured":"Liu Y, Shao Z, Hoffmann N Global attention mechanism: retain informationto enhance channel-spatial interactions. ArXiv, abs\/2112.05561"},{"key":"1601_CR31","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A et al An image is worth 16x16 words: Transformers for Image Recognition at Scale. arXiv, abs\/2010.11929"},{"key":"1601_CR32","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/TMM.2021.3120873","volume":"25","author":"X Lin","year":"2021","unstructured":"Lin X, Sun S et al (2021) EAPT: efficient attention pyramid transformer for image processing. IEEE Trans Multimedia 25:50\u201361","journal-title":"IEEE Trans Multimedia"},{"key":"1601_CR33","doi-asserted-by":"publisher","first-page":"105504","DOI":"10.1016\/j.engappai.2022.105504","volume":"117","author":"S-Y Wang","year":"2023","unstructured":"Wang S-Y, Qu Z et al (2023) BANet: small and multi-object detection with a bidirectional attention network for traffic scenes. Eng Appl Artif Intell 117:105504","journal-title":"Eng Appl Artif Intell"},{"key":"1601_CR34","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H et al (2018) Path aggregation network for instance segmentation. In: Proceedings of the Conference on Computer Vision and Pattern Recognition, pp. 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"key":"1601_CR35","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R et al (2020) : EfficientDet: Scalable and Efficient Object Detection. In: Proceedings of the Conference on Computer Vision and Pattern Recognition, pp. 10778\u201310787","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"1601_CR36","doi-asserted-by":"publisher","unstructured":"Chen H, Wang Q et al (2023) ALFPN: Adaptive learning feature pyramid network for small object detection. Int J Intellig Syst. https:\/\/doi.org\/10.1155\/2023\/6266209","DOI":"10.1155\/2023\/6266209"},{"issue":"2","key":"1601_CR37","doi-asserted-by":"publisher","first-page":"242","DOI":"10.3390\/sym17020242","volume":"17","author":"J Liu","year":"2025","unstructured":"Liu J, Wang Y, Cao Y, Guo C, Shi P, Li P (2025) Unified spatial-frequency modeling and alignment for multi-scale small object detection. Symmetry 17(2):242","journal-title":"Symmetry"},{"key":"1601_CR38","doi-asserted-by":"crossref","unstructured":"Lee Y, Lee J et al (2019) SNIDER: Single noisy image denoising and rectification for improving license plate recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshop, pp. 1017\u20131026","DOI":"10.1109\/ICCVW.2019.00131"},{"key":"1601_CR39","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P et al (2015) U-Net: convolutional networks for biomedical image segmentation. arXiv, abs\/1505.04597","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"1601_CR40","first-page":"5616217","volume":"60","author":"W Ma","year":"2022","unstructured":"Ma W, Li N et al (2022) Feature split\u2013merge\u2013enhancement network for remote sensing object detection. IEEE Trans Geosci Remote Sens 60:5616217","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"1601_CR41","doi-asserted-by":"publisher","first-page":"116919","DOI":"10.1016\/j.image.2023.116919","volume":"113","author":"Y Chen","year":"2023","unstructured":"Chen Y, Zhu X, Li Y, Wei Y, Ye L (2023) Enhanced semantic feature pyramid network for small object detection. Signal Process Image Commun 113:116919","journal-title":"Signal Process Image Commun"},{"key":"1601_CR42","doi-asserted-by":"crossref","unstructured":"Jie H, Shen L et al (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"issue":"1","key":"1601_CR43","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1109\/TCSVT.2023.3286896","volume":"34","author":"Z Chen","year":"2024","unstructured":"Chen Z, Ji H, Zhang Y et al (2024) High-Resolution feature pyramid network for small object detection on drone view. IEEE Trans Circuits Syst Video Technol 34(1):475\u2013489","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"1601_CR44","doi-asserted-by":"crossref","unstructured":"Zhu P, Wen L et al (2022) Detection and tracking meet drones challenge. In: Proceedings of the IEEE Transactions on Pattern Analysis and Machine Intelligence, pp. 7380\u20137399","DOI":"10.1109\/TPAMI.2021.3119563"},{"key":"1601_CR45","doi-asserted-by":"crossref","unstructured":"Zhu Z, Liang D, Zhang S et al (2016) Traffic-Sign detection and classification in the wild. In: Proceedings of the Conference on Computer Vision and Pattern Recognition, pp. 2110\u20132118","DOI":"10.1109\/CVPR.2016.232"},{"issue":"8","key":"1601_CR46","doi-asserted-by":"publisher","first-page":"13403","DOI":"10.1109\/TITS.2021.3124192","volume":"23","author":"B Bovcon","year":"2022","unstructured":"Bovcon B, Muhovi J, Vranac D et al (2022) MODS -- a USV-oriented object detection and obstacle segmentation benchmark. IEEE Trans Intell Transp Syst 23(8):13403\u201313418","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"1601_CR47","doi-asserted-by":"crossref","unstructured":"Lin TY et al (2014) Microsoft COCO: common objects in context. In: Computer Vision\u2014ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1601_CR48","doi-asserted-by":"publisher","first-page":"103752","DOI":"10.1016\/j.jvcir.2023.103752","volume":"90","author":"M Wang","year":"2023","unstructured":"Wang M, Yang Z et al (2023) FE-YOLOv5: feature enhancement network based on YOLOv5 for small object detection. J Vis Commun Image Represent 90:103752","journal-title":"J Vis Commun Image Represent"},{"key":"1601_CR49","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P et al (2017) Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2999\u20133007","DOI":"10.1109\/ICCV.2017.324"},{"key":"1601_CR50","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C et al (2019) FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9626\u20139635","DOI":"10.1109\/ICCV.2019.00972"},{"key":"1601_CR51","doi-asserted-by":"crossref","unstructured":"Zhang S, Chi Y et al (2020) Bridging the gap between anchor-based and anchor-free detection via adaptive training sample selection. Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit (CVPR). 9756\u20139765","DOI":"10.1109\/CVPR42600.2020.00978"},{"key":"1601_CR52","unstructured":"Xu X, Jiang J, Chen W et al DAMO-YOLO: a report on real-time object detection design. arXiv, abs\/ 2211.15444"},{"key":"1601_CR53","unstructured":"Ge Z, Liu S, Wang F et al (2021) YOLOX: exceeding YOLO series in 2021. arXiv, abs\/2107.08430"}],"container-title":["Pattern Analysis and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-025-01601-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10044-025-01601-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-025-01601-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T10:38:38Z","timestamp":1773484718000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10044-025-01601-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,5]]},"references-count":53,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["1601"],"URL":"https:\/\/doi.org\/10.1007\/s10044-025-01601-y","relation":{},"ISSN":["1433-7541","1433-755X"],"issn-type":[{"value":"1433-7541","type":"print"},{"value":"1433-755X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,5]]},"assertion":[{"value":"24 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests. The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"21"}}