{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T16:32:29Z","timestamp":1782405149633,"version":"3.54.5"},"reference-count":114,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2024,2,15]],"date-time":"2024-02-15T00:00:00Z","timestamp":1707955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,15]],"date-time":"2024-02-15T00:00:00Z","timestamp":1707955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s00521-024-09422-6","type":"journal-article","created":{"date-parts":[[2024,2,15]],"date-time":"2024-02-15T05:02:32Z","timestamp":1707973352000},"page":"6283-6303","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":106,"title":["A review of small object detection based on deep learning"],"prefix":"10.1007","volume":"36","author":[{"given":"Wei","family":"Wei","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6717-5727","authenticated-orcid":false,"given":"Yu","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiafeng","family":"He","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiyue","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,2,15]]},"reference":[{"key":"9422_CR1","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"9422_CR2","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast R-CNN. In: Proceedings of the IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"issue":"6","key":"9422_CR3","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149. https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9422_CR4","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"9422_CR5","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"9422_CR6","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer vision\u2014ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: Fleet D, Pajdla T, Schiele B, Tuytelaars T (eds) Computer vision\u2014ECCV 2014. Springer, Cham, pp 740\u2013755"},{"key":"9422_CR7","unstructured":"Zou Z, Chen K, Shi Z, Guo Y, Ye J (2019) Object detection in 20 years: a survey. arXiv e-prints, 1905"},{"key":"9422_CR8","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2020.103910","volume":"97","author":"K Tong","year":"2020","unstructured":"Tong K, Wu Y, Zhou F (2020) Recent advances in small object detection based on deep learning: a review. Image Vis Comput 97:103910. https:\/\/doi.org\/10.1016\/j.imavis.2020.103910","journal-title":"Image Vis Comput"},{"key":"9422_CR9","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-319-54193-8_14","volume-title":"Computer vision\u2014ACCV 2016","author":"C Chen","year":"2017","unstructured":"Chen C, Liu M-Y, Tuzel O, Xiao J (2017) R-CNN for small object detection. In: Lai S-H, Lepetit V, Nishino K, Sato Y (eds) Computer vision\u2014ACCV 2016. Springer, Cham, pp 214\u2013230"},{"issue":"1","key":"9422_CR10","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s11263-014-0748-y","volume":"119","author":"J Xiao","year":"2016","unstructured":"Xiao J, Ehinger KA, Hays J, Torralba A, Oliva A (2016) Sun database: exploring a large collection of scene categories. Int J Comput Vis 119(1):3\u201322","journal-title":"Int J Comput Vis"},{"issue":"2","key":"9422_CR11","doi-asserted-by":"publisher","first-page":"936","DOI":"10.1109\/TSMC.2020.3005231","volume":"52","author":"G Chen","year":"2020","unstructured":"Chen G, Wang H, Chen K, Li Z, Song Z, Liu Y, Chen W, Knoll A (2020) A survey of the four pillars for small object detection: multiscale representation, contextual information, super-resolution, and region proposal. IEEE Trans Syst Man Cybern Syst 52(2):936\u2013953","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"9422_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114602","volume":"172","author":"Y Liu","year":"2021","unstructured":"Liu Y, Sun P, Wergeles N, Shang Y (2021) A survey and performance evaluation of deep learning methods for small object detection. Expert Syst Appl 172:114602. https:\/\/doi.org\/10.1016\/j.eswa.2021.114602","journal-title":"Expert Syst Appl"},{"key":"9422_CR13","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"9422_CR14","doi-asserted-by":"publisher","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 8759\u20138768. https:\/\/doi.org\/10.1109\/CVPR.2018.00913","DOI":"10.1109\/CVPR.2018.00913"},{"key":"9422_CR15","doi-asserted-by":"crossref","unstructured":"Liang Z, Shao J, Zhang D, Gao L (2018) Small object detection using deep feature pyramid networks. In: Pacific rim conference on multimedia. Springer, pp 554\u2013564","DOI":"10.1007\/978-3-030-00764-5_51"},{"key":"9422_CR16","doi-asserted-by":"crossref","unstructured":"Ghiasi G, Lin T-Y, Le QV (2019) NAS-FPN: learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7036\u20137045","DOI":"10.1109\/CVPR.2019.00720"},{"key":"9422_CR17","doi-asserted-by":"crossref","unstructured":"Qiao S, Chen L-C, Yuille A (2021) Detectors: detecting objects with recursive feature pyramid and switchable atrous convolution. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10213\u201310224","DOI":"10.1109\/CVPR46437.2021.01008"},{"key":"9422_CR18","unstructured":"Li C, Li L, Jiang H, Weng K, Geng Y, Li L, Ke Z, Li Q, Cheng M, Nie W et al. (2022) Yolov6: a single-stage object detection framework for industrial applications. arXiv preprint arXiv:2209.02976"},{"key":"9422_CR19","doi-asserted-by":"crossref","unstructured":"Woo S, Hwang S, Kweon IS (2018) Stairnet: top-down semantic aggregation for accurate one shot detection. In: 2018 IEEE winter conference on applications of computer vision (WACV). IEEE, pp 1093\u20131102","DOI":"10.1109\/WACV.2018.00125"},{"key":"9422_CR20","doi-asserted-by":"crossref","unstructured":"Guo C, Fan B, Zhang Q, Xiang S, Pan C (2019) Augfpn: improving multi-scale feature learning for object detection. Journal Article","DOI":"10.1109\/CVPR42600.2020.01261"},{"key":"9422_CR21","unstructured":"Nayan A-A, Saha J, Mozumder AN, Mahmud KR, Azad AKA (2020) Real time multi-class object detection and recognition using vision augmentation algorithm. arXiv preprint arXiv:2003.07442"},{"key":"9422_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/LGRS.2021.3103069","volume":"19","author":"M Hong","year":"2022","unstructured":"Hong M, Li S, Yang Y, Zhu F, Zhao Q, Lu L (2022) Sspnet: scale selection pyramid network for tiny person detection from UAV images. IEEE Geosci Remote Sens Lett 19:1\u20135. https:\/\/doi.org\/10.1109\/LGRS.2021.3103069","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"9422_CR23","doi-asserted-by":"publisher","unstructured":"Gong Y, Yu X, Ding Y, Peng X, Zhao J, Han Z (2021) Effective fusion factor in FPN for tiny object detection. In: 2021 IEEE winter conference on applications of computer vision (WACV), pp 1159\u20131167. https:\/\/doi.org\/10.1109\/WACV48630.2021.00120","DOI":"10.1109\/WACV48630.2021.00120"},{"key":"9422_CR24","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le QV (2020) Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10781\u201310790","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"9422_CR25","unstructured":"Liu S, Huang D, Wang Y (2019) Learning spatial fusion for single-shot object detection. arXiv preprint arXiv:1911.09516"},{"key":"9422_CR26","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"9422_CR27","doi-asserted-by":"crossref","unstructured":"Li X, Wang W, Hu X, Yang J (2019) Selective kernel networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 510\u2013519","DOI":"10.1109\/CVPR.2019.00060"},{"key":"9422_CR28","doi-asserted-by":"crossref","unstructured":"Zhang H, Wu C, Zhang Z, Zhu Y, Lin H, Zhang Z, Sun Y, He T, Mueller J, Manmatha R et al. (2022) Resnest: split-attention networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2736\u20132746","DOI":"10.1109\/CVPRW56347.2022.00309"},{"key":"9422_CR29","doi-asserted-by":"crossref","unstructured":"Dai Y, Gieseke F, Oehmcke S, Wu Y, Barnard K (2021) Attentional feature fusion. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 3560\u20133569","DOI":"10.1109\/WACV48630.2021.00360"},{"key":"9422_CR30","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.infrared.2019.02.012","volume":"98","author":"T Yu","year":"2019","unstructured":"Yu T, Mo B, Liu F, Qi H, Liu Y (2019) Robust thermal infrared object tracking with continuous correlation filters and adaptive feature fusion. Infrared Phys Technol 98:69\u201381. https:\/\/doi.org\/10.1016\/j.infrared.2019.02.012","journal-title":"Infrared Phys Technol"},{"key":"9422_CR31","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3266837","author":"D Yuan","year":"2023","unstructured":"Yuan D, Chang X, Liu Q, Yang Y, Wang D, Shu M, He Z, Shi G (2023) Active learning for deep visual tracking. IEEE Trans Neural Netw Learn Syst. https:\/\/doi.org\/10.1109\/TNNLS.2023.3266837","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"9","key":"9422_CR32","doi-asserted-by":"publisher","first-page":"2109","DOI":"10.1109\/TPAMI.2017.2745563","volume":"40","author":"X Zeng","year":"2017","unstructured":"Zeng X, Ouyang W, Yan J, Li H, Xiao T, Wang K, Liu Y, Zhou Y, Yang B, Wang Z et al (2017) Crafting gbd-net for object detection. IEEE Trans Pattern Anal Mach Intell 40(9):2109\u20132123","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"5","key":"9422_CR33","doi-asserted-by":"publisher","first-page":"2439","DOI":"10.1109\/TIP.2018.2886767","volume":"28","author":"Y Li","year":"2018","unstructured":"Li Y, Zeng J, Shan S, Chen X (2018) Occlusion aware facial expression recognition using CNN with attention mechanism. IEEE Trans Image Process 28(5):2439\u20132450","journal-title":"IEEE Trans Image Process"},{"key":"9422_CR34","doi-asserted-by":"crossref","unstructured":"Tang X, Du DK, He Z, Liu J (2018) Pyramidbox: a context-assisted single shot face detector. In: Proceedings of the European conference on computer vision (ECCV), pp 797\u2013813","DOI":"10.1007\/978-3-030-01240-3_49"},{"key":"9422_CR35","doi-asserted-by":"crossref","unstructured":"Bell S, Zitnick CL, Bala K, Girshick R (2016) Inside-outside net: detecting objects in context with skip pooling and recurrent neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2874\u20132883","DOI":"10.1109\/CVPR.2016.314"},{"key":"9422_CR36","unstructured":"Le QV, Jaitly N, Hinton GE (2015) A simple way to initialize recurrent networks of rectified linear units. arXiv preprint arXiv:1504.00941"},{"key":"9422_CR37","unstructured":"Zhu Y, Urtasun R, Salakhutdinov R, Fidler S (2015) segdeepm: exploiting segmentation and context in deep neural networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4703\u20134711"},{"key":"9422_CR38","doi-asserted-by":"crossref","unstructured":"Liu Y, Wang R, Shan S, Chen X (2018) Structure inference net: object detection using scene-level context and instance-level relationships. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6985\u20136994","DOI":"10.1109\/CVPR.2018.00730"},{"key":"9422_CR39","unstructured":"Fu K, Li J, Ma L, Mu K, Tian Y (2020) Intrinsic relationship reasoning for small object detection. arXiv e-prints, 2009"},{"key":"9422_CR40","unstructured":"Kipf TN, Welling M (2016) Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907"},{"key":"9422_CR41","doi-asserted-by":"crossref","unstructured":"Hu H, Gu J, Zhang Z, Dai J, Wei Y (2018) Relation networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3588\u20133597","DOI":"10.1109\/CVPR.2018.00378"},{"key":"9422_CR42","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, vol 30"},{"key":"9422_CR43","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/j.neucom.2020.12.093","volume":"433","author":"J Leng","year":"2021","unstructured":"Leng J, Ren Y, Jiang W, Sun X, Wang Y (2021) Realize your surroundings: exploiting context information for small object detection. Neurocomputing 433:287\u2013299. https:\/\/doi.org\/10.1016\/j.neucom.2020.12.093","journal-title":"Neurocomputing"},{"issue":"1","key":"9422_CR44","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"Creswell A, White T, Dumoulin V, Arulkumaran K, Sengupta B, Bharath AA (2018) Generative adversarial networks: an overview. IEEE Signal Process Mag 35(1):53\u201365","journal-title":"IEEE Signal Process Mag"},{"key":"9422_CR45","doi-asserted-by":"crossref","unstructured":"Li J, Liang X, Wei Y, Xu T, Feng J, Yan S (2017) Perceptual generative adversarial networks for small object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1222\u20131230","DOI":"10.1109\/CVPR.2017.211"},{"key":"9422_CR46","doi-asserted-by":"crossref","unstructured":"Bai Y, Zhang Y, Ding M, Ghanem B (2018) SOD-MTGAN: small object detection via multi-task generative adversarial network. Springer, Cham, pp 210\u2013226","DOI":"10.1007\/978-3-030-01261-8_13"},{"key":"9422_CR47","doi-asserted-by":"crossref","unstructured":"Noh J, Bae W, Lee W, Seo J, Kim G (2019) Better to follow, follow to be better: towards precise supervision of feature super-resolution for small object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9725\u20139734","DOI":"10.1109\/ICCV.2019.00982"},{"key":"9422_CR48","doi-asserted-by":"crossref","unstructured":"Liu J, Li C, Liang F, Lin C, Sun M, Yan J, Ouyang W, Xu D (2021) Inception convolution with efficient dilation search. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11486\u201311495","DOI":"10.1109\/CVPR46437.2021.01132"},{"key":"9422_CR49","unstructured":"Huang L, Yang Y, Deng Y, Yu Y (2015) Densebox: unifying landmark localization with end to end object detection. arXiv preprint arXiv:1509.04874"},{"key":"9422_CR50","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2019) FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9627\u20139636","DOI":"10.1109\/ICCV.2019.00972"},{"key":"9422_CR51","doi-asserted-by":"crossref","unstructured":"Zhu C, He Y, Savvides M (2019) Feature selective anchor-free module for single-shot object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 840\u2013849","DOI":"10.1109\/CVPR.2019.00093"},{"key":"9422_CR52","doi-asserted-by":"publisher","first-page":"7389","DOI":"10.1109\/TIP.2020.3002345","volume":"29","author":"T Kong","year":"2020","unstructured":"Kong T, Sun F, Liu H, Jiang Y, Li L, Shi J (2020) Foveabox: beyound anchor-based object detection. IEEE Trans Image Process 29:7389\u20137398","journal-title":"IEEE Trans Image Process"},{"key":"9422_CR53","doi-asserted-by":"crossref","unstructured":"Chen R, Liu Y, Zhang M, Liu S, Yu B, Tai Y-W (2020) Dive deeper into box for object detection. In: Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, proceedings, part XXII 16. Springer, pp 412\u2013428","DOI":"10.1007\/978-3-030-58542-6_25"},{"key":"9422_CR54","doi-asserted-by":"crossref","unstructured":"Tychsen-Smith L, Petersson L (2017) Denet: scalable real-time object detection with directed sparse sampling. In: Proceedings of the IEEE international conference on computer vision, pp 428\u2013436","DOI":"10.1109\/ICCV.2017.54"},{"key":"9422_CR55","unstructured":"Wang X, Chen K, Huang Z, Yao C, Liu W (2017) Point linking network for object detection. arXiv preprint arXiv:1706.03646"},{"key":"9422_CR56","doi-asserted-by":"crossref","unstructured":"Law H, Deng J (2018) Cornernet: detecting objects as paired keypoints. In: Proceedings of the European conference on computer vision (ECCV), pp 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"9422_CR57","doi-asserted-by":"crossref","unstructured":"Duan K, Bai S, Xie L, Qi H, Huang Q, Tian Q (2019) Centernet: keypoint triplets for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6569\u20136578","DOI":"10.1109\/ICCV.2019.00667"},{"key":"9422_CR58","unstructured":"Law H, Teng Y, Russakovsky O, Deng J (2019) Cornernet-lite: efficient keypoint based object detection. arXiv e-prints, 1904"},{"key":"9422_CR59","doi-asserted-by":"crossref","unstructured":"Zhou X, Zhuo J, Krahenbuhl P (2019) Bottom-up object detection by grouping extreme and center points. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 850\u2013859","DOI":"10.1109\/CVPR.2019.00094"},{"key":"9422_CR60","doi-asserted-by":"crossref","unstructured":"Yang Z, Liu S, Hu H, Wang L, Lin S (2019) Reppoints: point set representation for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9657\u20139666","DOI":"10.1109\/ICCV.2019.00975"},{"key":"9422_CR61","doi-asserted-by":"crossref","unstructured":"Zhang S, Zhu X, Lei Z, Shi H, Wang X, Li SZ (2017) Faceboxes: a CPU real-time face detector with high accuracy. In: 2017 IEEE international joint conference on biometrics (IJCB). IEEE, pp 1\u20139","DOI":"10.1109\/BTAS.2017.8272675"},{"key":"9422_CR62","doi-asserted-by":"crossref","unstructured":"Zhang S, Zhu X, Lei Z, Shi H, Wang X, Li SZ (2017) S3fd: single shot scale-invariant face detector. In: Proceedings of the IEEE international conference on computer vision, pp 192\u2013201","DOI":"10.1109\/ICCV.2017.30"},{"key":"9422_CR63","doi-asserted-by":"crossref","unstructured":"Eggert C, Zecha D, Brehm S, Lienhart R (2017) Improving small object proposals for company logo detection. In: Proceedings of the 2017 ACM on international conference on multimedia retrieval, pp 167\u2013174","DOI":"10.1145\/3078971.3078990"},{"key":"9422_CR64","doi-asserted-by":"publisher","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, pp 248\u2013255. https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"9422_CR65","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2009","unstructured":"Everingham M, Gool LV, Williams CKI, Winn JM, Zisserman A (2009) The Pascal visual object classes (VOC) challenge. Int J Comput Vis 88:303\u2013338","journal-title":"Int J Comput Vis"},{"key":"9422_CR66","doi-asserted-by":"crossref","unstructured":"Kisantal M, Wojna Z, Murawski J, Naruniec J, Cho K (2019) Augmentation for small object detection. arXiv preprint arXiv:1902.07296","DOI":"10.5121\/csit.2019.91713"},{"key":"9422_CR67","unstructured":"Zhao M, Cheng L, Yang X, Feng P, Liu L, Wu N (2019) Tbc-net: a real-time detector for infrared small target detection using semantic constraint. arXiv preprint arXiv:2001.05852"},{"issue":"12","key":"9422_CR68","doi-asserted-by":"publisher","first-page":"4996","DOI":"10.1109\/TIP.2013.2281420","volume":"22","author":"C Gao","year":"2013","unstructured":"Gao C, Meng D, Yang Y, Wang Y, Zhou X, Hauptmann AG (2013) Infrared patch-image model for small target detection in a single image. IEEE Trans Image Process 22(12):4996\u20135009","journal-title":"IEEE Trans Image Process"},{"key":"9422_CR69","doi-asserted-by":"crossref","unstructured":"Chen C, Zhang Y, Lv Q, Wei S, Wang X, Sun X, Dong J (2019) Rrnet: a hybrid detector for object detection in drone-captured images. In: Proceedings of the IEEE\/CVF international conference on computer vision workshops, pp 0\u20130","DOI":"10.1109\/ICCVW.2019.00018"},{"key":"9422_CR70","unstructured":"Chen Y, Zhang P, Li Z, Li Y, Zhang X, Qi L, Sun J, Jia J (2020) Dynamic scale training for object detection. Journal Article"},{"key":"9422_CR71","doi-asserted-by":"publisher","first-page":"178798","DOI":"10.1109\/ACCESS.2019.2959015","volume":"7","author":"Z Ou","year":"2019","unstructured":"Ou Z, Xiao F, Xiong B, Shi S, Song M (2019) Famn: feature aggregation multipath network for small traffic sign detection. IEEE Access 7:178798\u2013178810","journal-title":"IEEE Access"},{"issue":"7","key":"9422_CR72","doi-asserted-by":"publisher","first-page":"1476","DOI":"10.1109\/TPAMI.2016.2601099","volume":"39","author":"R Shaoqing","year":"2017","unstructured":"Shaoqing R, Kaiming H, Girshick R, Xiangyu Z, Jian S (2017) Object detection networks on convolutional feature maps. IEEE Trans Pattern Anal Mach Intell 39(7):1476\u20131481. https:\/\/doi.org\/10.1109\/TPAMI.2016.2601099","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9422_CR73","doi-asserted-by":"publisher","first-page":"57120","DOI":"10.1109\/ACCESS.2019.2913882","volume":"7","author":"Z Liu","year":"2019","unstructured":"Liu Z, Du J, Tian F, Wen J (2019) Mr-cnn: a multi-scale region-based convolutional neural network for small traffic sign recognition. IEEE Access 7:57120\u201357128. https:\/\/doi.org\/10.1109\/ACCESS.2019.2913882","journal-title":"IEEE Access"},{"key":"9422_CR74","doi-asserted-by":"crossref","unstructured":"Zhu Z, Liang D, Zhang S, Huang X, Li B, Hu S (2016) Traffic-sign detection and classification in the wild. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2110\u20132118","DOI":"10.1109\/CVPR.2016.232"},{"issue":"7","key":"9422_CR75","doi-asserted-by":"publisher","first-page":"3423","DOI":"10.1109\/TIP.2019.2896952","volume":"28","author":"Y Yuan","year":"2019","unstructured":"Yuan Y, Xiong Z, Wang Q (2019) Vssa-net: vertical spatial sequence attention network for traffic sign detection. IEEE Trans Image Process 28(7):3423\u20133434. https:\/\/doi.org\/10.1109\/TIP.2019.2896952","journal-title":"IEEE Trans Image Process"},{"issue":"4","key":"9422_CR76","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1007\/s42154-021-00157-x","volume":"4","author":"G Chen","year":"2021","unstructured":"Chen G, Chen K, Zhang L, Zhang L, Knoll A (2021) Vcanet: vanishing-point-guided context-aware network for small road object detection. Autom Innov 4(4):400\u2013412. https:\/\/doi.org\/10.1007\/s42154-021-00157-x. (identifier: 157)","journal-title":"Autom Innov"},{"key":"9422_CR77","doi-asserted-by":"publisher","unstructured":"Lee S, Kim J, Yoon JS, Shin S, Bailo O, Kim N, Lee T-H, Hong HS, Han S-H, Kweon IS (2017) Vpgnet: vanishing point guided network for lane and road marking detection and recognition. https:\/\/doi.org\/10.1109\/ICCV.2017.215. Journal Article","DOI":"10.1109\/ICCV.2017.215"},{"key":"9422_CR78","doi-asserted-by":"publisher","unstructured":"Liu T, Fu HY, Wen Q, Zhang DK, Li LF (2018) Extended faster R-CNN for long distance human detection: finding pedestrians in UAV images. In: 2018 IEEE international conference on consumer electronics (ICCE), pp 1\u20132. https:\/\/doi.org\/10.1109\/ICCE.2018.8326306","DOI":"10.1109\/ICCE.2018.8326306"},{"issue":"8","key":"9422_CR79","doi-asserted-by":"publisher","first-page":"2238","DOI":"10.3390\/s20082238","volume":"20","author":"M Liu","year":"2020","unstructured":"Liu M, Wang X, Zhou A, Fu X, Ma Y, Piao C (2020) Uav-yolo: Small object detection on unmanned aerial vehicle perspective. Sensors 20(8):2238","journal-title":"Sensors"},{"key":"9422_CR80","unstructured":"Redmon J, Farhadi A (2018) Yolov3: an incremental improvement. arXiv preprint arXiv:1804.02767"},{"key":"9422_CR81","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"issue":"6","key":"9422_CR82","doi-asserted-by":"publisher","first-page":"1758","DOI":"10.1109\/TCSVT.2019.2905881","volume":"30","author":"X Liang","year":"2019","unstructured":"Liang X, Zhang J, Zhuo L, Li Y, Tian Q (2019) Small object detection in unmanned aerial vehicle images using feature fusion and scaling-based single shot detector with spatial context analysis. IEEE Trans Circuits Syst Video Technol 30(6):1758\u20131770","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"9422_CR83","first-page":"21","volume-title":"SSD: single shot multibox detector","author":"W Liu","year":"2016","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) SSD: single shot multibox detector. Springer, Cham, pp 21\u201337"},{"key":"9422_CR84","unstructured":"Li Z, Zhou F (2017) Fssd: feature fusion single shot multibox detector. arXiv preprint arXiv:1712.00960"},{"key":"9422_CR85","doi-asserted-by":"publisher","DOI":"10.1145\/3486678","author":"D Yuan","year":"2022","unstructured":"Yuan D, Chang X, Li Z, He Z (2022) Learning adaptive spatial-temporal context-aware correlation filters for UAV tracking. ACM Trans Multimed Comput Commun Appl. https:\/\/doi.org\/10.1145\/3486678","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"9422_CR86","doi-asserted-by":"publisher","first-page":"292","DOI":"10.1016\/j.neucom.2021.03.016","volume":"443","author":"G Tian","year":"2021","unstructured":"Tian G, Liu J, Yang W (2021) A dual neural network for object detection in UAV images. Neurocomputing 443:292\u2013301","journal-title":"Neurocomputing"},{"key":"9422_CR87","doi-asserted-by":"crossref","unstructured":"Wang C-Y, Bochkovskiy A, Liao H-YM (2022) YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"9422_CR88","doi-asserted-by":"crossref","unstructured":"Zhao H, Zhang H, Zhao Y (2023) Yolov7-sea: object detection of maritime UAV images based on improved yolov7. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 233\u2013238","DOI":"10.1109\/WACVW58289.2023.00029"},{"key":"9422_CR89","doi-asserted-by":"crossref","unstructured":"Yang X, Yang J, Yan J, Zhang Y, Zhang T, Guo Z, Sun X, Fu K (2019) Scrdet: towards more robust detection for small, cluttered and rotated objects. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 8232\u20138241","DOI":"10.1109\/ICCV.2019.00832"},{"key":"9422_CR90","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/j.patrec.2021.11.027","volume":"153","author":"F Xiaolin","year":"2022","unstructured":"Xiaolin F, Fan H, Ming Y, Tongxin Z, Ran B, Zenghui Z, Zhiyuan G (2022) Small object detection in remote sensing images based on super-resolution. Pattern Recogn Lett 153:107\u2013112","journal-title":"Pattern Recogn Lett"},{"key":"9422_CR91","first-page":"1","volume":"60","author":"J Han","year":"2021","unstructured":"Han J, Ding J, Li J, Xia G-S (2021) Align deep features for oriented object detection. IEEE Trans Geosci Remote Sens 60:1\u201311","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"9422_CR92","doi-asserted-by":"crossref","unstructured":"Xia G-S, Bai X, Ding J, Zhu Z, Belongie S, Luo J, Datcu M, Pelillo M, Zhang L (2018) Dota: a large-scale dataset for object detection in aerial images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3974\u20133983","DOI":"10.1109\/CVPR.2018.00418"},{"issue":"9","key":"9422_CR93","doi-asserted-by":"publisher","first-page":"1432","DOI":"10.3390\/rs12091432","volume":"12","author":"J Rabbi","year":"2020","unstructured":"Rabbi J, Ray N, Schubert M, Chowdhury S, Chao D (2020) Small-object detection in remote sensing images with end-to-end edge-enhanced GAN and object detector network. Remote Sens 12(9):1432","journal-title":"Remote Sens"},{"issue":"8","key":"9422_CR94","doi-asserted-by":"publisher","first-page":"5799","DOI":"10.1109\/TGRS.2019.2902431","volume":"57","author":"K Jiang","year":"2019","unstructured":"Jiang K, Wang Z, Yi P, Wang G, Lu T, Jiang J (2019) Edge-enhanced GAN for remote sensing image superresolution. IEEE Trans Geosci Remote Sens 57(8):5799\u20135812","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"19","key":"9422_CR95","doi-asserted-by":"publisher","first-page":"3152","DOI":"10.3390\/rs12193152","volume":"12","author":"L Courtrai","year":"2020","unstructured":"Courtrai L, Pham M-T, Lef\u00e8vre S (2020) Small object detection in remote sensing images based on super-resolution with auxiliary generative adversarial networks. Remote Sens 12(19):3152","journal-title":"Remote Sens"},{"key":"9422_CR96","unstructured":"Gulrajani I, Ahmed F, Arjovsky M, Dumoulin V, Courville A (2017) Improved training of Wasserstein GANs. In: Proceedings of the 31st international conference on neural information processing systems, pp 5769\u20135779"},{"key":"9422_CR97","doi-asserted-by":"crossref","unstructured":"Lim B, Son S, Kim H, Nah S, Mu\u00a0Lee K (2017) Enhanced deep residual networks for single image super-resolution. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 136\u2013144","DOI":"10.1109\/CVPRW.2017.151"},{"key":"9422_CR98","doi-asserted-by":"crossref","unstructured":"Zhu J-Y, Park T, Isola P, Efros AA (2017) Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE international conference on computer vision, pp 2223\u20132232","DOI":"10.1109\/ICCV.2017.244"},{"issue":"5","key":"9422_CR99","doi-asserted-by":"publisher","first-page":"813","DOI":"10.3390\/app8050813","volume":"8","author":"Y Ren","year":"2018","unstructured":"Ren Y, Zhu C, Xiao S (2018) Small object detection in optical remote sensing images via modified faster R-CNN. Appl Sci 8(5):813. https:\/\/doi.org\/10.3390\/app8050813. (identifier: app8050813)","journal-title":"Appl Sci"},{"key":"9422_CR100","unstructured":"Braun M, Krebs S, Flohr F, Gavrila DM (2018) The Eurocity persons dataset: a novel benchmark for object detection. arXiv preprint arXiv:1805.07193"},{"key":"9422_CR101","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1016\/j.neunet.2012.02.016","volume":"32","author":"J Stallkamp","year":"2012","unstructured":"Stallkamp J, Schlipsing M, Salmen J, Igel C (2012) Man vs. computer: benchmarking machine learning algorithms for traffic sign recognition. Neural Netw 32:323\u2013332","journal-title":"Neural Netw"},{"key":"9422_CR102","doi-asserted-by":"crossref","unstructured":"Zhang S, Benenson R, Schiele B (2017) Citypersons: a diverse dataset for pedestrian detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3213\u20133221","DOI":"10.1109\/CVPR.2017.474"},{"key":"9422_CR103","doi-asserted-by":"publisher","first-page":"296","DOI":"10.1016\/j.isprsjprs.2019.11.023","volume":"159","author":"K Li","year":"2020","unstructured":"Li K, Wan G, Cheng G, Meng L, Han J (2020) Object detection in optical remote sensing images: a survey and a new benchmark. ISPRS J Photogramm Remote Sens 159:296\u2013307","journal-title":"ISPRS J Photogramm Remote Sens"},{"key":"9422_CR104","doi-asserted-by":"crossref","unstructured":"Yu X, Gong Y, Jiang N, Ye Q, Han Z (2020) Scale match for tiny person detection. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 1257\u20131265","DOI":"10.1109\/WACV45572.2020.9093394"},{"key":"9422_CR105","doi-asserted-by":"crossref","unstructured":"Bondi E, Jain R, Aggrawal P, Anand S, Hannaford R, Kapoor A, Piavis J, Shah S, Joppa L, Dilkina B, et al (2020) Birdsai: a dataset for detection and tracking in aerial thermal infrared videos. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 1747\u20131756","DOI":"10.1109\/WACV45572.2020.9093284"},{"issue":"8","key":"9422_CR106","doi-asserted-by":"publisher","first-page":"899","DOI":"10.1007\/s11265-020-01614-2","volume":"93","author":"J Wan","year":"2021","unstructured":"Wan J, Ding W, Zhu H, Xia M, Huang Z, Tian L, Zhu Y, Wang H (2021) An efficient small traffic sign detection method based on yolov3. J Signal Process Syst 93(8):899\u2013911","journal-title":"J Signal Process Syst"},{"key":"9422_CR107","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"9422_CR108","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"9422_CR109","unstructured":"Dai J, Li Y, He K, Sun J (2016) R-FCN: object detection via region-based fully convolutional networks. In: Proceedings of the 30th international conference on neural information processing systems, pp 379\u2013387"},{"key":"9422_CR110","doi-asserted-by":"crossref","unstructured":"Azimi SM, Vig E, Bahmanyar R, K\u00f6rner M, Reinartz P (2018) Towards multi-class object detection in unconstrained remote sensing imagery. In: Asian conference on computer vision. Springer, pp 150\u2013165","DOI":"10.1007\/978-3-030-20893-6_10"},{"issue":"12","key":"9422_CR111","doi-asserted-by":"publisher","first-page":"10015","DOI":"10.1109\/TGRS.2019.2930982","volume":"57","author":"G Zhang","year":"2019","unstructured":"Zhang G, Lu S, Zhang W (2019) Cad-net: a context-aware detection network for objects in remote sensing imagery. IEEE Trans Geosci Remote Sens 57(12):10015\u201310024","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"9422_CR112","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal P, Nichol A (2021) Diffusion models beat GANs on image synthesis. Adv Neural Inf Process Syst 34:8780\u20138794","journal-title":"Adv Neural Inf Process Syst"},{"key":"9422_CR113","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision. Springer, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"9422_CR114","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2020) Deformable DETR: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09422-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09422-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09422-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T19:08:39Z","timestamp":1710616119000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09422-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,15]]},"references-count":114,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["9422"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09422-6","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,15]]},"assertion":[{"value":"3 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}