{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T02:39:39Z","timestamp":1773283179460,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"18","license":[{"start":{"date-parts":[[2023,11,16]],"date-time":"2023-11-16T00:00:00Z","timestamp":1700092800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,16]],"date-time":"2023-11-16T00:00:00Z","timestamp":1700092800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17625-7","type":"journal-article","created":{"date-parts":[[2023,11,16]],"date-time":"2023-11-16T06:01:46Z","timestamp":1700114506000},"page":"53221-53242","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Modified YOLOv5 for small target detection in aerial images"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9498-0107","authenticated-orcid":false,"given":"Inderpreet","family":"Singh","sequence":"first","affiliation":[]},{"given":"Geetika","family":"Munjal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,16]]},"reference":[{"key":"17625_CR1","doi-asserted-by":"publisher","unstructured":"Jocher G et al ultralytics\/yolov5: V5.0 - YOLOv5-P6 1280 models, AWS, Supervise.ly and YouTube integrations. https:\/\/doi.org\/10.5281\/zenodo.4679653","DOI":"10.5281\/zenodo.4679653"},{"key":"17625_CR2","doi-asserted-by":"publisher","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 779\u2013788. https:\/\/doi.org\/10.1109\/CVPR.2016.91","DOI":"10.1109\/CVPR.2016.91"},{"key":"17625_CR3","unstructured":"Tan M, Le QV (2020) EfficientNet: rethinking model scaling for convolutional neural networks"},{"key":"17625_CR4","doi-asserted-by":"publisher","unstructured":"Xia G-S, Bai X, Ding J, Zhu Z, Belongie S, Luo J, Datcu M, Pelillo M, Zhang L (2018) Dota: a large-scale dataset for object detection in aerial images. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 3974\u20133983. https:\/\/doi.org\/10.1109\/CVPR.2018.00418","DOI":"10.1109\/CVPR.2018.00418"},{"key":"17625_CR5","doi-asserted-by":"publisher","unstructured":"Du D, Zhu P et al (2019) Visdrone-det2019: the vision meets drone object detection in image challenge results. In: 2019 IEEE\/CVF international conference on computer vision workshop (ICCVW), pp 213\u2013226. https:\/\/doi.org\/10.1109\/ICCVW.2019.00030","DOI":"10.1109\/ICCVW.2019.00030"},{"key":"17625_CR6","doi-asserted-by":"crossref","unstructured":"Ding J, Xue N, Xia G-S, Bai X, Yang W, Yang MY, Belongie S, Luo J, Datcu M, Pelillo M, Zhang L (2021) Object detection in aerial images: a large-scale benchmark and challenges","DOI":"10.1109\/TPAMI.2021.3117983"},{"issue":"1","key":"17625_CR7","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/TPAMI.2015.2437384","volume":"38","author":"R Girshick","year":"2016","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2016) Region-based convolutional networks for accurate object detection and segmentation. IEEE Trans Pattern Anal Mach Intell 38(1):142\u2013158. https:\/\/doi.org\/10.1109\/TPAMI.2015.2437384","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17625_CR8","doi-asserted-by":"publisher","unstructured":"Liu S, Deng W (2015) Very deep convolutional neural network based image classification using small training sample size. In: 2015 3rd IAPR Asian conference on pattern recognition (ACPR), pp 730\u2013734. https:\/\/doi.org\/10.1109\/ACPR.2015.7486599","DOI":"10.1109\/ACPR.2015.7486599"},{"key":"17625_CR9","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"17625_CR10","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) ImageNet classification with deep convolutional neural networks. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2012\/file\/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf"},{"key":"17625_CR11","doi-asserted-by":"publisher","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), pp 1\u20139. https:\/\/doi.org\/10.1109\/CVPR.2015.7298594","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"17625_CR12","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision - ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) Ssd: single shot multibox detector. In: Leibe B, Matas J, Sebe N, Welling M (eds) Computer Vision - ECCV 2016. Springer, Cham, pp 21\u201337"},{"key":"17625_CR13","unstructured":"Dai J, Li Y, He K, Sun J (2016) R\u2013fcn: object detection via region\u2013based fully convolutional networks. In: Lee DD, Sugiyama M, Luxburg U, Guyon I, Garnett R (eds.) NIPS, pp 379\u2013387. http:\/\/dblp.uni-trier.de\/db\/conf\/nips\/nips2016.html#DaiLHS16"},{"key":"17625_CR14","doi-asserted-by":"publisher","unstructured":"Tan M, Pang R, Le QV (2020) Efficientdet: scalable and efficient object detection. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 10778\u201310787. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01079","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"17625_CR15","doi-asserted-by":"publisher","unstructured":"Tian Z, Shen C, Chen H, He T (2019) Fcos: Fully convolutional one-stage object detection. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), pp 9626\u20139635. https:\/\/doi.org\/10.1109\/ICCV.2019.00972","DOI":"10.1109\/ICCV.2019.00972"},{"key":"17625_CR16","doi-asserted-by":"publisher","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), pp 6517\u20136525. https:\/\/doi.org\/10.1109\/CVPR.2017.690","DOI":"10.1109\/CVPR.2017.690"},{"key":"17625_CR17","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Proceedings of the 32nd international conference on machine learning. ICML\u201915, vol 37, pp 448\u2013456. JMLR.org, ???"},{"issue":"56","key":"17625_CR18","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(56):1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"17625_CR19","unstructured":"Redmon J, Farhadi A (2018) YOLOv3: an incremental improvement"},{"key":"17625_CR20","doi-asserted-by":"publisher","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), pp 936\u2013944. https:\/\/doi.org\/10.1109\/CVPR.2017.106","DOI":"10.1109\/CVPR.2017.106"},{"key":"17625_CR21","unstructured":"Bochkovskiy A, Wang C-Y, Liao H-YM (2020) YOLOv4: optimal speed and accuracy of object detection"},{"key":"17625_CR22","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision - ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: Fleet D, Pajdla T, Schiele B, Tuytelaars T (eds) Computer Vision - ECCV 2014. Springer, Cham, pp 740\u2013755"},{"key":"17625_CR23","doi-asserted-by":"publisher","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, pp 248\u2013255. https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"17625_CR24","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/978-3-319-10578-9_23","volume-title":"Computer Vision - ECCV 2014","author":"K He","year":"2014","unstructured":"He K, Zhang X, Ren S, Sun J (2014) Spatial pyramid pooling in deep convolutional networks for visual recognition. In: Fleet D, Pajdla T, Schiele B, Tuytelaars T (eds) Computer Vision - ECCV 2014. Springer, Cham, pp 346\u2013361"},{"issue":"3","key":"17625_CR25","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1007\/s11263-019-01204-1","volume":"128","author":"H Law","year":"2020","unstructured":"Law H, Deng J (2020) Cornernet: detecting objects as paired keypoints. Int J Comput Vis 128(3):642\u2013656. https:\/\/doi.org\/10.1007\/s11263-019-01204-1","journal-title":"Int J Comput Vis"},{"key":"17625_CR26","unstructured":"Zhou X, Wang D, Kr\u00e4henb\u00fchl P (2019) Objects as points. arXiv:1904.07850"},{"key":"17625_CR27","doi-asserted-by":"publisher","unstructured":"Duan K, Bai S, Xie L, Qi H, Huang Q, Tian Q (2019) Centernet: keypoint triplets for object detection. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), pp 6568\u20136577. https:\/\/doi.org\/10.1109\/ICCV.2019.00667","DOI":"10.1109\/ICCV.2019.00667"},{"key":"17625_CR28","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. arXiv:2005.12872","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"17625_CR29","doi-asserted-by":"publisher","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 8759\u20138768. https:\/\/doi.org\/10.1109\/CVPR.2018.00913","DOI":"10.1109\/CVPR.2018.00913"},{"key":"17625_CR30","doi-asserted-by":"publisher","unstructured":"Wang C-Y, Mark\u00a0Liao H-Y, Wu Y-H, Chen P-Y, Hsieh J-W, Yeh I-H (2020) Cspnet: a new backbone that can enhance learning capability of cnn. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition workshops (CVPRW), pp 1571\u20131580. https:\/\/doi.org\/10.1109\/CVPRW50498.2020.00203","DOI":"10.1109\/CVPRW50498.2020.00203"},{"key":"17625_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2023.3244819","volume":"72","author":"J Ni","year":"2023","unstructured":"Ni J, Shen K, Chen Y, Yang SX (2023) An improved ssd-like deep network-based object detection method for indoor scenes. IEEE Trans Instrum Meas 72:1\u201315. https:\/\/doi.org\/10.1109\/TIM.2023.3244819","journal-title":"IEEE Trans Instrum Meas"},{"key":"17625_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2022.3146923","volume":"71","author":"J Ni","year":"2022","unstructured":"Ni J, Shen K, Chen Y, Cao W, Yang SX (2022) An improved deep network-based scene classification method for self-driving cars. IEEE Trans Instrum Meas 71:1\u201314. https:\/\/doi.org\/10.1109\/TIM.2022.3146923","journal-title":"IEEE Trans Instrum Meas"},{"key":"17625_CR33","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. In: Cortes, C., Lawrence, N., Lee, D., Sugiyama, M., Garnett, R. (eds.) Advances in neural information processing systems. Curran Associates, Inc., vol 28. ???. https:\/\/proceedings.neurips.cc\/paper\/2015\/file\/14bfa6bb14875e45bba028a21ed38046-Paper.pdf"},{"key":"17625_CR34","doi-asserted-by":"crossref","unstructured":"Razakarivony S, Jurie F (2015) Vehicle detection in aerial imagery: a small target detection benchmark. Journal of Visual Communication and Image Representation, Elsevier","DOI":"10.1016\/j.jvcir.2015.11.002"},{"key":"17625_CR35","doi-asserted-by":"publisher","unstructured":"Mundhenk TN, Konjevod G, Sakla WA, Boakye K (2020) Cars overhead with context (COWC). UC San Diego Library Digital Collections, In Lawrence Livermore National Laboratory (LLNL) Open Data Initiative. https:\/\/doi.org\/10.6075\/J0CN72BC, http:\/\/library.ucsd.edu\/dc\/object\/bb8332755d","DOI":"10.6075\/J0CN72BC"},{"key":"17625_CR36","doi-asserted-by":"crossref","unstructured":"Du D, Qi Y, Yu H, Yang Y, Duan K, Li G, Zhang W, Huang Q, Tian Q (2018) The unmanned aerial vehicle benchmark: object detection and tracking","DOI":"10.1007\/978-3-030-01249-6_23"},{"issue":"3","key":"17625_CR37","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/MCSE.2007.55","volume":"9","author":"JD Hunter","year":"2007","unstructured":"Hunter JD (2007) Matplotlib: a 2d graphics environment. Comput Sci Eng 9(3):90\u201395. https:\/\/doi.org\/10.1109\/MCSE.2007.55","journal-title":"Comput Sci Eng"},{"key":"17625_CR38","unstructured":"Clark A (2015) Pillow (PIL Fork) Documentation. readthedocs. https:\/\/buildmedia.readthedocs.org\/media\/pdf\/pillow\/latest\/pillow.pdf"},{"key":"17625_CR39","unstructured":"Umesh P (2012) Image processing in python. CSI Commun 23"},{"key":"17625_CR40","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Proceedings of the 31st international conference on neural information processing systems. NIPS\u201917. Curran Associates Inc., Red Hook, pp 6000\u20136010"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17625-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17625-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17625-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,15]],"date-time":"2024-05-15T10:06:51Z","timestamp":1715767611000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17625-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,16]]},"references-count":40,"journal-issue":{"issue":"18","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["17625"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17625-7","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,16]]},"assertion":[{"value":"17 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 October 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 November 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest Statement"}}]}}