{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T17:33:29Z","timestamp":1768584809964,"version":"3.49.0"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T00:00:00Z","timestamp":1699920000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T00:00:00Z","timestamp":1699920000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s13735-023-00305-5","type":"journal-article","created":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T03:01:56Z","timestamp":1699930916000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["A lightweight small object detection algorithm based on improved YOLOv5 for driving scenarios"],"prefix":"10.1007","volume":"12","author":[{"given":"Zonghui","family":"Wen","sequence":"first","affiliation":[]},{"given":"Jia","family":"Su","sequence":"additional","affiliation":[]},{"given":"Yongxiang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Mingyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Guoxi","family":"Gan","sequence":"additional","affiliation":[]},{"given":"Shenmeng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Deyu","family":"Fan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,14]]},"reference":[{"key":"305_CR1","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/978-3-031-40286-9_4","volume-title":"Knowledge science, engineering and management","author":"Z Wen","year":"2023","unstructured":"Wen Z, Su J, Zhang Y (2023) Sie-yolov5: improved yolov5 for small object detection in drone-captured-scenarios. In: Jin Z, Jiang Y, Buchmann RA, Bi Y, Ghiran A-M, Ma W (eds) Knowledge science, engineering and management. Springer, Cham, pp 39\u201346"},{"key":"305_CR2","unstructured":"Lin T, Maire M, Belongie SJ, Bourdev LD, Girshick RB, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft COCO: common objects in context. CoRR abs\/1405.0312 1405.0312"},{"issue":"3","key":"305_CR3","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M, Berg AC, Fei-Fei L (2015) ImageNet large scale visual recognition challenge. Int J Comput Vision (IJCV) 115(3):211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int J Comput Vision (IJCV)"},{"key":"305_CR4","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Gool LV, Williams CKI, Winn JM, Zisserman A (2010) The pascal visual object classes (voc) challenge. Int J Comput Vision 88:303\u2013338","journal-title":"Int J Comput Vision"},{"key":"305_CR5","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-319-54193-8_14","volume-title":"Computer vision - ACCV 2016","author":"C Chen","year":"2017","unstructured":"Chen C, Liu M-Y, Tuzel O, Xiao J (2017) R-cnn for small object detection. In: Lai S-H, Lepetit V, Nishino K, Sato Y (eds) Computer vision - ACCV 2016. Springer, Cham, pp 214\u2013230"},{"key":"305_CR6","doi-asserted-by":"crossref","unstructured":"Lin T, Goyal P, Girshick RB, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. CoRR abs\/1708.02002 1708.02002","DOI":"10.1109\/ICCV.2017.324"},{"key":"305_CR7","unstructured":"Tan M, Pang R, Le QV (2019) Efficientdet: scalable and efficient object detection. CoRR abs\/1911.09070 1911.09070"},{"key":"305_CR8","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017), Attention is all you need"},{"key":"305_CR9","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala SK, Girshick RB, Farhadi A (2015), You only look once: unified, real-time object detection. CoRR abs\/1506.02640 1506.02640","DOI":"10.1109\/CVPR.2016.91"},{"key":"305_CR10","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2016) YOLO9000: better, faster, stronger. CoRR abs\/1612.08242 1612.08242","DOI":"10.1109\/CVPR.2017.690"},{"key":"305_CR11","unstructured":"Redmon J, Farhadi A (2018) Yolov3: An incremental improvement. CoRR abs\/1804.02767 1804.02767"},{"key":"305_CR12","unstructured":"Bochkovskiy A, Wang C, Liao HM (2020) Yolov4: optimal speed and accuracy of object detection. CoRR abs\/2004.10934 2004.10934"},{"key":"305_CR13","doi-asserted-by":"publisher","unstructured":"Li C, Li L, Jiang H, Weng K, Geng Y, Li L, Ke Z, Li Q, Cheng M, Nie W, Li Y, Zhang B, Liang Y, Zhou, L. Xu, X, Chu X, Wei X, Wei X (2022), YOLOv6: a single-stage object detection framework for industrial applications. https:\/\/doi.org\/10.48550\/ARXIV.2209.02976","DOI":"10.48550\/ARXIV.2209.02976"},{"key":"305_CR14","doi-asserted-by":"publisher","unstructured":"Wang C-Y, Bochkovskiy A, Liao H-YM (2022) YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv . https:\/\/doi.org\/10.48550\/ARXIV.2207.02696","DOI":"10.48550\/ARXIV.2207.02696"},{"key":"305_CR15","doi-asserted-by":"crossref","unstructured":"Wang C-Y, Liao H-YM, Yeh I-H, Wu Y-H, Chen P-Y, Hsieh J-W (2019) CSPNet: a new backbone that can enhance learning capability of CNN","DOI":"10.1109\/CVPRW50498.2020.00203"},{"key":"305_CR16","doi-asserted-by":"crossref","unstructured":"Zhang Y-F, Ren W, Zhang Z, Jia Z, Wang L, Tan T (2022) Focal and efficient IOU loss for accurate bounding box regression)","DOI":"10.1016\/j.neucom.2022.07.042"},{"issue":"8","key":"305_CR17","doi-asserted-by":"publisher","first-page":"8574","DOI":"10.1109\/TCYB.2021.3095305","volume":"52","author":"Z Zheng","year":"2022","unstructured":"Zheng Z, Wang P, Ren D, Liu W, Ye R, Hu Q, Zuo W (2022) Enhancing geometric factors in model learning and inference for object detection and instance segmentation. IEEE Transact Cybern 52(8):8574\u20138586. https:\/\/doi.org\/10.1109\/TCYB.2021.3095305","journal-title":"IEEE Transact Cybern"},{"key":"305_CR18","unstructured":"He K, Zhang X, Ren S, Sun J (2014) Spatial pyramid pooling in deep convolutional networks for visual recognition. CoRR abs\/1406.4729 1406.4729"},{"key":"305_CR19","unstructured":"Goodfellow IJ, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014), Generative adversarial networks"},{"key":"305_CR20","unstructured":"Mirza M, Osindero S (2014), Conditional generative adversarial nets"},{"key":"305_CR21","unstructured":"Arjovsky M, Chintala S, Bottou L (2017) Wasserstein GAN"},{"key":"305_CR22","doi-asserted-by":"crossref","unstructured":"Karras T, Laine S, Aila T (2019) A style-based generator architecture for generative adversarial networks","DOI":"10.1109\/CVPR.2019.00453"},{"key":"305_CR23","doi-asserted-by":"crossref","unstructured":"Razghandi M, Zhou H, Erol-Kantarci M, Turgut D (2022) Variational autoencoder generative adversarial network for synthetic data generation in smart home","DOI":"10.1109\/ICC45855.2022.9839249"},{"key":"305_CR24","doi-asserted-by":"publisher","unstructured":"Prajapati K, Chudasama V, Patel H, Upla K, Ramachandra R, Raja K, Busch C Unsupervised single image super-resolution network (usisresnet) for real-world data using generative adversarial network. In: 2020 IEEE\/CVF Conference on computer vision and pattern recognition workshops (CVPRW), pp. 1904\u20131913 (2020). https:\/\/doi.org\/10.1109\/CVPRW50498.2020.00240","DOI":"10.1109\/CVPRW50498.2020.00240"},{"key":"305_CR25","doi-asserted-by":"crossref","unstructured":"Zhang K, Liang J, Gool LV, Timofte R (2021) Designing a practical degradation model for deep blind image super-resolution","DOI":"10.1109\/ICCV48922.2021.00475"},{"key":"305_CR26","doi-asserted-by":"crossref","unstructured":"Han W, Zhang Z, Zhang Y, Yu J, Chiu C-C, Qin J, Gulati A, Pang R, Wu Y (2020) ContextNet: improving convolutional neural networks for automatic speech recognition with global context","DOI":"10.21437\/Interspeech.2020-2059"},{"key":"305_CR27","doi-asserted-by":"crossref","unstructured":"Bell S, Zitnick CL, Bala K, Girshick R (2015) Inside-outside net: detecting objects in context with skip pooling and recurrent neural networks","DOI":"10.1109\/CVPR.2016.314"},{"issue":"7","key":"305_CR28","doi-asserted-by":"publisher","first-page":"3423","DOI":"10.1109\/tip.2019.2896952","volume":"28","author":"Y Yuan","year":"2019","unstructured":"Yuan Y, Xiong Z, Wang Q (2019) VSSA-NET: vertical spatial sequence attention network for traffic sign detection. IEEE Trans Image Process 28(7):3423\u20133434. https:\/\/doi.org\/10.1109\/tip.2019.2896952","journal-title":"IEEE Trans Image Process"},{"key":"305_CR29","doi-asserted-by":"crossref","unstructured":"Cui L, Ma R, Lv P, Jiang X, Gao Z, Zhou B, Xu M (2020) MDSSD: multi-scale deconvolutional single shot detector for small objects","DOI":"10.1007\/s11432-019-2723-1"},{"key":"305_CR30","doi-asserted-by":"publisher","first-page":"868","DOI":"10.1109\/TIP.2020.3039378","volume":"30","author":"K Sun","year":"2021","unstructured":"Sun K, Zhang J, Liu J, Yu R, Song Z (2021) Drcnn: dynamic routing convolutional neural network for multi-view 3d object recognition. IEEE Transact Image Process 30:868\u2013877. https:\/\/doi.org\/10.1109\/TIP.2020.3039378","journal-title":"IEEE Transact Image Process"},{"key":"305_CR31","doi-asserted-by":"publisher","first-page":"57120","DOI":"10.1109\/ACCESS.2019.2913882","volume":"7","author":"Z Liu","year":"2019","unstructured":"Liu Z, Du J, Tian F (2019) Wen J Mr-cnn: a multi-scale region-based convolutional neural network for small traffic sign recognition. IEEE Access 7:57120\u201357128. https:\/\/doi.org\/10.1109\/ACCESS.2019.2913882","journal-title":"IEEE Access"},{"issue":"12","key":"305_CR32","doi-asserted-by":"publisher","first-page":"10015","DOI":"10.1109\/tgrs.2019.2930982","volume":"57","author":"G Zhang","year":"2019","unstructured":"Zhang G, Lu S, Zhang W (2019) CAD-net: a context-aware detection network for objects in remote sensing imagery. IEEE Trans Geosci Remote Sens 57(12):10015\u201310024. https:\/\/doi.org\/10.1109\/tgrs.2019.2930982","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"305_CR33","doi-asserted-by":"crossref","unstructured":"Chen D, Miao D, Zhao X (2023) Hyneter: hybrid network transformer for object detection","DOI":"10.1109\/ICASSP49357.2023.10096922"},{"key":"305_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.119560","volume":"217","author":"J Ding","year":"2023","unstructured":"Ding J, Li W, Pei L, Yang M, Ye C (2023) Yuan B Sw-yolox: an anchor-free detector based transformer for sea surface object detection. Expert Syst Appl 217:119560. https:\/\/doi.org\/10.1016\/j.eswa.2023.119560","journal-title":"Expert Syst Appl"},{"issue":"8","key":"305_CR35","doi-asserted-by":"publisher","first-page":"3872","DOI":"10.1109\/TCSVT.2023.3234311","volume":"33","author":"H Yang","year":"2023","unstructured":"Yang H, Yang Z, Hu A, Liu C, Cui TJ, Miao J (2023) Unifying convolution and transformer for efficient concealed object detection in passive millimeter-wave images. IEEE Trans Circuits Syst Video Technol 33(8):3872\u20133887. https:\/\/doi.org\/10.1109\/TCSVT.2023.3234311","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"305_CR36","doi-asserted-by":"publisher","unstructured":"Yang C, Huang Z, Wang N (2022), Querydet: Cascaded sparse query for accelerating high-resolution small object detection. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 13658\u201313667 . https:\/\/doi.org\/10.1109\/CVPR52688.2022.01330","DOI":"10.1109\/CVPR52688.2022.01330"},{"key":"305_CR37","doi-asserted-by":"crossref","unstructured":"Sunkara R, Luo T (2022), No more strided convolutions or pooling: a new cnn building block for low-resolution images and small objects","DOI":"10.1007\/978-3-031-26409-2_27"},{"key":"305_CR38","doi-asserted-by":"crossref","unstructured":"Zhu X, Lyu S, Wang X, Zhao Q (2021) TPH-YOLOv5: improved YOLOv5 based on transformer prediction head for object detection on drone-captured scenarios","DOI":"10.1109\/ICCVW54120.2021.00312"},{"key":"305_CR39","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation","DOI":"10.1109\/CVPR.2018.00913"},{"key":"305_CR40","doi-asserted-by":"crossref","unstructured":"Neubeck A, Gool LV (2006), Efficient non-maximum suppression. 18th international conference on pattern recognition (ICPR\u201906) 3, 850\u2013855","DOI":"10.1109\/ICPR.2006.479"},{"key":"305_CR41","doi-asserted-by":"crossref","unstructured":"Chen J, Kao S-H, He H, Zhuo W, Wen S, Lee C-H, Chan S-HG (2023) Run. Chasing Higher FLOPS for Faster Neural Networks, Don\u2019t Walk","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"305_CR42","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Albanie S, Sun G, Wu E (2019) Squeeze-and-excitation networks","DOI":"10.1109\/CVPR.2018.00745"},{"key":"305_CR43","unstructured":"Woo S, Park J, Lee J (2018), Kweon IS CBAM: convolutional block attention module. CoRR abs\/1807.06521 1807.06521"},{"issue":"2","key":"305_CR44","doi-asserted-by":"publisher","first-page":"699","DOI":"10.1109\/tmi.2020.3035253","volume":"40","author":"R Gu","year":"2021","unstructured":"Gu R, Wang G, Song T, Huang R, Aertsen M, Deprest J, Ourselin S, Vercauteren T, Zhang S (2021) CA-net: comprehensive attention convolutional neural networks for explainable medical image segmentation. IEEE Trans Med Imaging 40(2):699\u2013711. https:\/\/doi.org\/10.1109\/tmi.2020.3035253","journal-title":"IEEE Trans Med Imaging"},{"key":"305_CR45","doi-asserted-by":"crossref","unstructured":"Zheng Z, Wang P, Liu W, Li J, Ye R, Ren D (2019) Distance-IoU loss: faster and better learning for bounding box regression","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"305_CR46","doi-asserted-by":"crossref","unstructured":"Han K, Wang Y, Tian Q, Guo J, Xu C, Xu C (2020) GhostNet: more features from cheap operations","DOI":"10.1109\/CVPR42600.2020.00165"},{"key":"305_CR47","doi-asserted-by":"crossref","unstructured":"Zhang X, Zhou X, Lin M, Sun J (2017) ShuffleNet: an extremely efficient convolutional neural network for mobile devices","DOI":"10.1109\/CVPR.2018.00716"},{"key":"305_CR48","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) MobileNets: efficient convolutional neural networks for mobile vision applications"},{"key":"305_CR49","unstructured":"Xu S, Wang X, Lv W, Chang Q, Cui C, Deng K, Wang G, Dang Q, Wei S, Du Y (2022), Lai B PP-YOLOE: an evolved version of YOLO"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-023-00305-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13735-023-00305-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-023-00305-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,2]],"date-time":"2023-12-02T14:17:09Z","timestamp":1701526629000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13735-023-00305-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,14]]},"references-count":49,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["305"],"URL":"https:\/\/doi.org\/10.1007\/s13735-023-00305-5","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,14]]},"assertion":[{"value":"19 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 October 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"We declare that the authors have no competing interests as defined by Springer, or other interests that might be perceived to influence the results and\/or discussion reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"None.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}}],"article-number":"38"}}