{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:28:29Z","timestamp":1740122909505,"version":"3.37.3"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"41","license":[{"start":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T00:00:00Z","timestamp":1711324800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T00:00:00Z","timestamp":1711324800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100018806","name":"Science and Technology Department of Hubei Province","doi-asserted-by":"crossref","award":["2019AAA057"],"award-info":[{"award-number":["2019AAA057"]}],"id":[{"id":"10.13039\/501100018806","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-18585-2","type":"journal-article","created":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T06:02:11Z","timestamp":1711346531000},"page":"88587-88612","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An efficient and lightweight small target detection framework for vision-based autonomous road cleaning"],"prefix":"10.1007","volume":"83","author":[{"given":"Cheng","family":"Hu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengyao","family":"Ni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Danhua","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,25]]},"reference":[{"issue":"3","key":"18585_CR1","first-page":"032024","volume":"1302","author":"H Min","year":"2019","unstructured":"Min H, Zhu X, Yan B, Yu B (2019) Research on visual algorithm of road garbage based on intelligent control of road sweeper. J Phys: Conf Ser 1302(3):032024","journal-title":"J Phys: Conf Ser"},{"issue":"1","key":"18585_CR2","first-page":"012028","volume":"1684","author":"J Deng","year":"2020","unstructured":"Deng J, Xuan X, Wang W, Li Z, Yao H, Wang Z (2020) A review of research on object detection based on deep learning. J Phys: Conf Ser 1684(1):012028","journal-title":"J Phys: Conf Ser"},{"key":"18585_CR3","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. 2017 IEEE International Conference on Computer Vision (ICCV), Venice, Italy,\u00a0 pp 2980\u20132988. https:\/\/arxiv.org\/abs\/1703.06870","DOI":"10.1109\/ICCV.2017.322"},{"key":"18585_CR4","doi-asserted-by":"crossref","unstructured":"Gavrilescu R, Zet C, Fo\u0219al\u0103u C, Skoczylas M, Cotovanu D (2018) Faster R-CNN: an approach to realtime object detection. In: 2018 international conference and exposition on electrical and power engineering (EPE), Iasi, Romania, pp 165\u2013168. https:\/\/arxiv.org\/abs\/1506.01497","DOI":"10.1109\/ICEPE.2018.8559776"},{"key":"18585_CR5","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR),\u00a0Honolulu, Hawaii, USA,\u00a0IEEE, pp 2117\u20132125.\u00a0https:\/\/arxiv.org\/abs\/1612.03144","DOI":"10.1109\/CVPR.2017.106"},{"key":"18585_CR6","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: IEEE conference on computer vision and pattern recognition (CVPR),\u00a0Las Vegas, NV, USA.\u00a0IEEE, pp 779\u2013788.\u00a0https:\/\/arxiv.org\/abs\/1506.02640","DOI":"10.1109\/CVPR.2016.91"},{"key":"18585_CR7","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) YOLO9000: better, faster, stronger. In: IEEE conference on computer vision and pattern recognition (CVPR),\u00a0Honolulu, Hawaii, USA. IEEE, pp 7263\u20137271.\u00a0https:\/\/arxiv.org\/abs\/1612.08242","DOI":"10.1109\/CVPR.2017.690"},{"key":"18585_CR8","first-page":"1125","volume":"15","author":"J Redmon","year":"2018","unstructured":"Redmon J, Farhadi A (2018) YOLOv3: an incremental improvement. Pattern Anal 15:1125\u20131131","journal-title":"Pattern Anal"},{"key":"18585_CR9","doi-asserted-by":"publisher","unstructured":"Bochkovskiy A, Wang CY, Liao H Y M (2020) YOLOv4: optimal speed and accuracy of object detection. https:\/\/doi.org\/10.48550\/arXiv.2004.10934","DOI":"10.48550\/arXiv.2004.10934"},{"key":"18585_CR10","unstructured":"Tsung-Yi L, Michael M, Serge B, Lubomir B, Ross G, James H, Pietro P, Deva R, Lawrence  CZ, Doll\u00e1r P (2015) Microsoft COCO: common objects in context. arXiv:1405.0312.\u00a0https:\/\/arxiv.org\/abs\/1405.0312v3. Accessed 21 Feb 2015"},{"issue":"4","key":"18585_CR11","first-page":"1","volume":"52","author":"S Ghosh","year":"2019","unstructured":"Ghosh S, Das N, Das I, Maulik U (2019) Understanding deep learning techniques for image segmentation. ACM Computing Surveys(CSUR) 52(4):1\u201335","journal-title":"ACM Computing Surveys(CSUR)"},{"issue":"3","key":"18585_CR12","doi-asserted-by":"publisher","first-page":"362","DOI":"10.1002\/rob.21918","volume":"37","author":"S Grigorescu","year":"2020","unstructured":"Grigorescu S, Trasnea B, Cocias T, Macesanu G (2020) A survey of deep learning techniques for autonomous driving. J Field Robot 37(3):362\u2013386","journal-title":"J Field Robot"},{"key":"18585_CR13","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1007\/s10846-018-0954-x","volume":"95","author":"M Karaduman","year":"2019","unstructured":"Karaduman M, C\u0131nar A, Eren H (2019) UAV traffic patrolling via road detection and tracking in anonymous aerial video frames. J Intell Rob Syst 95:675\u2013690","journal-title":"J Intell Rob Syst"},{"key":"18585_CR14","doi-asserted-by":"crossref","unstructured":"Chen L, Papandreou G, Schroff F, Adam H (2018) Rethinking atrous convolution for semantic image segmentation. In: 15th european conference on computer vision (ECCV), Munich, Germany. Springer international publishing, pp 833\u2013851. https:\/\/arxiv.org\/abs\/1706.05587","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"18585_CR15","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.neucom.2018.03.037","volume":"304","author":"HS Yu","year":"2018","unstructured":"Yu HS, Yang ZG, Tan L et al (2018) Methods and datasets on semantic segmentation: A review. Neurocomputing 304:82\u2013103","journal-title":"Neurocomputing"},{"key":"18585_CR16","doi-asserted-by":"publisher","first-page":"104514","DOI":"10.1109\/ACCESS.2019.2932117","volume":"7","author":"D Zeng","year":"2019","unstructured":"Zeng D, Zhang S, Chen F, Wang Y (2019) Multi-scale CNN based garbage detection of airborne hyperspectral data. IEEE Access 7:104514\u2013104527","journal-title":"IEEE Access"},{"issue":"14","key":"18585_CR17","doi-asserted-by":"publisher","first-page":"3816","DOI":"10.3390\/s20143816","volume":"20","author":"T Wang","year":"2020","unstructured":"Wang T, Cai Y, Liang L, Ye D (2020) A multi-level approach to waste object segmentation. Sensors 20(14):3816","journal-title":"Sensors"},{"key":"18585_CR18","doi-asserted-by":"crossref","unstructured":"Chen LC, Yang Y, Wang J,\u00a0Xu W, Yuille AL\u00a0(2016) Attention to scale: scale-aware semantic image segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), Las Vegas, NV, USA.\u00a0IEEE, pp 3640\u20133649.\u00a0https:\/\/arxiv.org\/abs\/1511.03339","DOI":"10.1109\/CVPR.2016.396"},{"issue":"1","key":"18585_CR19","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1109\/TITS.2017.2750080","volume":"19","author":"E Romera","year":"2017","unstructured":"Romera E, Alvarez JM, Bergasa LM et al (2017) Erfnet: Efficient residual factorized convnet for real-time semantic segmentation. IEEE Trans Intell Transp Syst 19(1):263\u2013272","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"4","key":"18585_CR20","doi-asserted-by":"publisher","first-page":"3235","DOI":"10.1007\/s11063-022-10762-4","volume":"54","author":"Y Miao","year":"2022","unstructured":"Miao Y, Zhang S, He S (2022) Real-time detection network SI-SSD for weak targets in complex traffic scenarios. Neural Process Lett 54(4):3235\u20133247","journal-title":"Neural Process Lett"},{"key":"18585_CR21","doi-asserted-by":"publisher","first-page":"85771","DOI":"10.1109\/ACCESS.2019.2924960","volume":"7","author":"M Ju","year":"2019","unstructured":"Ju M, Luo J, Zhang P, He M, Luo H (2019) A Simple and Efficient Network for Small Target Detection. IEEE Access 7:85771\u201385781","journal-title":"IEEE Access"},{"key":"18585_CR22","unstructured":"Cui Y, Yang L, Liu D (2022) Dynamic proposals for efficient object detection. arXiv preprint arXiv:2207.05252. https:\/\/arxiv.org\/abs\/2207.05252. Accessed 12 Jul 2022"},{"key":"18585_CR23","unstructured":"Cui Y (2022) DFA: dynamic feature aggregation for efficient video object detection. arXiv preprint arXiv:2210.00588. https:\/\/arxiv.org\/abs\/2210.00588. Accessed 2 Oct 2022"},{"key":"18585_CR24","doi-asserted-by":"publisher","first-page":"103659","DOI":"10.1016\/j.infrared.2021.103659","volume":"114","author":"M Ju","year":"2021","unstructured":"Ju M, Luo J, Liu G, Luo H (2021) ISTDet: An efficient end-to-end neural network for infrared small target detection. Infrared Phys Technol 114:103659","journal-title":"Infrared Phys Technol"},{"issue":"12","key":"18585_CR25","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan V, Kendall A, Cipolla R (2017) SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation. IEEE Trans Pattern Anal Mach Intell 39(12):2481\u20132495","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"18585_CR26","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-Net: convolutional networks for biomedical image segmentation. In: Proceedings of international conference on medical image computing and computer-assisted intervention (MICCAI), Munich, Germany.\u00a0Springer international publishing, pp 234\u2013241. https:\/\/arxiv.org\/abs\/1505.04597","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"18585_CR27","unstructured":"Yu F, Koltun V (2016) Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122. https:\/\/arxiv.org\/abs\/1511.07122. Accessed 30 Apr 2016"},{"key":"18585_CR28","doi-asserted-by":"crossref","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2017) Pyramid scene parsing network. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), Honolulu, Hawaii, USA. IEEE, pp 2881\u20132890. https:\/\/arxiv.org\/abs\/1612.01105","DOI":"10.1109\/CVPR.2017.660"},{"key":"18585_CR29","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L C (2018) MobileNetV2: inverted residuals and linear bottlenecks. In: IEEE conference on computer vision and pattern recognition (CVPR), Salt Lake City, UT, USA.\u00a0IEEE, pp 4510\u20134520.\u00a0https:\/\/arxiv.org\/abs\/1801.04381","DOI":"10.1109\/CVPR.2018.00474"},{"key":"18585_CR30","doi-asserted-by":"crossref","unstructured":"Ding X, Zhang X, Ma N, Han J, Ding G, Sun J (2021) Repvgg: making vgg-style convnets great again. In: IEEE conference on computer vision and pattern recognition (CVPR),\u00a0virtual.\u00a0IEEE, pp 13733\u201313742.\u00a0https:\/\/arxiv.org\/abs\/2101.03697","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"18585_CR31","doi-asserted-by":"crossref","unstructured":"Sudre CH, Li W, Vercauteren T, Ourselin S, Cardoso MJ (2017) Generalised Dice overlap as a deep learning loss function for highly unbalanced segmentations. In: deep learning in medical image analysis and multimodal learning for clinical decision support: third international workshop, DLMIA 2017, and 7th international workshop, ML-CDS 2017, Held in conjunction with MICCAI 2017, Qu\u00e9bec City, QC, Canada. Proceedings 3, pp 240\u2013248. https:\/\/arxiv.org\/abs\/1707.03237","DOI":"10.1007\/978-3-319-67558-9_28"},{"key":"18585_CR32","doi-asserted-by":"crossref","unstructured":"Lin T Y, Goyal P, Girshick R et al (2017) Focal loss for dense object detection. IEEE Trans Pattern Anal Mach Intell (2):318\u2013327. https:\/\/arxiv.org\/abs\/1708.02002","DOI":"10.1109\/TPAMI.2018.2858826"},{"key":"18585_CR33","first-page":"15288","volume":"33","author":"J Mukhoti","year":"2020","unstructured":"Mukhoti J, Kulharia V, Sanyal A, Golodetz S, Torr P, Dokania P (2020) Calibrating deep neural networks using focal loss. Adv Neural Inf Process Syst 33:15288\u201315299","journal-title":"Adv Neural Inf Process Syst"},{"key":"18585_CR34","doi-asserted-by":"crossref","unstructured":"Shi W, Caballero J, Huszar F, Totz J, Aitken AP, Bishop R et al (2016) Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR),\u00a0Las Vegas, NV, USA.\u00a0IEEE, pp 1874\u20131883.\u00a0https:\/\/arxiv.org\/abs\/1609.05158","DOI":"10.1109\/CVPR.2016.207"},{"key":"18585_CR35","doi-asserted-by":"crossref","unstructured":"Cordts M, Omran M, Ramos S,\u00a0Rehfeld T., Enzweiler M., Benenson R\u00a0et al (2016) The cityscapes dataset for semantic urban scene understanding. In: 2016 IEEE Conference on computer vision and pattern recognition (CVPR), Las Vegas, NV, USA.\u00a0IEEE, pp 3213\u20133223.\u00a0https:\/\/arxiv.org\/abs\/1604.01685","DOI":"10.1109\/CVPR.2016.350"},{"key":"18585_CR36","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G et al (2019) PyTorch: an imperative style, high-performance deep learning library. In: advances in neural information processing systems 32 (NIPS), Vancouver, Canada.\u00a0Curran Associates, Inc., pp 8024\u20138035.\u00a0https:\/\/arxiv.org\/abs\/1912.01703"},{"key":"18585_CR37","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep Residual Learning for Image Recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), Las Vegas, NV, USA.\u00a0IEEE, pp 770\u2013778.\u00a0https:\/\/arxiv.org\/abs\/1512.03385","DOI":"10.1109\/CVPR.2016.90"},{"key":"18585_CR38","doi-asserted-by":"crossref","unstructured":"Xu J, Xiong Z, Bhattacharyya SP (2023) PIDNet: a real-time semantic segmentation network inspired by PID controllers. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Vancouver, BC, Canada.\u00a0IEEE, pp 19529\u201319539.\u00a0https:\/\/arxiv.org\/abs\/2206.02066","DOI":"10.1109\/CVPR52729.2023.01871"},{"key":"18585_CR39","doi-asserted-by":"crossref","unstructured":"Yang C, Huang Z, Wang N (2021) QueryDet: cascaded sparse query for accelerating high-resolution small object detection. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), New Orleans, LA, USA.\u00a0IEEE, pp 13658\u201313667.\u00a0https:\/\/arxiv.org\/abs\/2103.09136","DOI":"10.1109\/CVPR52688.2022.01330"},{"key":"18585_CR40","doi-asserted-by":"crossref","unstructured":"Tang S, Zhang S, Fang Y(2023) HIC-YOLOv5: improved yolov5 for small object detection. arXiv:2309.16393.\u00a0https:\/\/arxiv.org\/abs\/1405.0312v3. Accessed\u00a011 Jan 2024","DOI":"10.1109\/ICRA57147.2024.10610273"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-18585-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-18585-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-18585-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T12:10:23Z","timestamp":1734955823000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-18585-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,25]]},"references-count":40,"journal-issue":{"issue":"41","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["18585"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-18585-2","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,3,25]]},"assertion":[{"value":"20 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 January 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 February 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"The authors declare that they have no conflicts of interest to this paper.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}