{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T17:10:17Z","timestamp":1779383417420,"version":"3.53.1"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"18","license":[{"start":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T00:00:00Z","timestamp":1701302400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T00:00:00Z","timestamp":1701302400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Jiangsu Petrochemical Process Key Equipment Digital Twin Technology Engineering Research Center Open Project","award":["DTEC202103"],"award-info":[{"award-number":["DTEC202103"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17679-7","type":"journal-article","created":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T08:02:27Z","timestamp":1701331347000},"page":"55751-55771","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":47,"title":["DENS-YOLOv6: a small object detection model for garbage detection on water surface"],"prefix":"10.1007","volume":"83","author":[{"given":"Ning","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mingliang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gaochao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Baohua","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shoukun","family":"Xu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,11,30]]},"reference":[{"key":"17679_CR1","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inform Process Syst 30"},{"key":"17679_CR2","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision. Springer, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"17679_CR3","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2020) Deformable detr: deformable transformers for end-to-end object detection. arXiv:2010.04159"},{"key":"17679_CR4","unstructured":"Zhang H, Li F, Liu S, Zhang L, Su H, Zhu J, Ni LM, Shum H-Y (2022) Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv:2203.03605"},{"key":"17679_CR5","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"17679_CR6","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"17679_CR7","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. Adv Neural Inform Process Syst 28"},{"key":"17679_CR8","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: delving into high quality object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"17679_CR9","unstructured":"Bochkovskiy A, Wang C-Y, Liao H-YM (2020) Yolov4: optimal speed and accuracy of object detection. arXiv:2004.10934"},{"key":"17679_CR10","unstructured":"Li C, Li L, Jiang H, Weng K, Geng Y, Li L, Ke Z, Li Q, Cheng M, Nie W et al (2022) Yolov6: a single-stage object detection framework for industrial applications. arXiv:2209.02976"},{"key":"17679_CR11","doi-asserted-by":"crossref","unstructured":"Wang C-Y, Bochkovskiy A, Liao H-YM (2023) Yolov7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7464\u20137475","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"17679_CR12","unstructured":"Wang J, Xu C, Yang W, Yu L (2021) A normalized gaussian wasserstein distance for tiny object detection. arXiv:2110.13389"},{"key":"17679_CR13","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CK, Winn J, Zisserman A (2010) The pascal visual object classes (voc) challenge. Int J Comput Vision 88:303\u2013338","journal-title":"Int J Comput Vision"},{"key":"17679_CR14","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. Adv Neural Inform Process Syst 27"},{"key":"17679_CR15","doi-asserted-by":"crossref","unstructured":"Bai Y, Zhang Y, Ding M, Ghanem B (2018) Sod-mtgan: small object detection via multi-task generative adversarial network. In: Proceedings of the European conference on computer vision (ECCV), pp 206\u2013221","DOI":"10.1007\/978-3-030-01261-8_13"},{"key":"17679_CR16","doi-asserted-by":"crossref","unstructured":"Li J, Liang X, Wei Y, Xu T, Feng J, Yan S (2017) Perceptual generative adversarial networks for small object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1222\u20131230","DOI":"10.1109\/CVPR.2017.211"},{"key":"17679_CR17","doi-asserted-by":"crossref","unstructured":"Noh J, Bae W, Lee W, Seo J, Kim G (2019) Better to follow, follow to be better: towards precise supervision of feature super-resolution for small object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9725\u20139734","DOI":"10.1109\/ICCV.2019.00982"},{"key":"17679_CR18","doi-asserted-by":"crossref","unstructured":"Hu H, Gu J, Zhang Z, Dai J, Wei Y (2018) Relation networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3588\u20133597","DOI":"10.1109\/CVPR.2018.00378"},{"key":"17679_CR19","doi-asserted-by":"crossref","unstructured":"Lim J-S, Astrid M, Yoon H-J, Lee S-I (2021) Small object detection using context and attention. In: 2021 international conference on artificial intelligence in information and communication (ICAIIC). IEEE, pp 181\u2013186","DOI":"10.1109\/ICAIIC51459.2021.9415217"},{"key":"17679_CR20","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.neucom.2023.01.055","volume":"525","author":"S Xu","year":"2023","unstructured":"Xu S, Gu J, Hua Y, Liu Y (2023) Dktnet: dual-key transformer network for small object detection. Neurocomputing 525:29\u201341","journal-title":"Neurocomputing"},{"key":"17679_CR21","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) Ssd: single shot multibox detector. In: Computer vision\u2013ECCV 2016: 14th European conference, proceedings, Part I 14. Springer, pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"17679_CR22","doi-asserted-by":"crossref","unstructured":"Zhang S, Zhu X, Lei Z, Shi H, Wang X, Li SZ (2017) S3fd: single shot scale-invariant face detector. In: Proceedings of the IEEE international conference on computer vision, pp 192\u2013201","DOI":"10.1109\/ICCV.2017.30"},{"key":"17679_CR23","doi-asserted-by":"crossref","unstructured":"Xu C, Wang J, Yang W, Yu L (2021) Dot distance for tiny object detection in aerial images. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1192\u20131201","DOI":"10.1109\/CVPRW53098.2021.00130"},{"key":"17679_CR24","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"17679_CR25","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"key":"17679_CR26","doi-asserted-by":"crossref","unstructured":"Zhao Q, Sheng T, Wang Y, Tang Z, Chen Y, Cai L, Ling H (2019) M2det: a single-shot object detector based on multi-level feature pyramid network. In: Proceedings of the AAAI conference on artificial intelligence, vol 33, pp 9259\u20139266","DOI":"10.1609\/aaai.v33i01.33019259"},{"key":"17679_CR27","doi-asserted-by":"crossref","unstructured":"Ghiasi G, Lin T-Y, Le QV (2019) Nas-fpn: learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7036\u20137045","DOI":"10.1109\/CVPR.2019.00720"},{"key":"17679_CR28","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le QV (2020) Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10781\u201310790","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"17679_CR29","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"17679_CR30","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee J-Y, Kweon IS (2018) Cbam: convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"17679_CR31","doi-asserted-by":"crossref","unstructured":"Wang Q, Wu B, Zhu P, Li P, Zuo W, Hu Q (2020) Eca-net: efficient channel attention for deep convolutional neural networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11534\u201311542","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"17679_CR32","doi-asserted-by":"crossref","unstructured":"Hou Q, Zhou D, Feng J (2021) Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13713\u201313722","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"17679_CR33","unstructured":"Yang L, Zhang R-Y, Li L, Xie X (2021) Simam: a simple, parameter-free attention module for convolutional neural networks. In: International conference on machine learning. PMLR, pp 11863\u201311874"},{"key":"17679_CR34","doi-asserted-by":"crossref","unstructured":"Zhang Q-L, Yang Y-B (2021) Sa-net: shuffle attention for deep convolutional neural networks. In: ICASSP 2021-2021 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 2235\u20132239","DOI":"10.1109\/ICASSP39728.2021.9414568"},{"key":"17679_CR35","doi-asserted-by":"crossref","unstructured":"Shao Z, Han J, Debattista K, Pang Y (2023) Textual context-aware dense captioning with diverse words. IEEE Trans Multimed","DOI":"10.1109\/TMM.2023.3241517"},{"key":"17679_CR36","doi-asserted-by":"crossref","unstructured":"Gupta A, Narayan S, Joseph K, Khan S, Khan FS, Shah M (2022) Ow-detr: open-world detection transformer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9235\u20139244","DOI":"10.1109\/CVPR52688.2022.00902"},{"key":"17679_CR37","doi-asserted-by":"crossref","unstructured":"Chu F, Cao J, Shao Z, Pang Y (2022) Illumination-guided transformer-based network for multispectral pedestrian detection. In: CAAI international conference on artificial intelligence. Springer, pp 343\u2013355","DOI":"10.1007\/978-3-031-20497-5_28"},{"key":"17679_CR38","doi-asserted-by":"crossref","unstructured":"Cheng Y, Zhu J, Jiang M, Fu J, Pang C, Wang P, Sankaran K, Onabola O, Liu Y, Liu D et al (2021) Flow: a dataset and benchmark for floating waste detection in inland waters. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10953\u201310962","DOI":"10.1109\/ICCV48922.2021.01077"},{"issue":"22","key":"17679_CR39","doi-asserted-by":"publisher","first-page":"4366","DOI":"10.3390\/math10224366","volume":"10","author":"X Yang","year":"2022","unstructured":"Yang X, Zhao J, Zhao L, Zhang H, Li L, Ji Z, Ganchev I (2022) Detection of river floating garbage based on improved yolov5. Math 10(22):4366","journal-title":"Math"},{"key":"17679_CR40","unstructured":"Jiang Z, Wu B, Ma L, Lian J (2023) Faster-rcnn water-floating garbage recognition based on multi-scale feature and polarized self-attention. J Comput Appl 0"},{"key":"17679_CR41","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"17679_CR42","doi-asserted-by":"publisher","first-page":"81147","DOI":"10.1109\/ACCESS.2021.3085348","volume":"9","author":"L Zhang","year":"2021","unstructured":"Zhang L, Wei Y, Wang H, Shao Y, Shen J (2021) Real-time detection of river surface floating object based on improved refinedet. IEEE Access 9:81147\u201381160","journal-title":"IEEE Access"},{"key":"17679_CR43","doi-asserted-by":"crossref","unstructured":"Ma L, Wu B, Deng J, Lian J (2023) Small-target water-floating garbage detection and recognition based on unet-yolov5s. In: 2023 5th international conference on communications, information system and computer engineering (CISCE). IEEE, pp 391\u2013395","DOI":"10.1109\/CISCE58541.2023.10142409"},{"key":"17679_CR44","doi-asserted-by":"crossref","unstructured":"Ding X, Zhang X, Ma N, Han J, Ding G, Sun J (2021) Repvgg: making vgg-style convnets great again. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13733\u201313742","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"17679_CR45","unstructured":"Ge Z, Liu S, Wang F, Li Z, Sun J (2021) Yolox: exceeding yolo series in 2021. arXiv:2107.08430"},{"key":"17679_CR46","unstructured":"Gevorgyan Z (2022) Siou loss: more powerful learning for bounding box regression. arXiv:2205.12740"},{"key":"17679_CR47","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: Computer vision\u2013ECCV 2014: 13th European conference, proceedings, part V 13. Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"issue":"11s","key":"17679_CR48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3519022","volume":"54","author":"S Antonelli","year":"2022","unstructured":"Antonelli S, Avola D, Cinque L, Crisostomi D, Foresti GL, Galasso F, Marini MR, Mecca A, Pannone D (2022) Few-shot object detection: a survey. ACM Computing Surveys (CSUR) 54(11s):1\u201337","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"17679_CR49","doi-asserted-by":"crossref","unstructured":"Wang J, Pang Y, Cao J, Sun H, Shao Z, Li X (2023) Deep intra-image contrastive learning for weakly supervised one-step person search. arXiv:2302.04607","DOI":"10.1016\/j.patcog.2023.110047"},{"issue":"6","key":"17679_CR50","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1007\/s11554-023-01369-6","volume":"20","author":"H Wu","year":"2023","unstructured":"Wu H, Wu G, Hu J, Xu S, Zhang S, Liu Y (2023) Cityuplaces: a new dataset for efficient vision-based recognition. J Real-Time Image Proc 20(6):109","journal-title":"J Real-Time Image Proc"},{"issue":"7","key":"17679_CR51","first-page":"3688","volume":"44","author":"Y Liu","year":"2021","unstructured":"Liu Y, Zhang D, Zhang Q, Han J (2021) Part-object relational visual saliency. IEEE Trans Pattern Anal Mach Intell 44(7):3688\u20133704","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17679_CR52","doi-asserted-by":"publisher","first-page":"6719","DOI":"10.1109\/TIP.2022.3215887","volume":"31","author":"Y Liu","year":"2022","unstructured":"Liu Y, Zhang D, Liu N, Xu S, Han J (2022) Disentangled capsule routing for fast part-object relational saliency. IEEE Trans Image Process 31:6719\u20136732","journal-title":"IEEE Trans Image Process"},{"key":"17679_CR53","doi-asserted-by":"crossref","unstructured":"Liu Y, Dong X, Zhang D, Xu S (2023) Deep unsupervised part-whole relational visual saliency. Neurocomputing 126916","DOI":"10.1016\/j.neucom.2023.126916"},{"key":"17679_CR54","doi-asserted-by":"publisher","first-page":"5154","DOI":"10.1109\/TIFS.2021.3124734","volume":"16","author":"Y Liu","year":"2021","unstructured":"Liu Y, Zhang D, Zhang Q, Han J (2021) Integrating part-object relationship and contrast for camouflaged object detection. IEEE Trans Inf Forensics Secur 16:5154\u20135166","journal-title":"IEEE Trans Inf Forensics Secur"},{"key":"17679_CR55","unstructured":"Gao A, Pang Y, Nie J, Shao Z, Cao J, Guo Y, Li X (2022) Esgn: efficient stereo geometry network for fast 3d object detection. IEEE Trans Circ Syst Vid Technol"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17679-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17679-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17679-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,15]],"date-time":"2024-05-15T10:31:06Z","timestamp":1715769066000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17679-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,30]]},"references-count":55,"journal-issue":{"issue":"18","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["17679"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17679-7","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,30]]},"assertion":[{"value":"11 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 November 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 November 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All of us here attest that there are no competing interests with this study.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Ethical and informed consent for data used.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical and informed consent for data used"}}]}}