{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:57:35Z","timestamp":1775667455925,"version":"3.50.1"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"14","license":[{"start":{"date-parts":[[2023,10,16]],"date-time":"2023-10-16T00:00:00Z","timestamp":1697414400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,16]],"date-time":"2023-10-16T00:00:00Z","timestamp":1697414400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17185-w","type":"journal-article","created":{"date-parts":[[2023,10,16]],"date-time":"2023-10-16T07:02:16Z","timestamp":1697439736000},"page":"42257-42283","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Higher efficient YOLOv7: a one-stage method for non-salient object detection"],"prefix":"10.1007","volume":"83","author":[{"given":"Chengang","family":"Dong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuhao","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liyan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,16]]},"reference":[{"key":"17185_CR1","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) ImageNet classification with deep convolutional neural networks. Commun ACM 60:84\u201390","journal-title":"Commun ACM"},{"key":"17185_CR2","first-page":"91","volume":"28","author":"S Ren","year":"2015","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster R-CNN: Towards real-time object detection with region proposal networks. Adv Neural Inf Process Syst 28:91\u201399","journal-title":"Adv Neural Inf Process Syst"},{"key":"17185_CR3","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V,  Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"17185_CR4","doi-asserted-by":"crossref","unstructured":"Wang C-Y, Bochkovskiy A, Liao H-Y M (2023) YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 7464\u20137475","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"17185_CR5","doi-asserted-by":"crossref","unstructured":"Wang F, Jiang M, Qian C, Yang S, Li C, Zhang H, Wang X, Tang, X (2018) Residual attention network for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 3156\u20133164","DOI":"10.1109\/CVPR.2017.683"},{"key":"17185_CR6","first-page":"19365","volume":"33","author":"L Huang","year":"2020","unstructured":"Huang L, Zhang C, Zhang H (2020) Self-adaptive training: beyond empirical risk minimization. Adv Neural Inf Process Sys 33:19365\u201319376","journal-title":"Adv Neural Inf Process Sys"},{"key":"17185_CR7","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le Q V (2020) Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 10781\u201310790","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"17185_CR8","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"17185_CR9","unstructured":"Liu MY, Chen CY, Lin WY (2019) Distance-IoU loss: faster and better learning for bounding box regression. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 5179\u20135188"},{"key":"17185_CR10","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"17185_CR11","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S (2016) SSD: Single shot multibox detector. In: European conference on computer vision. Springer, pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"17185_CR12","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv: 1704.04861"},{"key":"17185_CR13","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) YOLO9000: better, faster, stronger. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"17185_CR14","unstructured":"Bochkovskiy A, Wang CY, Liao HYM (2020) YOLOv4: optimal speed and accuracy of object detection. arXiv preprint arXiv: 2004. 10934"},{"key":"17185_CR15","doi-asserted-by":"publisher","first-page":"1023","DOI":"10.1007\/s11554-022-01241-z","volume":"19","author":"H Wang","year":"2022","unstructured":"Wang H, Jin Y, Ke H, Zhang X (2022) DDH-YOLOv5: improved YOLOv5 based on Double IoU-aware Decoupled Head for object detection. J Real-Time Image Proc 19:1023\u20131033","journal-title":"J Real-Time Image Proc"},{"key":"17185_CR16","unstructured":"Li C, Li L, Jiang H et al (2022) YOLOv6: a single-stage object detection framework for industrial applications. arXiv preprint arXiv: 2209.02976"},{"key":"17185_CR17","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask R-CNN. In: Proceedings of the IEEE international conference on computer vision. IEEE, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.322"},{"key":"17185_CR18","doi-asserted-by":"crossref","unstructured":"Ali H, Khursheed M, Fatima SK, Shuja SM, Noor S (2019) Object recognition for dental instruments using SSD-MobileNet. In: International conference on information science and communication technology (ICISCT). IEEE, pp 1\u20136","DOI":"10.1109\/CISCT.2019.8777441"},{"issue":"6","key":"17185_CR19","first-page":"1941","volume":"43","author":"X Li","year":"2020","unstructured":"Li X, Qi H, Ji X, Dai J, Wei Y (2020) RoI transformer: A joint detection and classification network for object detection. IEEE Trans Pattern Anal Mach Intell 43(6):1941\u20131954","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17185_CR20","doi-asserted-by":"crossref","unstructured":"Chowdhury PN, Sain A, Bhunia AK, Xiang T, Gryaditskaya Y, Song Y-Z (2022) Fs-coco: towards understanding of freehand sketches of common objects in context. In: European conference on computer vision. Springer, pp 253\u2013270","DOI":"10.1007\/978-3-031-20074-8_15"},{"key":"17185_CR21","doi-asserted-by":"crossref","unstructured":"Ren Z, Zhou Y, Chen Y, Zhou R, Gao Y (2021) Efficient human pose estimation by maximizing fusion and high-level spatial attention. In: International conference on automatic face and gesture recognition. IEEE, pp 01\u201306","DOI":"10.1109\/FG52635.2021.9666981"},{"key":"17185_CR22","doi-asserted-by":"crossref","unstructured":"Hou Q, Zhou D, Feng J (2021) Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 13713\u201313722","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"17185_CR23","doi-asserted-by":"crossref","unstructured":"Ukwuoma CC, Zhiguang Q, Hossin MA, Cobbinah BM, Oluwasanmi A, Chikwendu IA, Ejiyi CJ, Abubakar HS (2021) Holistic attention on pooling based cascaded partial decoder for real-time salient object detection. In: International conference on pattern recognition and artificial intelligence (PRAI). IEEE, pp 378\u2013384","DOI":"10.1109\/PRAI53619.2021.9551094"},{"key":"17185_CR24","unstructured":"Chen K, Wang J, Pang J, Cao Y, Xiong Y, Li X (2018) Spatial attention module for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 6298\u20136306"},{"key":"17185_CR25","first-page":"3702","volume":"29","author":"S Zhang","year":"2020","unstructured":"Zhang S, Wen L, Bian X, Lei J, Liu S (2020) Global context module with two complementary attention mechanisms for object detection. IEEE Trans Image Process 29:3702\u20133712","journal-title":"IEEE Trans Image Process"},{"key":"17185_CR26","unstructured":"Redmon J, Farhadi A (2018) YOLOv3: an incremental improvement. arXiv preprint arXiv: 1804. 02767"},{"key":"17185_CR27","doi-asserted-by":"crossref","unstructured":"Zhu X, Wang Y, Dai J, Lu H, Wei Y (2019) Feature selective anchor-free module for single-shot object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 424\u2013433","DOI":"10.1109\/CVPR.2019.00093"},{"key":"17185_CR28","doi-asserted-by":"crossref","unstructured":"Christlein V, Spranger L, Seuret M, Nicolaou A, Kr\u00e1l P, Maier A (2019) Deep generalized max pooling. In: International conference on document analysis and recognition (ICDAR). IEEE, pp 1090\u20131096","DOI":"10.1109\/ICDAR.2019.00177"},{"key":"17185_CR29","unstructured":"Zhou K, Wang Y, Zhang T, Liu J, Peng C (2019) Objects as points. arXiv preprint arXiv: 1904. 07850"},{"key":"17185_CR30","unstructured":"Lin M, Chen Q, Yan S (2013) Network in network. arXiv preprint arXiv:1312.4400"},{"key":"17185_CR31","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/j.neucom.2022.05.052","volume":"500","author":"Y Shen","year":"2022","unstructured":"Shen Y, Zhang F, Liu D, Pu W, Zhang Q (2022) Manhattan-distance IOU loss for fast and accurate bounding box regression and object detection. Neurocomputing 500:99\u2013114","journal-title":"Neurocomputing"},{"key":"17185_CR32","unstructured":"Abdelwahab M, Elhoseiny M, Hussein ME (2019) MASR: multi-lingual ASR using pre-trained deep learning models. arXiv preprint arXiv: 1910. 13422"},{"key":"17185_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2020.105866","volume":"200","author":"Y Su","year":"2021","unstructured":"Su Y, Li D, Chen X (2021) Lung nodule detection based on faster R-CNN framework. Comput Methods Programs Biomed 200:105866","journal-title":"Comput Methods Programs Biomed"},{"key":"17185_CR34","doi-asserted-by":"publisher","first-page":"728","DOI":"10.1007\/s13198-021-01514-z","volume":"14","author":"H Cen","year":"2023","unstructured":"Cen H (2023) Target location detection of mobile robots based on R-FCN deep convolutional neural network. Int J Syst Assur Eng Manag 14:728\u2013737","journal-title":"Int J Syst Assur Eng Manag"},{"key":"17185_CR35","unstructured":"Li C, Li L, Geng Y, Jiang H, Cheng M, Zhang B, Ke Z, Xu X, Chu X (2023) Yolov6 v3. 0: A full-scale reloading. arXiv preprint arXiv: 2301.05586"},{"key":"17185_CR36","unstructured":"Fu C-Y, Liu W, Ranga A, Tyagi A, Berg AC (2017) Dssd: Deconvolutional single shot detector. arXiv preprint arXiv:1701.06659"},{"key":"17185_CR37","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: Delving into high quality object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"17185_CR38","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TIM.2020.2991573","volume":"70","author":"X Cheng","year":"2020","unstructured":"Cheng X, Yu J (2020) RetinaNet with difference channel attention and adaptively spatial feature fusion for steel surface defect detection. IEEE Trans Instrum Meas 70:1\u201311","journal-title":"IEEE Trans Instrum Meas"},{"key":"17185_CR39","doi-asserted-by":"crossref","unstructured":"Sun Z, Cao S, Yang Y, Kitani KM (2021) Rethinking transformer-based set prediction for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision. IEEE, pp 3611\u20133620","DOI":"10.1109\/ICCV48922.2021.00359"},{"key":"17185_CR40","doi-asserted-by":"crossref","unstructured":"Sun P, Zhang R, Jiang Y, Kong T, Xu C, Zhan W, Tomizuka M, Li L, Yuan Z, and Wang C (2021) Sparse r-cnn: End-to-end object detection with learnable proposals. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 14454\u201314463","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"17185_CR41","unstructured":"Vaswani A, Shazeer N, Parmar N et al (2017) Attention is all you need. In: Proceedings of the 31st international conference on neural information processing systems. ACM, pp 6000\u20136010"},{"issue":"1","key":"17185_CR42","doi-asserted-by":"publisher","first-page":"11314","DOI":"10.1038\/s41598-023-38109-6","volume":"13","author":"MS Hossain","year":"2023","unstructured":"Hossain MS, Shahriar GM, Syeed MM, Uddin MF, Hasan M, Shivam S, Advani S (2023) Region of interest (ROI) selection using vision transformer for automatic analysis using whole slide images. Sci Rep 13(1):11314","journal-title":"Sci Rep"},{"key":"17185_CR43","unstructured":"Terven J, Cordova-Esparza D (2023) A comprehensive review of YOLO: From YOLOv1 to YOLOv8 and beyond. arXiv preprint arXiv: 2304.00501"},{"key":"17185_CR44","doi-asserted-by":"crossref","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Parikh D, Batra D (2017) Grad-cam: visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE international conference on computer vision. IEEE, pp 618\u2013626","DOI":"10.1109\/ICCV.2017.74"},{"key":"17185_CR45","doi-asserted-by":"crossref","unstructured":"Sriram S, Vinayakumar R, Sowmya V, Alazab M, Soman K (2020) Multi-scale learning based malware variant detection using spatial pyramid pooling network. In: IEEE INFOCOM 2020-IEEE conference on computer communications workshops (INFOCOM WKSHPS). IEEE, pp 740\u2013745","DOI":"10.1109\/INFOCOMWKSHPS50562.2020.9162661"},{"key":"17185_CR46","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee J-Y, Kweon IS (2018) Cbam: convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV).Springer, pp 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"17185_CR47","doi-asserted-by":"crossref","unstructured":"Zhou X, Koltun V, Kr\u00e4henb\u00fchl P (2020) Tracking objects as points. In: European conference on computer vision(ECCV). Springer, pp 474\u2013490","DOI":"10.1007\/978-3-030-58548-8_28"},{"key":"17185_CR48","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2020) Deformable detr: deformable transformers for end-to-end object detection. arXiv preprint arXiv: 2010.04159"},{"key":"17185_CR49","first-page":"5621","volume":"33","author":"Y Chen","year":"2020","unstructured":"Chen Y, Zhang Z, Cao Y, Wang L, Lin S, Hu H (2020) Reppoints v2: Verification meets regression for object detection. Adv Neural Inf Process Syst 33:5621\u20135631","journal-title":"Adv Neural Inf Process Syst"},{"key":"17185_CR50","unstructured":"Liang T, Chu X, Liu Y, Wang Y, Tang Z, Chu W, Chen J, Ling H (2021) Cbnetv2: a composite backbone network architecture for object detection. arXiv preprint arXiv: 2107.00420"},{"key":"17185_CR51","unstructured":"Wang C-Y, Yeh I-H, Liao H-Y M (2021) You only learn one representation: unified network for multiple tasks. arXiv preprint arXiv:2105.04206"},{"key":"17185_CR52","unstructured":"Xu X, Jiang Y, Chen W, Huang Y, Zhang Y, Sun X (2022) Damo-yolo: a report on real-time object detection design. arXiv preprint arXiv:2211.15444"},{"key":"17185_CR53","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision. IEEE, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"17185_CR54","doi-asserted-by":"crossref","unstructured":"Dai X, Chen Y, Xiao B, Chen D, Liu M, Yuan L, Zhang L (2021) Dynamic head: unifying object detection heads with attentions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 7373\u20137382","DOI":"10.1109\/CVPR46437.2021.00729"},{"key":"17185_CR55","doi-asserted-by":"crossref","unstructured":"Liu Z, Hu H, Lin Y, Yao Z, Xie Z, Wei Y, Ning J, Cao Y, Zhang Z, Dong L (2022) Swin transformer v2: scaling up capacity and resolution. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 12009\u201312019","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"17185_CR56","unstructured":"Lv W, Xu S, Zhao Y, Wang G, Wei J, Cui C, Du Y, Dang Q, Liu Y (2023) Detrs beat yolos on real-time object detection. arXiv preprint arXiv:2304.08069"},{"key":"17185_CR57","unstructured":"Xu S, Wang X, Lv W, Chang Q, Cui C, Deng K, Wang G, Dang Q, Wei S, Du Y (2023) PP-YOLOE: an evolved version of YOLO. arXiv preprint arXiv:2203.16250"},{"key":"17185_CR58","unstructured":"Ge Z, Liu S, Wang F, Li Z, Sun J (2021) Yolox: exceeding yolo series in 2021. arXiv preprint arXiv:2107.08430"},{"key":"17185_CR59","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"key":"17185_CR60","doi-asserted-by":"crossref","unstructured":"Ding J, Xue N, Long Y, Xia G-S, Lu Q (2019) Learning RoI transformer for oriented object detection in aerial images. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 2849\u2013285","DOI":"10.1109\/CVPR.2019.00296"},{"key":"17185_CR61","doi-asserted-by":"crossref","unstructured":"Ding X, Zhang X, Ma N, Han J, Ding G, Sun J (2021) Repvgg: Making vgg-style convnets great again. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. IEEE, pp 13733\u201313742","DOI":"10.1109\/CVPR46437.2021.01352"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17185-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17185-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17185-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,4]],"date-time":"2024-04-04T13:54:10Z","timestamp":1712238850000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17185-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,16]]},"references-count":61,"journal-issue":{"issue":"14","published-online":{"date-parts":[[2024,4]]}},"alternative-id":["17185"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17185-w","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,16]]},"assertion":[{"value":"26 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 August 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 September 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"We declare that we do not have any commercial or associative interest that represents a conflict of interest in connection with the work submitted.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}]}}