{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T14:02:16Z","timestamp":1753884136366,"version":"3.37.3"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"30","license":[{"start":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T00:00:00Z","timestamp":1707782400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T00:00:00Z","timestamp":1707782400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R & D Program of China","doi-asserted-by":"crossref","award":["2019YFE0105400"],"award-info":[{"award-number":["2019YFE0105400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Intelligent Situation Awareness System for Smart Ship","award":["MC-201920-X01"],"award-info":[{"award-number":["MC-201920-X01"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-18448-w","type":"journal-article","created":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T08:01:55Z","timestamp":1707811315000},"page":"75387-75405","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Feature-enhanced composite backbone network for object detection"],"prefix":"10.1007","volume":"83","author":[{"given":"Junbao","family":"Wu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3586-9286","authenticated-orcid":false,"given":"Hao","family":"Meng","sequence":"additional","affiliation":[]},{"given":"Tianhao","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Ming","family":"Yuan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,13]]},"reference":[{"key":"18448_CR1","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: Towards real-time object detection with region proposal networks. Adv Neural Inform Process Syst 28"},{"key":"18448_CR2","doi-asserted-by":"crossref","unstructured":"Cohen J, Crispim-Junior C, Chiappa J-M , Rodet LT (2023) Industrial object detection with multi-modal ssd: closing the gap between synthetic and real images. Multimed Tools Appl 1\u201328","DOI":"10.1007\/s11042-023-15367-0"},{"key":"18448_CR3","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) Ssd: Single shot multibox detector. In:Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp 21\u201337 . Springer","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"18448_CR4","doi-asserted-by":"publisher","first-page":"2789","DOI":"10.1007\/s11063-020-10228-5","volume":"51","author":"H Zhao","year":"2020","unstructured":"Zhao H, Li Z, Fang L, Zhang T (2020) A balanced feature fusion ssd for object detection. Neural Process Lett 51:2789\u20132806","journal-title":"Neural Process Lett"},{"key":"18448_CR5","doi-asserted-by":"crossref","unstructured":"Jeong J, Park H, Kwak N (2017) Enhancement of ssd by concatenating feature maps for object detection. arXiv preprint arXiv:1705.09587","DOI":"10.5244\/C.31.76"},{"key":"18448_CR6","doi-asserted-by":"crossref","unstructured":"Soylu E, Soylu T (2023) A performance comparison of yolov8 models for traffic sign detection in the robotaxi-full scale autonomous vehicle competition. Multimed Tools Appl 1\u201331","DOI":"10.1007\/s11042-023-16451-1"},{"key":"18448_CR7","doi-asserted-by":"crossref","unstructured":"Vellaidurai A , Rathinam M (2023) A novel oyolov5 model for vehicle detection and classification in adverse weather conditions. Multimed Tools Appl 1\u201318","DOI":"10.1007\/s11042-023-16450-2"},{"key":"18448_CR8","doi-asserted-by":"crossref","unstructured":"Xiang X, Meng F, Lv N, Yin H (2022) Engineering vehicles detection for warehouse surveillance system based on modified yolov4-tiny. Neural Process Lett 1\u201317","DOI":"10.1007\/s11063-022-10982-8"},{"key":"18448_CR9","doi-asserted-by":"crossref","unstructured":"Jia Z, Sun S, Liu G (2023) Real-time traffic sign detection based on weighted attention and model refinement. Neural Process Lett 1\u201317","DOI":"10.1007\/s11063-023-11271-8"},{"key":"18448_CR10","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2020) Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159"},{"key":"18448_CR11","doi-asserted-by":"crossref","unstructured":"Dai X, Chen Y, Yang J, Zhang P, Yuan L, Zhang L (2021) Dynamic detr: End-to-end object detection with dynamic attention. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2988\u20132997","DOI":"10.1109\/ICCV48922.2021.00298"},{"key":"18448_CR12","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision, pp 213\u2013229 . Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"issue":"2","key":"18448_CR13","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/TPAMI.2019.2938758","volume":"43","author":"S-H Gao","year":"2019","unstructured":"Gao S-H, Cheng M-M, Zhao K, Zhang X-Y, Yang M-H, Torr P (2019) Res2net: A new multi-scale backbone architecture. IEEE Trans Pattern Anal Mach Intell 43(2):652\u2013662","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"18448_CR14","doi-asserted-by":"crossref","unstructured":"Wang W, Dai J, Chen Z, Huang Z, Li Z, Zhu X, Hu X, Lu T, Lu L, Li H, etal. (2023) Internimage: Exploring large-scale vision foundation models with deformable convolutions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14408\u201314419","DOI":"10.1109\/CVPR52729.2023.01385"},{"key":"18448_CR15","doi-asserted-by":"crossref","unstructured":"Guo J, Han K, Wu H, Tang Y, Chen X, Wang Y, Xu C (2022) Cmt: Convolutional neural networks meet vision transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12175\u201312185","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"18448_CR16","first-page":"4203","volume":"35","author":"J Yang","year":"2022","unstructured":"Yang J, Li C, Dai X, Gao J (2022) Focal modulation networks. Adv Neural Inf Process Syst 35:4203\u20134217","journal-title":"Adv Neural Inf Process Syst"},{"key":"18448_CR17","doi-asserted-by":"crossref","unstructured":"Wu H, Xiao B, Codella N, Liu M, Dai X, Yuan L, Zhang L (2021) Cvt: Introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 22\u201331","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"18448_CR18","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"18448_CR19","doi-asserted-by":"publisher","first-page":"11653","DOI":"10.1609\/aaai.v34i07.6834","volume":"34","author":"Y Liu","year":"2020","unstructured":"Liu Y, Wang Y, Wang S, Liang T, Zhao Q, Tang Z, Ling H (2020) Cbnet: A novel composite backbone network architecture for object detection. Proceedings of the AAAI conference on artificial intelligence 34:11653\u201311660","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"18448_CR20","unstructured":"Tishby N, Pereira FC, Bialek W (2000) The information bottleneck method. arXiv preprint physics\/0004057"},{"key":"18448_CR21","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"18448_CR22","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"18448_CR23","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"18448_CR24","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"key":"18448_CR25","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le QV (2020) Efficientdet: Scalable and efficient object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10781\u201310790","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"18448_CR26","doi-asserted-by":"crossref","unstructured":"Wang J, Chen K, Xu R, Liu Z, Loy CC, Lin D (2019) Carafe: Content-aware reassembly of features. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3007\u20133016","DOI":"10.1109\/ICCV.2019.00310"},{"key":"18448_CR27","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: Unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"18448_CR28","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"18448_CR29","doi-asserted-by":"crossref","unstructured":"Feng C, Zhong Y, Gao Y, Scott MR, Huang W (2021) Tood: Task-aligned one-stage object detection. In:2021 IEEE\/CVF international conference on computer vision (ICCV). IEEE Computer Society. pp 3490\u20133499","DOI":"10.1109\/ICCV48922.2021.00349"},{"key":"18448_CR30","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: Delving into high quality object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"18448_CR31","unstructured":"Zhou X, Koltun V, Kr\u00e4henb\u00fchl P (2021) Probabilistic two-stage detection. arXiv preprint arXiv:2103.07461"},{"key":"18448_CR32","doi-asserted-by":"crossref","unstructured":"Li F, Zhang H, Liu S, Guo J, Ni LM, Zhang L (2022) Dn-detr: Accelerate detr training by introducing query denoising. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13619\u201313627","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"18448_CR33","unstructured":"Liu S, Li F, Zhang H, Yang X, Qi X, Su H, Zhu J, Zhang L (2022) Dab-detr: Dynamic anchor boxes are better queries for detr. arXiv preprint arXiv:2201.12329"},{"key":"18448_CR34","unstructured":"Zhang H, Li F, Liu S, Zhang L, Su H, Zhu J, Ni LM, Shum H-Y (2022) Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605"},{"key":"18448_CR35","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inform Process Syst 25"},{"key":"18448_CR36","unstructured":"Tan M , Le Q (2019) Efficientnet: Rethinking model scaling for convolutional neural networks. In: International conference on machine learning, pp 6105\u20136114. PMLR"},{"key":"18448_CR37","doi-asserted-by":"crossref","unstructured":"Tychsen-Smith L , Petersson L (2017) Denet: Scalable real-time object detection with directed sparse sampling. In: Proceedings of the IEEE international conference on computer vision, pp 428\u2013436","DOI":"10.1109\/ICCV.2017.54"},{"key":"18448_CR38","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-9326-7","volume-title":"Ensemble machine learning: methods and applications","author":"C Zhang","year":"2012","unstructured":"Zhang C, Ma Y (2012) Ensemble machine learning: methods and applications. Springer"},{"key":"18448_CR39","unstructured":"Brown G (2004) Diversity in neural network ensembles. PhD thesis, Citeseer"},{"issue":"1","key":"18448_CR40","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/j.inffus.2004.04.004","volume":"6","author":"G Brown","year":"2005","unstructured":"Brown G, Wyatt J, Harris R, Yao X (2005) Diversity creation methods: a survey and categorisation. Information fusion 6(1):5\u201320","journal-title":"Information fusion"},{"key":"18448_CR41","doi-asserted-by":"crossref","unstructured":"Chen M, Fu J, Ling H (2021) One-shot neural ensemble architecture search by diversity-guided search space shrinking. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16530\u201316539","DOI":"10.1109\/CVPR46437.2021.01626"},{"key":"18448_CR42","unstructured":"Mao M, Zhang B, Doermann D, Guo J, Han S, Feng Y, Wang X, Ding E (2021) Probabilistic ranking-aware ensembles for enhanced object detections. arXiv preprint arXiv:2105.03139"},{"key":"18448_CR43","doi-asserted-by":"publisher","first-page":"6893","DOI":"10.1109\/TIP.2022.3216771","volume":"31","author":"T Liang","year":"2022","unstructured":"Liang T, Chu X, Liu Y, Wang Y, Tang Z, Chu W, Chen J, Ling H (2022) Cbnet: A composite backbone network architecture for object detection. IEEE Trans Image Process 31:6893\u20136906","journal-title":"IEEE Trans Image Process"},{"key":"18448_CR44","unstructured":"Dosovitskiy A , Beyer L , Kolesnikov A , Weissenborn D , Zhai X , Unterthiner T , Dehghani M , Minderer M , Heigold G , Gelly S , et\u00a0al. (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"18448_CR45","doi-asserted-by":"crossref","unstructured":"Zhang P , Dai X , Yang J , Xiao B , Yuan L , Zhang L , Gao J (2021) Multi-scale vision longformer: A new vision transformer for high-resolution image encoding. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2998\u20133008","DOI":"10.1109\/ICCV48922.2021.00299"},{"key":"18448_CR46","unstructured":"Chu X , Tian Z , Wang Y , Zhang B , Ren H , Wei X , Xia H , Shen C (2021) Twins: Revisiting spatial attention design in vision transformers. 2(3) . arXiv preprint arXiv:2104.13840"},{"key":"18448_CR47","unstructured":"Yang J , Li C , Zhang P , Dai X , Xiao B , Yuan L , Gao J (2021) Focal self-attention for local-global interactions in vision transformers. arXiv preprint arXiv:2107.00641"},{"issue":"5","key":"18448_CR48","first-page":"6575","volume":"45","author":"L Yuan","year":"2022","unstructured":"Yuan L, Hou Q, Jiang Z, Feng J, Yan S (2022) Volo: Vision outlooker for visual recognition. IEEE Trans Pattern Anal Mach Intell 45(5):6575\u20136586","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"18448_CR49","doi-asserted-by":"crossref","unstructured":"Chen C-FR , Fan Q, Panda R (2021) Crossvit: Cross-attention multi-scale vision transformer for image classification. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 357\u2013366","DOI":"10.1109\/ICCV48922.2021.00041"},{"key":"18448_CR50","doi-asserted-by":"crossref","unstructured":"Wang W , Xie E , Li X , Fan D-P , Song K , Liang D , Lu T , Luo P , Shao L (2021) Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 568\u2013578","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"18448_CR51","doi-asserted-by":"crossref","unstructured":"Xu W , Xu Y , Chang T , Tu Z (2021) Co-scale conv-attentional image transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9981\u20139990","DOI":"10.1109\/ICCV48922.2021.00983"},{"key":"18448_CR52","unstructured":"Li Y , Zhang K , Cao J , Timofte R , Van\u00a0Gool L (2021) Localvit: Bringing locality to vision transformers. arXiv preprint arXiv:2104.05707"},{"key":"18448_CR53","unstructured":"Vaswani A , Shazeer N , Parmar N , Uszkoreit J , Jones L , Gomez AN , Kaiser \u0141 , Polosukhin I (2017) Attention is all you need. Adv Neural Inform Process Syst 30"},{"key":"18448_CR54","unstructured":"Chen K, Wang J, Pang J, Cao Y, Xiong Y, Li X, Sun S, Feng W, Liu Z, Xu J, et\u00a0al. (2019) Mmdetection: Open mmlab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155"},{"key":"18448_CR55","doi-asserted-by":"crossref","unstructured":"Rossi L , Karimi A , Prati A (2021) A novel region of interest extraction layer for instance segmentation. In:2020 25th international conference on pattern recognition (ICPR), pp 2203\u20132209. IEEE","DOI":"10.1109\/ICPR48806.2021.9412258"},{"key":"18448_CR56","doi-asserted-by":"crossref","unstructured":"Pang J, Chen K, Shi J, Feng H, Ouyang W, Lin D (2019) Libra r-cnn: Towards balanced learning for object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 821\u2013830","DOI":"10.1109\/CVPR.2019.00091"},{"key":"18448_CR57","doi-asserted-by":"publisher","first-page":"7389","DOI":"10.1109\/TIP.2020.3002345","volume":"29","author":"T Kong","year":"2020","unstructured":"Kong T, Sun F, Liu H, Jiang Y, Li L, Shi J (2020) Foveabox: Beyound anchor-based object detection. IEEE Trans Image Process 29:7389\u20137398","journal-title":"IEEE Trans Image Process"},{"key":"18448_CR58","unstructured":"Samet N, Hicsonmez S, Akbas E (2020) Reducing label noise in anchor-free object detection. arXiv preprint arXiv:2008.01167"},{"key":"18448_CR59","doi-asserted-by":"crossref","unstructured":"Lu X, Li B, Yue Y, Li Q, Yan J (2019) Grid r-cnn. In:Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7363\u20137372","DOI":"10.1109\/CVPR.2019.00754"},{"key":"18448_CR60","doi-asserted-by":"crossref","unstructured":"Li Y, Chen Y, Wang N, Zhang Z (2019) Scale-aware trident networks for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6054\u20136063","DOI":"10.1109\/ICCV.2019.00615"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-18448-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-18448-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-18448-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T02:10:46Z","timestamp":1725329446000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-18448-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,13]]},"references-count":60,"journal-issue":{"issue":"30","published-online":{"date-parts":[[2024,9]]}},"alternative-id":["18448"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-18448-w","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,2,13]]},"assertion":[{"value":"25 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 December 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 January 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 February 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}