{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T21:05:13Z","timestamp":1773867913789,"version":"3.50.1"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,11,15]],"date-time":"2023-11-15T00:00:00Z","timestamp":1700006400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,15]],"date-time":"2023-11-15T00:00:00Z","timestamp":1700006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","award":["2020YFC1523301"],"award-info":[{"award-number":["2020YFC1523301"]}],"id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62106199"],"award-info":[{"award-number":["62106199"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1007\/s00521-023-09122-7","type":"journal-article","created":{"date-parts":[[2023,11,15]],"date-time":"2023-11-15T13:03:03Z","timestamp":1700053383000},"page":"1699-1712","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Mitigate the scale imbalance via multi-scale information interaction in small object detection"],"prefix":"10.1007","volume":"36","author":[{"given":"Enhui","family":"Chai","sequence":"first","affiliation":[]},{"given":"Li","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xingxing","family":"Hao","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,15]]},"reference":[{"issue":"3","key":"9122_CR1","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1007\/s11263-019-01204-1","volume":"128","author":"H Law","year":"2020","unstructured":"Law H, Deng J (2020) Cornernet: detecting objects as paired keypoints. Int J Comput Vis 128(3):642\u2013656","journal-title":"Int J Comput Vis"},{"key":"9122_CR2","doi-asserted-by":"publisher","unstructured":"Zhou X, Wang D, Krhenb\u00fchl P (2019) Objects as points. 2019. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1904.07850","DOI":"10.48550\/arXiv.1904.07850"},{"key":"9122_CR3","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2020) Fcos: fully convolutional one-stage object detection. In: 2019 IEEE\/CVF international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"9122_CR4","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. Proceedings of the IEEE international conference on computer vision. 2015: 1440\u20131448.","DOI":"10.1109\/ICCV.2015.169"},{"issue":"6","key":"9122_CR5","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9122_CR6","volume-title":"R-fcn: object detection via region-based fully convolutional networks","author":"J Dai","year":"2016","unstructured":"Dai J, Li Y, He K, Sun J (2016) R-fcn: object detection via region-based fully convolutional networks. Curran Associates Inc, Red Hook"},{"key":"9122_CR7","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Dollar P, Girshick R (2017) Mask r-cnn. In: International conference on computer vision","DOI":"10.1109\/ICCV.2017.322"},{"issue":"99","key":"9122_CR8","first-page":"2999","volume":"PP","author":"TY Lin","year":"2017","unstructured":"Lin TY, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. IEEE Trans Pattern Anal Mach Intell PP(99):2999\u20133007","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9122_CR9","volume-title":"You only look once: unified, real-time object detection","author":"J Redmon","year":"2019","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2019) You only look once: unified, real-time object detection. IEEE, Washington, D.C"},{"key":"9122_CR10","unstructured":"Redmon J, Farhadi A (2018) Yolov3: an incremental improvement. arXiv e-prints"},{"key":"9122_CR11","doi-asserted-by":"crossref","unstructured":"Berg AC, Fu CY, Szegedy C, Anguelov D, Erhan D, Reed S, Liu W (2016) SSD: single shot multibox detector. 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14. Springer International Publishing, 2016: 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"9122_CR12","doi-asserted-by":"publisher","unstructured":"Fu CY, Liu W, Ranga A, Tyagi A, Berg AC (2017) Dssd : deconvolutional single shot detector. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1701.06659","DOI":"10.48550\/arXiv.1701.06659"},{"key":"9122_CR13","doi-asserted-by":"crossref","unstructured":"Zhao Q, Sheng T, Wang Y, Tang Z, Ling H (2019) M2det: a single-shot object detector based on multi-level feature pyramid network. In: Proceedings of the AAAI conference on artificial intelligence, pp 9259\u20139266","DOI":"10.1609\/aaai.v33i01.33019259"},{"key":"9122_CR14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00442","volume-title":"Single-shot refinement neural network for object detection","author":"S Zhang","year":"2018","unstructured":"Zhang S, Wen L, Bian X, Lei Z, Li SZ (2018) Single-shot refinement neural network for object detection. IEEE, Washington, D.C"},{"key":"9122_CR15","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le QV (2020) Efficientdet: scalable and efficient object detection. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"9122_CR16","doi-asserted-by":"publisher","unstructured":"Kisantal M, Wojna Z, Murawski J, Naruniec J, Cho K (2019) Augmentation for small object detection. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1902.07296","DOI":"10.48550\/arXiv.1902.07296"},{"key":"9122_CR17","doi-asserted-by":"crossref","unstructured":"Lim JS, Astrid M, Yoon HJ, Lee SI (2021) Small object detection using context and attention. 2021 international Conference on Artificial intelligence in information and Communication (ICAIIC). IEEE, 2021: 181\u2013186.","DOI":"10.1109\/ICAIIC51459.2021.9415217"},{"key":"9122_CR18","doi-asserted-by":"publisher","unstructured":"Zisserman A, Simonyan K (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1409.1556","DOI":"10.48550\/arXiv.1409.1556"},{"key":"9122_CR19","volume-title":"Going deeper with convolutions","author":"C Szegedy","year":"2014","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Rabinovich A (2014) Going deeper with convolutions. IEEE Computer Society, Washington, D.C"},{"key":"9122_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90","volume-title":"Deep residual learning for image recognition","author":"K He","year":"2016","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. IEEE, Washington, D.C"},{"key":"9122_CR21","unstructured":"Iandola F, Moskewicz M, Karayev S, Girshick R, Darrell T, Keutzer K (2014) Densenet: implementing efficient convnet descriptor pyramids. arXiv preprint arXiv:1404.1869"},{"key":"9122_CR22","unstructured":"Bochkovskiy A, Wang CY, Liao HYM (2020) Yolov4: optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934"},{"key":"9122_CR23","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2016) Squeezenet: alexnet-level accuracy with 50x fewer parameters and$$<$$ 0.5 mb model size. arXiv preprint arXiv:1602.07360"},{"key":"9122_CR24","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861"},{"key":"9122_CR25","doi-asserted-by":"crossref","unstructured":"Zhang X, Zhou X, Lin M, Sun J (2018) Shufflenet: an extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6848\u20136856","DOI":"10.1109\/CVPR.2018.00716"},{"key":"9122_CR26","doi-asserted-by":"crossref","unstructured":"Han K, Wang Y, Tian Q, Guo J, Xu C (2020) Ghostnet: more features from cheap operations. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR42600.2020.00165"},{"key":"9122_CR27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106","volume-title":"Feature pyramid networks for object detection","author":"TY Lin","year":"2017","unstructured":"Lin TY, Dollar P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. IEEE Computer Society, Washington, D.C"},{"key":"9122_CR28","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"issue":"9","key":"9122_CR29","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans Pattern Anal Mach Intell 37(9):1904\u20131916","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9122_CR30","doi-asserted-by":"publisher","unstructured":"Chen LC, Papandreou G, Schroff F, Adam H (2017) Rethinking atrous convolution for semantic image segmentation. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1706.05587","DOI":"10.48550\/arXiv.1706.05587"},{"key":"9122_CR31","doi-asserted-by":"publisher","unstructured":"Yu F, Koltun V (2016) Multi-scale context aggregation by dilated convolutions. In: ICLR. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1511.07122","DOI":"10.48550\/arXiv.1511.07122"},{"key":"9122_CR32","volume-title":"Microsoft coco: common objects in context","author":"TY Lin","year":"2014","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Zitnick CL (2014) Microsoft coco: common objects in context. Springer International Publishing, Cham"},{"key":"9122_CR33","doi-asserted-by":"crossref","unstructured":"Du D, Zhu P, Wen L, Bian X, Liu ZM (2019) Visdrone-det2019: the vision meets drone object detection in image challenge results. In: ICCV visdrone workshop","DOI":"10.1109\/ICCVW.2019.00031"},{"key":"9122_CR34","doi-asserted-by":"crossref","unstructured":"Cao Y, Chen K, Loy CC, Lin D (2019) Prime sample attention in object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11583\u201311591","DOI":"10.1109\/CVPR42600.2020.01160"},{"key":"9122_CR35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00615","volume-title":"Scale-aware trident networks for object detection","author":"Y Li","year":"2019","unstructured":"Li Y, Chen Y, Wang N, Zhang Z (2019) Scale-aware trident networks for object detection. IEEE, Washington, D.C"},{"issue":"10","key":"9122_CR36","doi-asserted-by":"publisher","first-page":"3388","DOI":"10.1109\/TPAMI.2020.2981890","volume":"43","author":"K Oksuz","year":"2021","unstructured":"Oksuz K, Cam BC, Kalkan S, Akbas E (2021) Imbalance problems in object detection: a review. IEEE Trans Pattern Anal Mach Intell 43(10):3388\u20133415","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9122_CR37","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967274","volume-title":"Unitbox: an advanced object detection network","author":"J Yu","year":"2016","unstructured":"Yu J, Jiang Y, Wang Z, Cao Z, Huang T (2016) Unitbox: an advanced object detection network. ACM, New York"},{"key":"9122_CR38","doi-asserted-by":"crossref","unstructured":"Rezatofighi H, Tsoi N, Gwak JY, Sadeghian A, Savarese S (2019) Generalized intersection over union: a metric and a loss for bounding box regression. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2019.00075"},{"key":"9122_CR39","doi-asserted-by":"crossref","unstructured":"Zheng Z, Wang P, Liu W, Li J, Ye R, Ren D (2019) Distance-iou loss: faster and better learning for bounding box regression. arXiv","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"9122_CR40","doi-asserted-by":"crossref","unstructured":"Pang J, Chen K, Shi J, Feng H, Ouyang W, Lin D (2020) Libra r-cnn: towards balanced learning for object detection. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2019.00091"},{"key":"9122_CR41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00526","volume-title":"Towards accurate one-stage object detection with ap-loss","author":"K Chen","year":"2019","unstructured":"Chen K, Li J, Lin W, See J, Zou J (2019) Towards accurate one-stage object detection with ap-loss. IEEE, Washington, D.C"},{"key":"9122_CR42","doi-asserted-by":"crossref","unstructured":"Qian Q, Chen L, Li H, Jin R (2020) Dr loss: improving object detection by distributional ranking. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12164\u201312172","DOI":"10.1109\/CVPR42600.2020.01218"},{"key":"9122_CR43","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Houlsby N (2021) An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations, 2021"},{"key":"9122_CR44","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Advances in neural information processing systems, 30, 2017"},{"key":"9122_CR45","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"9122_CR46","doi-asserted-by":"crossref","unstructured":"Dai X, Chen Y, Xiao B, Chen D, Liu M, Yuan L, Zhang L (2021) Dynamic head: unifying object detection heads with attentions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7373-7382.","DOI":"10.1109\/CVPR46437.2021.00729"},{"key":"9122_CR47","unstructured":"Ma H, Xia X, Wang X, Xiao X, Li J, Zheng M (2022) Mocovit: mobile convolutional vision transformer. arXiv preprint arXiv:2205.12635"},{"key":"9122_CR48","doi-asserted-by":"crossref","unstructured":"Liu Z, Mao H, Wu CY, Feichtenhofer C, Darrell T, Xie S (2022) A convnet for the 2020s. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11976\u201311986","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"9122_CR49","doi-asserted-by":"crossref","unstructured":"Ding X, Zhang X, Zhou Y, Han J, Ding G, Sun J (2022) Scaling up your kernels to 31x31: revisiting large kernel design in cnns. arXiv e-prints","DOI":"10.1109\/CVPR52688.2022.01166"},{"key":"9122_CR50","doi-asserted-by":"crossref","unstructured":"Shi W, Caballero J, Husz\u00e1r F, Totz J, Aitken AP, Bishop R, Rueckert D, Wang Z (2016) Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. arXiv e-prints","DOI":"10.1109\/CVPR.2016.207"},{"key":"9122_CR51","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2017) Cascade r-cnn: delving into high quality object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6154-6162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"9122_CR52","doi-asserted-by":"crossref","unstructured":"Yang Z, Liu S, Hu H, Wang L, Lin S (2019) Reppoints: point set representation for object detection. In: 2019 IEEE\/CVF international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00975"},{"key":"9122_CR53","doi-asserted-by":"crossref","unstructured":"Du X, Lin TY, Jin P, Ghiasi G, Tan M, Cui Y, Le QV, Song X (2020) Spinenet: learning scale-permuted backbone for recognition and localization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11592\u201311601","DOI":"10.1109\/CVPR42600.2020.01161"},{"key":"9122_CR54","doi-asserted-by":"crossref","unstructured":"Li X, Wang W, Hu X, Li J, Tang J, Yang J (2021) Generalized focal loss v2: learning reliable localization quality estimation for dense object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11632\u201311641","DOI":"10.1109\/CVPR46437.2021.01146"},{"key":"9122_CR55","doi-asserted-by":"publisher","unstructured":"Song H, Sun D, Chun S, Jampani V, Han D, Heo B, Kim W, Yang MH (2021) Vidt: an efficient and effective fully transformer-based object detector. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.2110.03921","DOI":"10.48550\/arXiv.2110.03921"},{"key":"9122_CR56","doi-asserted-by":"crossref","unstructured":"Wang CY, Bochkovskiy A, Liao H (2022) Yolov7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv e-prints","DOI":"10.1109\/CVPR52729.2023.00721"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-09122-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-023-09122-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-09122-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T18:16:51Z","timestamp":1704997011000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-023-09122-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,15]]},"references-count":56,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,2]]}},"alternative-id":["9122"],"URL":"https:\/\/doi.org\/10.1007\/s00521-023-09122-7","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,15]]},"assertion":[{"value":"18 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 November 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Written informed consent for publication of this paper was obtained from the Northwest University and all authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}]}}