{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T15:09:38Z","timestamp":1778425778207,"version":"3.51.4"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T00:00:00Z","timestamp":1778371200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T00:00:00Z","timestamp":1778371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61379015"],"award-info":[{"award-number":["61379015"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007129","name":"Natural Science Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ZR2023QD190"],"award-info":[{"award-number":["ZR2023QD190"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s11554-026-01892-2","type":"journal-article","created":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T14:41:33Z","timestamp":1778424093000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-scale feature reconstruction detection transformer (MSFR-DETR): a real-time detector for small objects"],"prefix":"10.1007","volume":"23","author":[{"given":"Mengyu","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yingkun","family":"Hou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiqiang","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Hou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengmeng","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoya","family":"Dai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Long","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,5,10]]},"reference":[{"issue":"3","key":"1892_CR1","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/JPROC.2023.3238524","volume":"111","author":"Z Zou","year":"2023","unstructured":"Zou, Z., Chen, K., Shi, Z., Guo, Y., Ye, J.: Object detection in 20 years: a survey. Proc. IEEE 111(3), 257\u2013276 (2023)","journal-title":"Proc. IEEE"},{"issue":"6","key":"1892_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10462-025-11186-x","volume":"58","author":"M Jamali","year":"2025","unstructured":"Jamali, M., Davidsson, P., Khoshkangini, R., Ljungqvist, M.G., Mihailescu, R.C.: Context in object detection: a systematic literature review. Artif. Intell. Rev. 58(6), 1\u201389 (2025)","journal-title":"Artif. Intell. Rev."},{"key":"1892_CR3","doi-asserted-by":"crossref","unstructured":"Chen, G., Wang, H., Chen, K., Li, Z., Song, Z., Liu, Y., Chen, W., Knoll, A.: A survey of the four pillars for small object detection: multiscale representation, contextual information, super-resolution, and region proposal. IEEE Trans. Syst. Man Cybern.: Syst. 52(2), 936\u2013953 (2020)","DOI":"10.1109\/TSMC.2020.3005231"},{"issue":"10","key":"1892_CR4","doi-asserted-by":"publisher","first-page":"3388","DOI":"10.1109\/TPAMI.2020.2981890","volume":"43","author":"K Oksuz","year":"2020","unstructured":"Oksuz, K., Cam, B.C., Kalkan, S., Akbas, E.: Imbalance problems in object detection: a review. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3388\u20133415 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1892_CR5","doi-asserted-by":"crossref","unstructured":"Zhu, X., Lyu, S., Wang, X., Zhao, Q.: TPH-YOLOv5: improved YOLOv5 based on transformer prediction head for object detection on drone-captured scenarios. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2778\u20132788. (2021)","DOI":"10.1109\/ICCVW54120.2021.00312"},{"key":"1892_CR6","doi-asserted-by":"crossref","unstructured":"Koyun, O.C., Keser, R.K., Akkaya, I.B., T\u00f6reyin, B.U.: Focus-and-detect: a small object detection framework for aerial images. In: Signal Process.: Image Commun, vol. 104, p. 116675. (2022)","DOI":"10.1016\/j.image.2022.116675"},{"issue":"11","key":"1892_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.cja.2025.103456","volume":"38","author":"T Mengchu","year":"2025","unstructured":"Mengchu, T., Meiji, C., Zhimin, C., Yingliang, M., Shaohua, Y.: MFR-YOLOv10: object detection in UAV-taken images based on multilayer feature reconstruction network. Chin. J. Aeronaut. 38(11), 103456 (2025)","journal-title":"Chin. J. Aeronaut."},{"issue":"17","key":"1892_CR8","doi-asserted-by":"publisher","first-page":"5496","DOI":"10.3390\/s24175496","volume":"24","author":"Y Kong","year":"2024","unstructured":"Kong, Y., Shang, X., Jia, S.: Drone-DETR: efficient small object detection for remote sensing image using enhanced RT-DETR model. Sensors 24(17), 5496 (2024)","journal-title":"Sensors"},{"key":"1892_CR9","doi-asserted-by":"crossref","unstructured":"Yang, C., Huang, Z., Wang, N.: QueryDet: cascaded sparse query for accelerating high-resolution small object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13668\u201313677. (2022)","DOI":"10.1109\/CVPR52688.2022.01330"},{"issue":"11","key":"1892_CR10","first-page":"13467","volume":"45","author":"G Cheng","year":"2023","unstructured":"Cheng, G., Yuan, X., Yao, X., Yan, K., Zeng, Q., Xie, X., Han, J.: Towards large-scale small object detection: survey and benchmarks. IEEE Trans. Pattern Anal. Mach. Intell. 45(11), 13467\u201313488 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"2","key":"1892_CR11","first-page":"269","volume":"11","author":"J Zhu","year":"2025","unstructured":"Zhu, J., Qin, C., Choi, D.: YOLO-SDLUWD: YOLOv7-based small target detection network for infrared images in complex backgrounds. Dig. Commun. Netw. 11(2), 269\u2013279 (2025)","journal-title":"Dig. Commun. Netw."},{"key":"1892_CR12","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587. (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"1892_CR13","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., Berg, A.C.: SSD: single shot multibox detector. In: European Conference on Computer Vision, pp. 21\u201337. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1892_CR14","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788. (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1892_CR15","first-page":"107984","volume":"37","author":"A Wang","year":"2024","unstructured":"Wang, A., Chen, H., Liu, L., Chen, K., Lin, Z., Han, J., et al.: YOLOv10: real-time end-to-end object detection. Adv. Neural. Inf. Process. Syst. 37, 107984\u2013108011 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1892_CR16","unstructured":"Tian, Y., Ye, Q., Doermann, D.: YOLOv12: attention-centric real-time object detectors, (2025). (arXiv preprint)"},{"key":"1892_CR17","first-page":"5998","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141, Polosukhin, I.: Attention is all you need. Adv. Neural. Inf. Process. Syst. 30, 5998\u20136008 (2017)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1892_CR18","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1892_CR19","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection, (2020). (arXiv preprint)"},{"key":"1892_CR20","unstructured":"Zhang, H., Li, F., Liu, S., Zhang, L., Su, H., Zhu, J., Ni, L.M., Shum, H.Y.: DINO: DETR with improved denoising anchor boxes for end-to-end object detection. arXiv preprint. arXiv:2203.03605 (2022)"},{"key":"1892_CR21","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: DETRs beat YOLOs on real-time object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16965\u201316974. (2024)","DOI":"10.1109\/CVPR52733.2024.01605"},{"key":"1892_CR22","doi-asserted-by":"publisher","first-page":"7050","DOI":"10.52202\/075280-0308","volume":"36","author":"H Chen","year":"2023","unstructured":"Chen, H., Wang, Y., Guo, J., Tao, D.: VanillaNet: the power of minimalism in deep learning. Adv. Neural. Inf. Process. Syst. 36, 7050\u20137064 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1892_CR23","doi-asserted-by":"crossref","unstructured":"Liu, X., Peng, H., Zheng, N., Yang, Y., Hu, H., Yuan, Y.: EfficientViT: memory efficient vision transformer with cascaded group attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14420\u201314430. (2023)","DOI":"10.1109\/CVPR52729.2023.01386"},{"key":"1892_CR24","doi-asserted-by":"crossref","unstructured":"Howard, A., Sandler, M., Chu, G., Chen, L.C., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., et al.: Searching for MobileNetV3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"1892_CR25","doi-asserted-by":"crossref","unstructured":"Chen, J., Kao, S.H., He, H., Zhuo, W., Wen, S., Lee, C.H., Chan, S.H.G.: Run, don\u2019t walk: chasing higher flops for faster neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12021\u201312031. (2023)","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"1892_CR26","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, Y., Ge, Y., Zhao, S., Song, L., Yue, X., Shan, Y.: UniRepLKNet: a universal perception large-kernel convnet for audio video point cloud time-series and image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5513\u20135524. (2024)","DOI":"10.1109\/CVPR52733.2024.00527"},{"key":"1892_CR27","doi-asserted-by":"publisher","first-page":"1968","DOI":"10.1109\/TMM.2021.3074273","volume":"24","author":"C Deng","year":"2021","unstructured":"Deng, C., Wang, M., Liu, L., Liu, Y., Jiang, Y.: Extended feature pyramid network for small object detection. IEEE Trans. Multimed. 24, 1968\u20131979 (2021)","journal-title":"IEEE Trans. Multimed."},{"key":"1892_CR28","doi-asserted-by":"crossref","unstructured":"Li, X., Li, X., Zhang, L., Cheng, G., Shi, J., Lin, Z., Tan, S., Tong, Y.: Improving semantic segmentation via decoupled body and edge supervision. In: European Conference on Computer Vision, pp. 435\u2013452. Springer (2020)","DOI":"10.1007\/978-3-030-58520-4_26"},{"key":"1892_CR29","doi-asserted-by":"crossref","unstructured":"Tang, F., Xu, Z., Huang, Q., Wang, J., Hou, X., Su, J., Liu, J.: DuAT: dual-aggregation transformer network for medical image segmentation. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV), pp. 343\u2013356. Springer (2023)","DOI":"10.1007\/978-981-99-8469-5_27"},{"key":"1892_CR30","doi-asserted-by":"crossref","unstructured":"Sunkara, R., Luo, T.: No more strided convolutions or pooling: a new CNN building block for low-resolution images and small objects. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 443\u2013459. Springer (2022)","DOI":"10.1007\/978-3-031-26409-2_27"},{"key":"1892_CR31","doi-asserted-by":"crossref","unstructured":"Cui, Y., Ren, W., Knoll, A.: Omni-kernel network for image restoration. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, pp. 1426\u20131434. (2024)","DOI":"10.1609\/aaai.v38i2.27907"},{"key":"1892_CR32","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Liao, H.Y.M., Wu, Y.H., Chen, P.Y., Hsieh, J.W., Yeh, I.H.: CSPNet: a new backbone that can enhance learning capability of CNN. In: Proceedings of the IEEE\/CVF conference on Computer Vision and Pattern Recognition Workshops, pp. 390\u2013391. (2020)","DOI":"10.1109\/CVPRW50498.2020.00203"},{"key":"1892_CR33","unstructured":"Zhang, H., Xu, C., Zhang, S.: Inner-IoU: more effective intersection over union loss with auxiliary bounding box. arxiv 2023. arXiv preprint. arXiv:2311.02877 (2023)"},{"key":"1892_CR34","unstructured":"Ma, S., Xu, Y.: MPDIoU: a loss for efficient and accurate bounding box regression, (2023). (arXiv preprint)"},{"issue":"6","key":"1892_CR35","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1892_CR36","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6154\u20136162. (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"1892_CR37","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988. (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"1892_CR38","doi-asserted-by":"crossref","unstructured":"Feng, C., Zhong, Y., Gao, Y., Scott, M.R., Huang, W.: TOOD: task-aligned one-stage object detection. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3490\u20133499. IEEE Computer Society (2021)","DOI":"10.1109\/ICCV48922.2021.00349"},{"issue":"5","key":"1892_CR39","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1007\/s11554-025-01753-4","volume":"22","author":"L Xiao","year":"2025","unstructured":"Xiao, L., Li, W., Tang, R., Li, H., Wan, B., Ren, D.: EDet-YOLO: an efficient small object detection algorithm for aerial images. J. Real-Time Image Proc. 22(5), 175 (2025)","journal-title":"J. Real-Time Image Proc."},{"issue":"1","key":"1892_CR40","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1007\/s11554-025-01622-0","volume":"22","author":"Y Dong","year":"2025","unstructured":"Dong, Y., Xu, F., Guo, J.: LKR-DETR: small object detection in remote sensing images based on multi-large kernel convolution. J. Real-Time Image Proc. 22(1), 46 (2025)","journal-title":"J. Real-Time Image Proc."}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-026-01892-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-026-01892-2","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-026-01892-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T14:41:41Z","timestamp":1778424101000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-026-01892-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,10]]},"references-count":40,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["1892"],"URL":"https:\/\/doi.org\/10.1007\/s11554-026-01892-2","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5,10]]},"assertion":[{"value":"19 December 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 April 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 May 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"97"}}