{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T22:07:21Z","timestamp":1778191641635,"version":"3.51.4"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T00:00:00Z","timestamp":1741737600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T00:00:00Z","timestamp":1741737600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100011447","name":"Science and Technology Department of Henan Province","doi-asserted-by":"publisher","award":["235200810031"],"award-info":[{"award-number":["235200810031"]}],"id":[{"id":"10.13039\/501100011447","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s00530-025-01746-0","type":"journal-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T01:43:04Z","timestamp":1741743784000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["An improved Multi-Scale Fusion and Small Object Enhancement method for efficient pedestrian detection in dense 
scenes"],"prefix":"10.1007","volume":"31","author":[{"given":"Yalin","family":"Song","sequence":"first","affiliation":[]},{"given":"Peng","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Kexin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Shichong","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Rui","family":"Zhai","sequence":"additional","affiliation":[]},{"given":"Ran","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,12]]},"reference":[{"issue":"1","key":"1746_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.grets.2022.100002","volume":"1","author":"Z Lv","year":"2023","unstructured":"Lv, Z., Shang, W.: Impacts of intelligent transportation systems on energy conservation and emission reduction of transport systems: a comprehensive review. Green Technol. Sustain. 1(1), 100002 (2023)","journal-title":"Green Technol. Sustain."},{"key":"1746_CR2","doi-asserted-by":"crossref","unstructured":"Wei, X., Bai, Y., Zheng, Y., Shi, D., Gong, Y.: Autoregressive visual tracking, pp. 9697\u20139706 (2023)","DOI":"10.1109\/CVPR52729.2023.00935"},{"key":"1746_CR3","doi-asserted-by":"crossref","unstructured":"Zhao, T., Ning, X., Hong, K., Qiu, Z., Lu, P., Zhao, Y., Zhang, L., Zhou, L., Dai, G., Yang, H., et al.: Ada3d: exploiting the spatial redundancy with adaptive inference for efficient 3d object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17728\u201317738 (2023)","DOI":"10.1109\/ICCV51070.2023.01625"},{"key":"1746_CR4","doi-asserted-by":"crossref","unstructured":"Vrontis, D., Christofi, M., Pereira, V., Tarba, S., Makrides, A., Trichina, E.: Artificial intelligence, robotics, advanced technologies and human resource management: a systematic review. In: Artificial Intelligence and International HRM, pp. 
172\u2013201 (2023)","DOI":"10.4324\/9781003377085-7"},{"issue":"11","key":"1746_CR5","doi-asserted-by":"publisher","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","volume":"30","author":"Z-Q Zhao","year":"2019","unstructured":"Zhao, Z.-Q., Zheng, P., Xu, S.-T., Wu, X.: Object detection with deep learning: a review. IEEE Trans. Neural Netw. Learn. Syst. 30(11), 3212\u20133232 (2019)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"1746_CR6","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1016\/j.neucom.2013.12.017","volume":"135","author":"V-D Hoang","year":"2014","unstructured":"Hoang, V.-D., Le, M.-H., Jo, K.-H.: Hybrid cascade boosting machine using variant scale blocks based hog features for pedestrian detection. Neurocomputing 135, 357\u2013366 (2014)","journal-title":"Neurocomputing"},{"key":"1746_CR7","doi-asserted-by":"crossref","unstructured":"Paisitkriangkrai, S., Shen, C., Van Den\u00a0Hengel, A.: Strengthening the effectiveness of pedestrian detection with spatially pooled features. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part IV 13, pp. 546\u2013561. Springer (2014)","DOI":"10.1007\/978-3-319-10593-2_36"},{"key":"1746_CR8","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"1746_CR9","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn (2015). 
arXiv:1504.08083","DOI":"10.1109\/ICCV.2015.169"},{"issue":"6","key":"1746_CR10","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1746_CR11","unstructured":"Dai, J., Li, Y., He, K., Sun, J.: R-fcn: Object detection via region-based fully convolutional networks. Advances in neural information processing systems, Vol. 29 (2016)"},{"issue":"4","key":"1746_CR12","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1109\/TPAMI.2016.2562626","volume":"39","author":"H Peng","year":"2016","unstructured":"Peng, H., Li, B., Ling, H., Hu, W., Xiong, W., Maybank, S.J.: Salient object detection via structured matrix decomposition. IEEE Trans. Pattern Anal. Mach. Intell. 39(4), 818\u2013832 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1746_CR13","doi-asserted-by":"crossref","unstructured":"Redmon, J.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1746_CR14","doi-asserted-by":"crossref","unstructured":"Wang, C.-Y., Bochkovskiy, A., Liao, H.-Y.M.: Scaled-yolov4: Scaling cross stage partial network. In: Proceedings of the IEEE\/cvf Conference on Computer Vision and Pattern Recognition, pp. 13029\u201313038 (2021)","DOI":"10.1109\/CVPR46437.2021.01283"},{"key":"1746_CR15","unstructured":"Horvat, M., Jele\u010devi\u0107, L., Gledec, G.: A comparative study of yolov5 models performance for image localization and classification. In: Central European Conference on Information and Intelligent Systems, pp. 349\u2013356. 
Faculty of Organization and Informatics Varazdin (2022)"},{"key":"1746_CR16","doi-asserted-by":"crossref","unstructured":"Wang, C.-Y., Bochkovskiy, A., Liao, H.-Y.M.: Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7464\u20137475 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"1746_CR17","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: Ssd: Single shot multibox detector. In: Computer Vision\u2014ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 21\u201337. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1746_CR18","unstructured":"Ross, T.-Y., Doll\u00e1r, G.: Focal loss for dense object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2980\u20132988 (2017)"},{"key":"1746_CR19","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1746_CR20","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., Guo, J., Ni, L.M., Zhang, L.: Dn-detr: accelerate detr training by introducing query denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13619\u201313627 (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"1746_CR21","unstructured":"Liu, S., Li, F., Zhang, H., Yang, X., Qi, X., Su, H., Zhu, J., Zhang, L.: Dab-detr: dynamic anchor boxes are better queries for detr (2022). 
arXiv:2201.12329"},{"key":"1746_CR22","doi-asserted-by":"crossref","unstructured":"Sun, P., Zhang, R., Jiang, Y., Kong, T., Xu, C., Zhan, W., Tomizuka, M., Li, L., Yuan, Z., Wang, C., et al.: Sparse r-cnn: end-to-end object detection with learnable proposals. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14454\u201314463 (2021)","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"1746_CR23","unstructured":"Zhang, H., Li, F., Liu, S., Zhang, L., Su, H., Zhu, J., Ni, L.M., Shum, H.-Y.: Dino: Detr with improved denoising anchor boxes for end-to-end object detection (2022). arXiv:2203.03605"},{"key":"1746_CR24","unstructured":"Dosovitskiy, A.: An image is worth 16x16 words: transformers for image recognition at scale (2020). arXiv:2010.11929"},{"key":"1746_CR25","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"1746_CR26","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: Segformer: Simple and efficient design for semantic segmentation with transformers. Adv. Neural. Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1746_CR27","doi-asserted-by":"crossref","unstructured":"Kong, Z., Dong, P., Ma, X., Meng, X., Sun, M., Niu, W., Shen, X., Yuan, G., Ren, B., Qin, M., et al.: Spvit: enabling faster vision transformers via soft token pruning (2022). https:\/\/arxiv.org\/abs\/2112.13890","DOI":"10.1007\/978-3-031-20083-0_37"},{"key":"1746_CR28","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: Detrs beat yolos on real-time object detection. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16965\u201316974 (2024)","DOI":"10.1109\/CVPR52733.2024.01605"},{"key":"1746_CR29","unstructured":"Lv, W., Zhao, Y., Chang, Q., Huang, K., Wang, G., Liu, Y.: Rt-detrv2: improved baseline with bag-of-freebies for real-time detection transformer (2024). arXiv:2407.17140"},{"key":"1746_CR30","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"issue":"9","key":"1746_CR31","doi-asserted-by":"publisher","first-page":"10430","DOI":"10.1007\/s10489-021-02798-1","volume":"52","author":"C Yan","year":"2022","unstructured":"Yan, C., Zhang, H., Li, X., Yuan, D.: R-ssd: refined single shot multibox detector for pedestrian detection. Appl. Intell. 52(9), 10430\u201310447 (2022)","journal-title":"Appl. Intell."},{"key":"1746_CR32","unstructured":"Wang, A., Chen, H., Liu, L., Chen, K., Lin, Z., Han, J., Ding, G.: Yolov10: Real-time end-to-end object detection (2024). arXiv:2405.14458"},{"issue":"10s","key":"1746_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3505244","volume":"54","author":"S Khan","year":"2022","unstructured":"Khan, S., Naseer, M., Hayat, M., Zamir, S.W., Khan, F.S., Shah, M.: Transformers in vision: a survey. ACM Comput. Surv. (CSUR) 54(10s), 1\u201341 (2022)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"1746_CR34","unstructured":"Lin, M., Li, C., Bu, X., Sun, M., Lin, C., Yan, J., Ouyang, W., Deng, Z.: Detr for crowd pedestrian detection (2020). arXiv:2012.06785"},{"key":"1746_CR35","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: deformable transformers for end-to-end object detection (2020). 
arXiv:2010.04159"},{"key":"1746_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2022.103620","volume":"89","author":"S Dubey","year":"2022","unstructured":"Dubey, S., Olimov, F., Rafique, M.A., Jeon, M.: Improving small objects detection using transformer. J. Vis. Commun. Image Represent. 89, 103620 (2022)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"1746_CR37","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1746_CR38","doi-asserted-by":"crossref","unstructured":"Wong, A., Famuori, M., Shafiee, M.J., Li, F., Chwyl, B., Chung, J.: Yolo nano: a highly compact you only look once convolutional neural network for object detection. In: 2019 Fifth Workshop on Energy Efficient Machine Learning and Cognitive Computing-NeurIPS Edition (EMC2-NIPS), pp. 22\u201325. IEEE (2019)","DOI":"10.1109\/EMC2-NIPS53020.2019.00013"},{"key":"1746_CR39","doi-asserted-by":"crossref","unstructured":"Hu, Y., Chen, Y., Li, X., Feng, J.: Dynamic feature fusion for semantic edge detection (2019). arXiv:1902.09104","DOI":"10.24963\/ijcai.2019\/110"},{"key":"1746_CR40","unstructured":"Guo, H., Wang, Y., Ye, Z., Dai, J., Xiong, Y.: big. little vision transformer for efficient visual recognition (2024). arXiv:2410.10267"},{"issue":"1","key":"1746_CR41","doi-asserted-by":"publisher","first-page":"4228610","DOI":"10.1155\/2023\/4228610","volume":"2023","author":"P Shi","year":"2023","unstructured":"Shi, P., Chen, X., Qi, H., Zhang, C., Liu, Z.: Object detection based on Swin Deformable Transformer-BiPAFPN-YOLOX. Comput. Intell. Neurosci. 2023(1), 4228610 (2023)","journal-title":"Comput. Intell. 
Neurosci."},{"issue":"2","key":"1746_CR42","doi-asserted-by":"publisher","first-page":"12714","DOI":"10.1002\/eng2.12714","volume":"6","author":"R Chang","year":"2024","unstructured":"Chang, R., Gao, S., Li, H., Zhao, S., Yang, Y.: Toward reliable fusion object detection based on dilated pyramid and semantic attention. Eng. Rep. 6(2), 12714 (2024)","journal-title":"Eng. Rep."},{"key":"1746_CR43","doi-asserted-by":"crossref","unstructured":"Li, F., Zeng, A., Liu, S., Zhang, H., Li, H., Zhang, L., Ni, L.M.: Lite detr: an interleaved multi-scale encoder for efficient detr. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18558\u201318567 (2023)","DOI":"10.1109\/CVPR52729.2023.01780"},{"key":"1746_CR44","unstructured":"Farhadi, A., Redmon, J.: Yolov3: An incremental improvement. In: Computer Vision and Pattern Recognition, vol. 1804, pp. 1\u20136. Springer, Berlin (2018)"},{"key":"1746_CR45","doi-asserted-by":"crossref","unstructured":"Tan, M., Pang, R., Le, Q.V.: Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10781\u201310790 (2020)","DOI":"10.1109\/CVPR42600.2020.01079"},{"issue":"1","key":"1746_CR46","doi-asserted-by":"publisher","first-page":"10697","DOI":"10.1038\/s41598-024-61136-w","volume":"14","author":"Y Shi","year":"2024","unstructured":"Shi, Y., Jia, Y., Zhang, X.: Focusdet: an efficient object detector for small object. Sci. Rep. 14(1), 10697 (2024)","journal-title":"Sci. Rep."},{"key":"1746_CR47","unstructured":"Simonyan, K.: Very deep convolutional networks for large-scale image recognition (2014). arXiv:1409.1556"},{"key":"1746_CR48","unstructured":"Steen, M., Downe, S., Bamford, N., Edozien, L.: Densenet: densely connected convolutional networks, pp. 362\u2013371 (2018). 
http:\/\/arxiv.org\/abs\/1608.06993 arXiv:1608.06993"},{"issue":"18","key":"1746_CR49","doi-asserted-by":"publisher","first-page":"8972","DOI":"10.3390\/app12188972","volume":"12","author":"M Shafiq","year":"2022","unstructured":"Shafiq, M., Gu, Z.: Deep residual learning for image recognition: a survey. Appl. Sci. 12(18), 8972 (2022)","journal-title":"Appl. Sci."},{"key":"1746_CR50","unstructured":"Shao, S., Zhao, Z., Li, B., Xiao, T., Yu, G., Zhang, X., Sun, J.: Crowdhuman: a benchmark for detecting human in a crowd (2018). arXiv:1805.00123"},{"issue":"2","key":"1746_CR51","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/TMM.2019.2929005","volume":"22","author":"S Zhang","year":"2019","unstructured":"Zhang, S., Xie, Y., Wan, J., Xia, H., Li, S.Z., Guo, G.: Widerperson: a diverse dataset for dense pedestrian detection in the wild. IEEE Trans. Multimed. 22(2), 380\u2013393 (2019)","journal-title":"IEEE Trans. Multimed."},{"key":"1746_CR52","doi-asserted-by":"crossref","unstructured":"Varghese, R., Sambath, M.: Yolov8: a novel object detection algorithm with enhanced performance and robustness. In: 2024 International Conference on Advances in Data Engineering and Intelligent Computing Systems (ADICS), pp. 1\u20136. IEEE (2024)","DOI":"10.1109\/ADICS58448.2024.10533619"},{"key":"1746_CR53","doi-asserted-by":"crossref","unstructured":"Wang, C., Yeh, I., Liao, H.: Yolov9: learning what you want to learn using programmable gradient information (2024). 
arXiv:2402.13616","DOI":"10.1007\/978-3-031-72751-1_1"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01746-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01746-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01746-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,4]],"date-time":"2025-09-04T14:59:52Z","timestamp":1756997992000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01746-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,12]]},"references-count":53,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["1746"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01746-0","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,12]]},"assertion":[{"value":"10 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 February 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 March 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing financial interests or personal relationships that could have influenced the work presented in this 
paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"151"}}