{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T20:47:55Z","timestamp":1758055675681,"version":"3.44.0"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["No. 62376114","No. 62376114","No. 62376114","No. 62376114","No. 62376114"],"award-info":[{"award-number":["No. 62376114","No. 62376114","No. 62376114","No. 62376114","No. 62376114"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"the Natural Science Foundation of Fujian Province","award":["No.2022J01891","No.2022J01891","No.2022J01891","No.2022J01891"],"award-info":[{"award-number":["No.2022J01891","No.2022J01891","No.2022J01891","No.2022J01891"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s00530-025-01864-9","type":"journal-article","created":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T11:09:30Z","timestamp":1751454570000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["TCF-DETR: multi-scale token-channel fusion transformer for enhanced small object detection"],"prefix":"10.1007","volume":"31","author":[{"given":"Huan","family":"Lei","sequence":"first","affiliation":[]},{"given":"Ze","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Shang","sequence":"additional","affiliation":[]},{"given":"Hong","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Wenyuan","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,2]]},"reference":[{"key":"1864_CR1","doi-asserted-by":"publisher","first-page":"7778","DOI":"10.1109\/TPAMI.2021.3117983","volume":"44","author":"J Ding","year":"2021","unstructured":"Ding, J., Xue, N., Xia, G.S., Bai, X., Yang, W., Yang, M.Y., Belongie, S., Luo, J., Datcu, M., Pelillo, M., et al.: Object detection in aerial images: a large-scale benchmark and challenges. IEEE Trans. Pattern Anal. Mach. Intell. 44, 7778\u20137796 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1864_CR2","doi-asserted-by":"publisher","first-page":"2055","DOI":"10.1016\/j.neucom.2017.10.037","volume":"275","author":"J Wu","year":"2018","unstructured":"Wu, J., Wang, H., Li, N., Yao, P., Huang, Y., Yang, H.: Path planning for solar-powered UAV in urban environment. Neurocomputing 275, 2055\u20132065 (2018)","journal-title":"Neurocomputing"},{"key":"1864_CR3","doi-asserted-by":"publisher","first-page":"24300","DOI":"10.1109\/JIOT.2022.3189214","volume":"9","author":"K Liu","year":"2022","unstructured":"Liu, K., Zheng, J.: UAV trajectory optimization for time-constrained data collection in UAV-enabled environmental monitoring systems. IEEE Internet Things J. 9, 24300\u201324314 (2022)","journal-title":"IEEE Internet Things J."},{"key":"1864_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijdrr.2024.104468","volume":"107","author":"CS Cheng","year":"2024","unstructured":"Cheng, C.S., Luo, L., Murphy, S., Lee, Y.C., Leite, F.: A framework to enhance disaster debris estimation with AI and aerial photogrammetry. Int. J. Disaster Risk Reduct. 107, 104468 (2024)","journal-title":"Int. J. Disaster Risk Reduct."},{"key":"1864_CR5","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1109\/TMI.2020.3023463","volume":"40","author":"A He","year":"2020","unstructured":"He, A., Li, T., Li, N., Wang, K., Fu, H.: CABNET: category attention block for imbalanced diabetic retinopathy grading. IEEE Trans. Med. Imaging 40, 143\u2013153 (2020)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1864_CR6","doi-asserted-by":"crossref","unstructured":"Roshanisefat, S., Kamali, H.M., Azar, K.Z., Dinakarrao, S.M.P., Karimi, N., Homayoun, H., Sasan, A.: DFSSD: deep faults and shallow state duality, a provably strong obfuscation solution for circuits with restricted access to scan chain. In: Proceedings of the IEEE VLSI Test Symposium, pp. 1\u20136 (2020)","DOI":"10.1109\/VTS48691.2020.9107629"},{"key":"1864_CR7","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1864_CR8","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Yeh, I.H., Liao, H.Y.M.: YOLOV9: learning what you want to learn using programmable gradient information. arXiv preprint arXiv:2402.13616 (2024)","DOI":"10.1007\/978-3-031-72751-1_1"},{"key":"1864_CR9","unstructured":"Wang, A., Chen, H., Liu, L., Chen, K., Lin, Z., Han, J., Ding, G.: YOLOV10: real-time end-to-end object detection. arXiv preprint arXiv:2405.14458 (2024)"},{"key":"1864_CR10","unstructured":"Jocher, G., Qiu, J.: Ultralytics YOLO11. AGPL-3.0 License (2024). https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"1864_CR11","unstructured":"Dosovitskiy, A.: An image is worth $$16 \\times 16$$ words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1864_CR12","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229 (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1864_CR13","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., Wei, Y.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"1864_CR14","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"key":"1864_CR15","unstructured":"Zhang, H., Li, F., Liu, S., Zhang, L., Su, H., Zhu, J., Ni, L.M., Shum, H.Y.: DINO: DETR with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605 (2022)"},{"key":"1864_CR16","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: DETRS beat YOLOS on real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 16965\u201316974 (2024)","DOI":"10.1109\/CVPR52733.2024.01605"},{"key":"1864_CR17","first-page":"20014","volume":"34","author":"A Ali","year":"2021","unstructured":"Ali, A., Touvron, H., Caron, M., Bojanowski, P., Douze, M., Joulin, A., Laptev, I., Neverova, N., Synnaeve, G., Verbeek, J., et al.: XCIT: cross-covariance image transformers. Adv. Neural. Inf. Process. Syst. 34, 20014\u201320027 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1864_CR18","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need (2017)"},{"key":"1864_CR19","unstructured":"Mehta, S., Rastegari, M.: Separable self-attention for mobile vision transformers. arXiv preprint arXiv:2206.02680 (2022)"},{"key":"1864_CR20","doi-asserted-by":"crossref","unstructured":"Shaker, A., Maaz, M., Rasheed, H., Khan, S., Yang, M.H., Khan, F.S.: SWIFTFORMER: efficient additive attention for transformer-based real-time mobile vision applications. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 17425\u201317436 (2023)","DOI":"10.1109\/ICCV51070.2023.01598"},{"key":"1864_CR21","unstructured":"Zhang, T., Li, L., Zhou, Y., Liu, W., Qian, C., Ji, X.: Cas-vit: Convolutional additive self-attention vision transformers for efficient mobile applications. arXiv preprint arXiv:2408.03703 (2024)"},{"key":"1864_CR22","doi-asserted-by":"crossref","unstructured":"Wang, A., Chen, H., Lin, Z., Han, J., Ding, G.: REPVIT: revisiting mobile CNN from VIT perspective. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 15909\u201315920 (2024)","DOI":"10.1109\/CVPR52733.2024.01506"},{"key":"1864_CR23","doi-asserted-by":"publisher","first-page":"4341","DOI":"10.1109\/TIP.2023.3297408","volume":"32","author":"Y Quan","year":"2023","unstructured":"Quan, Y., Zhang, D., Zhang, L., Tang, J.: Centralized feature pyramid for object detection. IEEE Trans. Image Process. 32, 4341\u20134354 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"1864_CR24","doi-asserted-by":"crossref","unstructured":"Tan, M., Pang, R., Le, Q.V.: EfficientDet: scalable and efficient object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 10781\u201310790 (2020)","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"1864_CR25","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"1864_CR26","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., Jia, J.: Path aggregation network for instance segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8759\u20138768 (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"1864_CR27","doi-asserted-by":"crossref","unstructured":"Yang, G., Lei, J., Zhu, Z., Cheng, S., Feng, Z., Liang, R.: AFPN: asymptotic feature pyramid network for object detection. In: Proceedings of the IEEE International Conference on Systems, Man, and Cybernetics, pp. 2184\u20132189 (2023)","DOI":"10.1109\/SMC53992.2023.10394415"},{"key":"1864_CR28","doi-asserted-by":"publisher","first-page":"11942","DOI":"10.1109\/TCSVT.2024.3426673","volume":"34","author":"M Zha","year":"2024","unstructured":"Zha, M., Fu, F., Pei, Y., Wang, G., Li, T., Tang, X., Yang, Y., Tao Shen, H.: Dual domain perception and progressive refinement for mirror detection. IEEE Trans. Circuits Syst. Video Technol. 34, 11942\u201311953 (2024)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1864_CR29","first-page":"51094","volume":"36","author":"C Wang","year":"2024","unstructured":"Wang, C., He, W., Nie, Y., Guo, J., Liu, C., Wang, Y., Han, K.: Gold-yolo: efficient object detector via gather-and-distribute mechanism. Adv. Neural. Inf. Process. Syst. 36, 51094\u201351112 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1864_CR30","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1007\/s00530-024-01342-8","volume":"30","author":"F Sun","year":"2024","unstructured":"Sun, F., He, N., Li, R., Wang, X., Xu, S.: GD-PAN: a multiscale fusion architecture applied to object detection in UAV aerial images. Multimedia Syst. 30, 8\u201313 (2024)","journal-title":"Multimedia Syst."},{"key":"1864_CR31","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., Guo, J., Ni, L.M., Zhang, L.: DN-DETR: accelerate DETR training by introducing query denoising. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 13619\u201313627 (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"1864_CR32","unstructured":"Yao, Z., Ai, J., Li, B., Zhang, C.: Efficient DETR: improving end-to-end object detector with dense prior. arXiv preprint arXiv:2408.03703 (2021)"},{"key":"1864_CR33","unstructured":"Lv, W., Zhao, Y., Chang, Q., Huang, K., Wang, G., Liu, Y.: RT-DETRV2: improved baseline with bag-of-freebies for real-time detection transformer. arXiv preprint arXiv:2407.17140 (2024)"},{"key":"1864_CR34","unstructured":"Du, D., Zhu, P., Wen, L., Bian, X., Lin, H., Hu, Q., Peng, T., Zheng, J., Wang, X., Zhang, Y., et\u00a0al.: VisDrone-DET2019: the vision meets drone object detection in image challenge results. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 213\u2013226 (2019)"},{"key":"1864_CR35","unstructured":"DC Tinyperson Dataset (2023). https:\/\/universe.roboflow.com\/chris-d-dbyby\/tinyperson"},{"key":"1864_CR36","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"1864_CR37","doi-asserted-by":"crossref","unstructured":"Sun, P., Zhang, R., Jiang, Y., Kong, T., Xu, C., Zhan, W., Tomizuka, M., Li, L., Yuan, Z., Wang, C., et\u00a0al.: Sparse R-CNN: end-to-end object detection with learnable proposals. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 14454\u201314463 (2021)","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"1864_CR38","doi-asserted-by":"crossref","unstructured":"Feng, C., Zhong, Y., Gao, Y., Scott, M.R., Huang, W.: TOOD: task-aligned one-stage object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3490\u20133499 (2021)","DOI":"10.1109\/ICCV48922.2021.00349"},{"key":"1864_CR39","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: Ultralytics YOLOv8. AGPL-3.0 License (2023). https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"1864_CR40","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1007\/s00530-024-01447-0","volume":"30","author":"S Peng","year":"2024","unstructured":"Peng, S., Fan, X., Tian, S., Yu, L.: PS-YOLO: a small object detector based on efficient convolution and multi-scale feature fusion. Multimedia Syst. 30, 241 (2024)","journal-title":"Multimedia Syst."},{"key":"1864_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.121366","volume":"686","author":"Q Fan","year":"2025","unstructured":"Fan, Q., Li, Y., Deveci, M., Zhong, K., Kadry, S.: LUD-YOLO: a novel lightweight object detection network for unmanned aerial vehicle. Inf. Sci. 686, 121366 (2025)","journal-title":"Inf. Sci."},{"key":"1864_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2024.107131","volume":"100","author":"H Wang","year":"2025","unstructured":"Wang, H., Wang, G., Li, Y., Zhang, K.: YOLO-HV: a fast YOLOV8-based method for measuring hemorrhage volumes. Biomed. Signal Process. Control 100, 107131 (2025)","journal-title":"Biomed. Signal Process. Control"},{"key":"1864_CR43","doi-asserted-by":"publisher","first-page":"6437","DOI":"10.1007\/s40747-023-01076-6","volume":"9","author":"W Xing","year":"2023","unstructured":"Xing, W., Cui, Z., Qi, J.: HRCTNET: a hybrid network with high-resolution representation for object detection in UAV image. Complex Intell. Syst. 9, 6437\u20136457 (2023)","journal-title":"Complex Intell. Syst."},{"key":"1864_CR44","doi-asserted-by":"publisher","first-page":"3329","DOI":"10.1007\/s00530-023-01182-y","volume":"29","author":"X Wang","year":"2023","unstructured":"Wang, X., He, N., Hong, C., Sun, F., Han, W., Wang, Q.: YOLO-ERF: lightweight object detector for UAV aerial images. Multimedia Syst. 29, 3329\u20133339 (2023)","journal-title":"Multimedia Syst."},{"key":"1864_CR45","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1007\/s11554-024-01485-x","volume":"21","author":"L Zihan","year":"2024","unstructured":"Zihan, L., Xu, W., Linyun, Z., Panlin, Y.: LightYOLO-S: a lightweight algorithm for detecting small targets. J. Real-Time Image Process. 21, 2\u201311 (2024)","journal-title":"J. Real-Time Image Process."},{"key":"1864_CR46","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1007\/s11760-024-03176-3","volume":"18","author":"A Mu","year":"2024","unstructured":"Mu, A., Wang, H., Meng, W., Chen, Y.: Small target detection in drone aerial images based on feature fusion. Signal Image Video Process. 18, 585\u2013598 (2024)","journal-title":"Signal Image Video Process."},{"key":"1864_CR47","doi-asserted-by":"crossref","unstructured":"Li, Y., Chen, Y., Wang, N., Zhang, Z.: Scale-aware trident networks for object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 6054\u20136063 (2019)","DOI":"10.1109\/ICCV.2019.00615"},{"key":"1864_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, S., Wang, X., Wang, J., Pang, J., Lyu, C., Zhang, W., Luo, P., Chen, K.: Dense distinct query for end-to-end object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7329\u20137338 (2023)","DOI":"10.1109\/CVPR52729.2023.00708"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01864-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01864-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01864-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T09:04:44Z","timestamp":1757927084000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01864-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,2]]},"references-count":48,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1864"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01864-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,7,2]]},"assertion":[{"value":"1 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"292"}}