{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T06:37:53Z","timestamp":1771051073449,"version":"3.50.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T00:00:00Z","timestamp":1738713600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T00:00:00Z","timestamp":1738713600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s11554-025-01633-x","type":"journal-article","created":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T20:19:58Z","timestamp":1738786798000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Drone object detection incorporating multi-head mixed self-attention and dynamic regression mapping loss function"],"prefix":"10.1007","volume":"22","author":[{"given":"Qinghua","family":"Su","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianhong","family":"Mu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheng","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kaizheng","family":"Wan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangyu","family":"Qi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhichao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Juntao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,5]]},"reference":[{"issue":"4","key":"1633_CR1","doi-asserted-by":"publisher","first-page":"1637","DOI":"10.1016\/j.patcog.2011.10.003","volume":"45","author":"Z Pei","year":"2012","unstructured":"Pei, Z., Zhang, Y., Yang, T., Zhang, X., Yang, Y.-H.: A novel multi-object detection method in complex scene using synthetic aperture imaging. Pattern Recognit. 45(4), 1637\u20131658 (2012). https:\/\/doi.org\/10.1016\/j.patcog.2011.10.003","journal-title":"Pattern Recognit."},{"key":"1633_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.geits.2023.100092","author":"J Ruan","year":"2023","unstructured":"Ruan, J., Cui, H., Huang, Y., Li, T., Wu, C., Zhang, K.: A review of occluded objects detection in real complex scenarios for autonomous driving. Green Energy Intell. Transp. (2023). https:\/\/doi.org\/10.1016\/j.geits.2023.100092","journal-title":"Green Energy Intell. Transp."},{"issue":"3","key":"1633_CR3","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/JPROC.2023.3238524","volume":"111","author":"Z Zou","year":"2023","unstructured":"Zou, Z., Chen, K., Shi, Z., Guo, Y., Ye, J.: Object detection in 20 years: a survey. Proc. IEEE 111(3), 257\u2013276 (2023)","journal-title":"Proc. IEEE"},{"key":"1633_CR4","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1016\/j.neucom.2023.01.056","volume":"527","author":"S Xu","year":"2023","unstructured":"Xu, S., Zhang, M., Song, W., Mei, H., He, Q., Liotta, A.: A systematic review and analysis of deep learning-based underwater object detection. Neurocomputing 527, 204\u2013232 (2023)","journal-title":"Neurocomputing"},{"key":"1633_CR5","doi-asserted-by":"publisher","first-page":"58443","DOI":"10.1109\/ACCESS.2020.2983149","volume":"8","author":"E Yurtsever","year":"2020","unstructured":"Yurtsever, E., Lambert, J., Carballo, A., Takeda, K.: A survey of autonomous driving: common practices and emerging technologies. IEEE Access 8, 58443\u201358469 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.2983149","journal-title":"IEEE Access"},{"key":"1633_CR6","doi-asserted-by":"crossref","unstructured":"Terven, J. and Cordova-Esparza, D.: A Comprehensive Review of YOLO Architectures in Computer Vision: From YOLOv1 to YOLOv8 and YOLO-NAS. ArXiv Prepr. http:\/\/arxiv.org\/2304.00501 (2023)","DOI":"10.3390\/make5040083"},{"issue":"3","key":"1633_CR7","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/JPROC.2023.3238524","volume":"111","author":"Z Zou","year":"2023","unstructured":"Zou, Z., Chen, K., Shi, Z., Guo, Y., Ye, J.: Object detection in 20 years: a survey. Proc. IEEE 111(3), 257\u2013276 (2023). https:\/\/doi.org\/10.1109\/JPROC.2023.3238524","journal-title":"Proc. IEEE"},{"key":"1633_CR8","doi-asserted-by":"crossref","unstructured":"Chu, X., Zheng, A., Zhang, X. and Sun, J.: Detection in crowded scenes: one proposal, multiple predictions. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12214\u201312223 (2020)","DOI":"10.1109\/CVPR42600.2020.01223"},{"issue":"6","key":"1633_CR9","doi-asserted-by":"publisher","first-page":"1875","DOI":"10.1007\/s11263-021-01461-z","volume":"129","author":"S Zhang","year":"2021","unstructured":"Zhang, S., Chen, D., Yang, J., Schiele, B.: Guided attention in CNNs for occluded pedestrian detection and re-identification. Int. J. Comput. Vis. 129(6), 1875\u20131892 (2021). https:\/\/doi.org\/10.1007\/s11263-021-01461-z","journal-title":"Int. J. Comput. Vis."},{"key":"1633_CR10","doi-asserted-by":"crossref","unstructured":"Zhang, S., Yang, J., and Schiele, B.: Occluded pedestrian detection through guided attention in cnns. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6995\u20137003 (2018). Accessed: May 18, 2024. Available: http:\/\/openaccess.thecvf.com\/content_cvpr_2018\/html\/Zhang_Occluded_Pedestrian_Detection_CVPR_2018_paper.html","DOI":"10.1109\/CVPR.2018.00731"},{"key":"1633_CR11","doi-asserted-by":"publisher","unstructured":"Yu, J., Jiang, Y., Wang, Z., Cao, Z. and Huang, T.: UnitBox: an advanced object detection network. In Proceedings of the 24th ACM International Conference on Multimedia, Amsterdam The Netherlands: ACM, pp. 516\u2013520 (2016). https:\/\/doi.org\/10.1145\/2964284.2967274","DOI":"10.1145\/2964284.2967274"},{"key":"1633_CR12","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y. , Goyal, P., Girshick, R., He, K. and Doll\u00e1r, P.: Focal loss for dense object detection. In Proceedings of the IEEE international conference on computer vision, pp. 2980\u20132988 (2017). Accessed: May 18, 2024. Available: http:\/\/openaccess.thecvf.com\/content_iccv_2017\/html\/Lin_Focal_Loss_for_ICCV_2017_paper.html","DOI":"10.1109\/ICCV.2017.324"},{"key":"1633_CR13","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I. and Savarese, S.: Generalized intersection over union: a metric and a loss for bounding box regression. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 658\u2013666 (2019). Accessed: May 18, 2024. Available: http:\/\/openaccess.thecvf.com\/content_CVPR_2019\/html\/Rezatofighi_Generalized_Intersection_Over_Union_A_Metric_and_a_Loss_for_CVPR_2019_paper.html","DOI":"10.1109\/CVPR.2019.00075"},{"key":"1633_CR14","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., Li, J., Ye, R. and Ren, D.: Distance-IoU loss: Faster and better learning for bounding box regression. In Proceedings of the AAAI Conference on Artificial Intelligence, pp. 12993\u201313000 (2020)","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"1633_CR15","unstructured":"Zhang, H. and Zhang, S.: Shape-IoU: More Accurate Metric Considering Bounding Box Shape and Scale. arXiv: arXiv:2312.17663. (2024). Accessed: May 18, 2024. Available: http:\/\/arxiv.org\/abs\/2312.17663"},{"key":"1633_CR16","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B. and Belongie, S.: Feature pyramid networks for object detection. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017). Accessed: May 10, 2024. Available: http:\/\/openaccess.thecvf.com\/content_cvpr_2017\/html\/Lin_Feature_Pyramid_Networks_CVPR_2017_paper.html","DOI":"10.1109\/CVPR.2017.106"},{"issue":"1","key":"1633_CR17","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","volume":"45","author":"K Han","year":"2022","unstructured":"Han, K., et al.: A survey on vision transformer. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 87\u2013110 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2022.3152247","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1633_CR18","doi-asserted-by":"publisher","first-page":"3340","DOI":"10.48550\/arXiv.1706.03762","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani, A. et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30, 3340\u20133350 (2017). https:\/\/doi.org\/10.48550\/arXiv.1706.03762","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1633_CR19","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1016\/j.neucom.2021.03.091","volume":"452","author":"Z Niu","year":"2021","unstructured":"Niu, Z., Zhong, G., Yu, H.: A review on the attention mechanism of deep learning. Neurocomputing 452, 48\u201362 (2021). https:\/\/doi.org\/10.1016\/j.neucom.2021.03.091","journal-title":"Neurocomputing"},{"issue":"3","key":"1633_CR20","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s41095-022-0271-y","volume":"8","author":"M-H Guo","year":"2022","unstructured":"Guo, M.-H., et al.: Attention mechanisms in computer vision: a survey. Comput. Vis. Media 8(3), 331\u2013368 (2022). https:\/\/doi.org\/10.1007\/s41095-022-0271-y","journal-title":"Comput. Vis. Media"},{"issue":"5","key":"1633_CR21","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1037\/h0031333","volume":"78","author":"MI Posner","year":"1971","unstructured":"Posner, M.I., Boies, S.J.: Components of attention. Psychol. Rev. 78(5), 391 (1971). https:\/\/doi.org\/10.1037\/h0031333","journal-title":"Psychol. Rev."},{"key":"1633_CR22","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., and Sun, G.: Squeeze-and-excitation networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1633_CR23","doi-asserted-by":"crossref","unstructured":"Wang, B., Wu, Q., Zhu, P., Li, P., Zuo, W. and Hu, Q.: ECA-Net: efficient channel attention for deep convolutional neural networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11534\u201311542 (2020)","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"1633_CR24","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., and Feng, J.: Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13713\u201313722 (2021)","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"1633_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106442","volume":"123","author":"D Wan","year":"2023","unstructured":"Wan, D., Lu, R., Shen, S., Xu, T., Lang, X., Ren, Z.: Mixed local channel attention for object detection. Eng. Appl. Artif. Intell. 123, 106442 (2023). https:\/\/doi.org\/10.1016\/j.engappai.2023.106442","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1633_CR26","doi-asserted-by":"crossref","unstructured":"Zhu, C., He, Y. and Savvides, M.: Feature selective anchor-free module for single-shot object detection. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 840\u2013849 (2019)","DOI":"10.1109\/CVPR.2019.00093"},{"key":"1633_CR27","first-page":"21002","volume":"33","author":"X Li","year":"2020","unstructured":"Li, X., et al.: Generalized focal loss: Learning qualified and distributed bounding boxes for dense object detection. Adv. Neural. Inf. Process. Syst. 33, 21002\u201321012 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"6","key":"1633_CR28","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1007\/s11554-024-01562-1","volume":"21","author":"J Mu","year":"2024","unstructured":"Mu, J., Su, Q., Wang, X., Liang, W., Xu, S., Wan, K.: A small object detection architecture with concatenated detection heads and multi-head mixed self-attention mechanism. J. Real-Time Image Process. 21(6), 184 (2024). https:\/\/doi.org\/10.1007\/s11554-024-01562-1","journal-title":"J. Real-Time Image Process."},{"key":"1633_CR29","unstructured":"Tong, Z., Chen, Y., Xu, Z. and Yu, R.: Wise-IoU: Bounding Box Regression Loss with Dynamic Focusing Mechanism. arXiv: arXiv:2301.10051 (2023). Accessed: May 16, 2024"},{"key":"1633_CR30","unstructured":"Zhang, H. and Zhang, S.: Focaler-IoU: More Focused Intersection over Union Loss. arXiv: arXiv:2401.10525 (2024). Accessed: May 19, 2024"},{"key":"1633_CR31","doi-asserted-by":"crossref","unstructured":"Blaschko, M.B., Kannala, J., Rahtu, E.: Non maximal suppression in cascaded ranking models. In Image Analysis: 18th Scandinavian Conference, SCIA,: Espoo, Finland, June 17\u201320, 2013. Proceedings 18. Springer 2013, 408\u2013419 (2013)","DOI":"10.1007\/978-3-642-38886-6_39"},{"key":"1633_CR32","doi-asserted-by":"crossref","unstructured":"Bodla, N., Singh, B., Chellappa, R. and Davis, L. S.: Soft-NMS\u2013improving object detection with one line of code. In Proceedings of the IEEE International Conference on Computer Vision, pp. 5561\u20135569 (2017)","DOI":"10.1109\/ICCV.2017.593"},{"issue":"8","key":"1633_CR33","doi-asserted-by":"publisher","first-page":"526","DOI":"10.3390\/drones7080526","volume":"7","author":"Z Zhang","year":"2023","unstructured":"Zhang, Z.: Drone-YOLO: an efficient neural network method for target detection in drone images. Drones 7(8), 526 (2023). https:\/\/doi.org\/10.3390\/drones7080526","journal-title":"Drones"},{"key":"1633_CR34","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"1633_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2023.103752","volume":"90","author":"M Wang","year":"2023","unstructured":"Wang, M., et al.: FE-YOLOv5: Feature enhancement network based on YOLOv5 for small object detection. J. Vis. Commun. Image Represent. 90, 103752 (2023). https:\/\/doi.org\/10.1016\/j.jvcir.2023.103752","journal-title":"J. Vis. Commun. Image Represent."},{"key":"1633_CR36","doi-asserted-by":"publisher","DOI":"10.3390\/drones7050304","author":"Y Li","year":"2023","unstructured":"Li, Y., Fan, Q., Huang, H., Han, Z., Gu, Q.: A modified YOLOv8 detection network for UAV aerial image recognition. Drones (2023). https:\/\/doi.org\/10.3390\/drones7050304","journal-title":"Drones"},{"key":"1633_CR37","doi-asserted-by":"publisher","unstructured":"Khanam, R. and Hussain, M.: YOLOv11: an overview of the key architectural enhancements. arXiv: arXiv:2410.17725 (2024). https:\/\/doi.org\/10.48550\/arXiv.2410.17725.","DOI":"10.48550\/arXiv.2410.17725"},{"issue":"3","key":"1633_CR38","doi-asserted-by":"publisher","first-page":"190","DOI":"10.3390\/drones7030190","volume":"7","author":"C Chen","year":"2023","unstructured":"Chen, C., et al.: Yolo-based uav technology: a review of the research and its applications. Drones 7(3), 190 (2023)","journal-title":"Drones"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01633-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-025-01633-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01633-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T06:22:24Z","timestamp":1746253344000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-025-01633-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,5]]},"references-count":38,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["1633"],"URL":"https:\/\/doi.org\/10.1007\/s11554-025-01633-x","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,5]]},"assertion":[{"value":"20 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"56"}}