{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T18:07:16Z","timestamp":1774721236140,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,10,10]],"date-time":"2024-10-10T00:00:00Z","timestamp":1728518400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,10]],"date-time":"2024-10-10T00:00:00Z","timestamp":1728518400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s11554-024-01562-1","type":"journal-article","created":{"date-parts":[[2024,10,10]],"date-time":"2024-10-10T11:02:04Z","timestamp":1728558124000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["A small object detection architecture with concatenated detection heads and multi-head mixed self-attention mechanism"],"prefix":"10.1007","volume":"21","author":[{"given":"Jianhong","family":"Mu","sequence":"first","affiliation":[]},{"given":"Qinghua","family":"Su","sequence":"additional","affiliation":[]},{"given":"Xiyu","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Wenhui","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Sheng","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Kaizheng","family":"Wan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,10]]},"reference":[{"key":"1562_CR1","unstructured":"Li, Z., Yang, L., Zhou, F.: FSSD: feature fusion single shot multibox detector. arXiv arXiv:1712.00960 (2024)"},{"key":"1562_CR2","doi-asserted-by":"crossref","unstructured":"Liu, Z., Gao, G., Sun, L., Fang, Z.: HRDNet: high-resolution detection network for small objects. In: 2021 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136. IEEE (2021). https:\/\/ieeexplore.ieee.org\/abstract\/document\/9428241\/. Accessed 22 May 2024","DOI":"10.1109\/ICME51207.2021.9428241"},{"key":"1562_CR3","doi-asserted-by":"publisher","first-page":"1968","DOI":"10.1109\/TMM.2021.3074273","volume":"24","author":"C Deng","year":"2021","unstructured":"Deng, C., Wang, M., Liu, L., Liu, Y., Jiang, Y.: Extended feature pyramid network for small object detection. IEEE Trans. Multimedia 24, 1968\u20131979 (2021). https:\/\/doi.org\/10.1109\/TMM.2021.3074273","journal-title":"IEEE Trans. Multimedia"},{"issue":"8","key":"1562_CR4","doi-asserted-by":"publisher","first-page":"526","DOI":"10.3390\/drones7080526","volume":"7","author":"Z Zhang","year":"2023","unstructured":"Zhang, Z.: Drone-YOLO: an efficient neural network method for target detection in drone images. Drones 7(8), 526 (2023). https:\/\/doi.org\/10.3390\/drones7080526","journal-title":"Drones"},{"issue":"1","key":"1562_CR5","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","volume":"45","author":"K Han","year":"2022","unstructured":"Han, K., et al.: A survey on vision transformer. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 87\u2013110 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2022.3152247","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1562_CR6","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"1562_CR7","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1016\/j.neucom.2021.03.091","volume":"452","author":"Z Niu","year":"2021","unstructured":"Niu, Z., Zhong, G., Yu, H.: A review on the attention mechanism of deep learning. Neurocomputing 452, 48\u201362 (2021). https:\/\/doi.org\/10.1016\/j.neucom.2021.03.091","journal-title":"Neurocomputing"},{"issue":"3","key":"1562_CR8","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s41095-022-0271-y","volume":"8","author":"M-H Guo","year":"2022","unstructured":"Guo, M.-H., et al.: Attention mechanisms in computer vision: a survey. Comput. Vis. Media 8(3), 331\u2013368 (2022). https:\/\/doi.org\/10.1007\/s41095-022-0271-y","journal-title":"Comput. Vis. Media"},{"issue":"5","key":"1562_CR9","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1037\/h0031333","volume":"78","author":"MI Posner","year":"1971","unstructured":"Posner, M.I., Boies, S.J.: Components of attention. Psychol. Rev. 78(5), 391 (1971). https:\/\/doi.org\/10.1037\/h0031333","journal-title":"Psychol. Rev."},{"key":"1562_CR10","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1562_CR11","doi-asserted-by":"crossref","unstructured":"Wang, Q., Wu, B., Zhu, P., Li, P., Zuo, W., Hu, Q.: ECA-Net: efficient channel attention for deep convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11534\u201311542 (2020)","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"1562_CR12","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng, J:. Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13713\u201313722 (2021)","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"1562_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106442","volume":"123","author":"D Wan","year":"2023","unstructured":"Wan, D., Lu, R., Shen, S., Xu, T., Lang, X., Ren, Z.: Mixed local channel attention for object detection. Eng. Appl. Artif. Intell. 123, 106442 (2023). https:\/\/doi.org\/10.1016\/j.engappai.2023.106442","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1562_CR14","doi-asserted-by":"crossref","unstructured":"Zhu, C., He, Y., Savvides, M.: Feature selective anchor-free module for single-shot object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 840\u2013849 (2019)","DOI":"10.1109\/CVPR.2019.00093"},{"key":"1562_CR15","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., Li, J., Ye, R., Ren, D.: Distance-IoU loss: faster and better learning for bounding box regression. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 12993\u201313000 (2020)","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"1562_CR16","first-page":"21002","volume":"33","author":"X Li","year":"2020","unstructured":"Li, X., et al.: Generalized focal loss: learning qualified and distributed bounding boxes for dense object detection. Adv. Neural. Inf. Process. Syst. 33, 21002\u201321012 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"10","key":"1562_CR17","doi-asserted-by":"publisher","first-page":"7853","DOI":"10.1007\/s00521-022-08077-5","volume":"35","author":"J Wang","year":"2023","unstructured":"Wang, J., Chen, Y., Dong, Z., Gao, M.: Improved YOLOv5 network for real-time multi-scale traffic sign detection. Neural Comput. Appl. 35(10), 7853\u20137865 (2023). https:\/\/doi.org\/10.1007\/s00521-022-08077-5","journal-title":"Neural Comput. Appl."},{"key":"1562_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2023.103752","volume":"90","author":"M Wang","year":"2023","unstructured":"Wang, M., et al.: FE-YOLOv5: feature enhancement network based on YOLOv5 for small object detection. J. Vis. Commun. Image Represent. 90, 103752 (2023)","journal-title":"J. Vis. Commun. Image Represent."},{"issue":"3","key":"1562_CR19","doi-asserted-by":"publisher","first-page":"1787","DOI":"10.1007\/s00371-023-02886-y","volume":"40","author":"S Zeng","year":"2024","unstructured":"Zeng, S., Yang, W., Jiao, Y., Geng, L., Chen, X.: SCA-YOLO: a new small object detection model for UAV images. Vis. Comput. 40(3), 1787\u20131803 (2024). https:\/\/doi.org\/10.1007\/s00371-023-02886-y","journal-title":"Vis. Comput."},{"key":"1562_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Ye, M., Zhu, G., Liu, Y., Guo, P., Yan, J.: FFCA-YOLO for small object detection in remote sensing images. IEEE Trans. Geosci. Remote Sens. (2024). https:\/\/ieeexplore.ieee.org\/abstract\/document\/10423050\/. Accessed 05 Aug 2024.","DOI":"10.1109\/TGRS.2024.3363057"},{"key":"1562_CR21","doi-asserted-by":"crossref","unstructured":"Wang, H., Liu, C., Cai, Y., Chen, L., Li, Y.: YOLOv8-QSD: an improved small object detection algorithm for autonomous vehicles based on YOLOv8. IEEE Trans. Instrum. Meas. (2024). https:\/\/ieeexplore.ieee.org\/abstract\/document\/10474434\/. Accessed 05 Aug 2024","DOI":"10.1109\/TIM.2024.3379090"},{"key":"1562_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122669","volume":"241","author":"Y Zhang","year":"2024","unstructured":"Zhang, Y., Zhang, H., Huang, Q., Han, Y., Zhao, M.: DsP-YOLO: an anchor-free network with DsPAN for small object detection of multiscale defects. Expert Syst. Appl. 241, 122669 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2023.122669","journal-title":"Expert Syst. Appl."},{"key":"1562_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107206","volume":"127","author":"R Jing","year":"2024","unstructured":"Jing, R., Zhang, W., Liu, Y., Li, W., Li, Y., Liu, C.: An effective method for small object detection in low-resolution images. Eng. Appl. Artif. Intell. 127, 107206 (2024). https:\/\/doi.org\/10.1016\/j.engappai.2023.107206","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1562_CR24","unstructured":"Wang, J., Xu, C., Yang, W., Yu, L.: A normalized Gaussian Wasserstein distance for tiny object detection. arXiv: arXiv:2110.13389 (2022). Accessed 19 Jun 2024"},{"key":"1562_CR25","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Liang, D., Zhang, S., Huang, X., Li, B., Hu, S.: Traffic-sign detection and classification in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2110\u20132118 (2016). http:\/\/openaccess.thecvf.com\/content_cvpr_2016\/html\/Zhu_Traffic-Sign_Detection_and_CVPR_2016_paper.html. Accessed 22 May 2024","DOI":"10.1109\/CVPR.2016.232"},{"key":"1562_CR26","doi-asserted-by":"crossref","unstructured":"Yu, F., et al.: Bdd100k: a diverse driving dataset for heterogeneous multitask learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2636\u20132645 (2020). http:\/\/openaccess.thecvf.com\/content_CVPR_2020\/html\/Yu_BDD100K_A_Diverse_Driving_Dataset_for_Heterogeneous_Multitask_Learning_CVPR_2020_paper.html. Accessed 22 May 2024","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"1562_CR27","unstructured":"Chen, X., et al.: HAT: hybrid attention transformer for image restoration. arXiv: arXiv:2309.05239 (2024). Accessed 05 Jun 2024"},{"key":"1562_CR28","doi-asserted-by":"crossref","unstructured":"Chattopadhay, A., Sarkar, A., Howlader, P., Balasubramanian, V.N.: Grad-cam++: generalized gradient-based visual explanations for deep convolutional networks. In: 2018 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 839\u2013847. IEEE (2018)","DOI":"10.1109\/WACV.2018.00097"},{"key":"1562_CR29","unstructured":"Wang, C., et al.: Gold-YOLO: efficient object detector via gather-and-distribute mechanism. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"1562_CR30","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"1562_CR31","doi-asserted-by":"crossref","unstructured":"Pang, J., Chen, K., Shi, J., Feng, H., Ouyang, W., Lin, D.: Libra R-CNN: towards balanced learning for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 821\u2013830 (2019). http:\/\/openaccess.thecvf.com\/content_CVPR_2019\/html\/Pang_Libra_R-CNN_Towards_Balanced_Learning_for_Object_Detection_CVPR_2019_paper.html. Accessed 22 May 2024.","DOI":"10.1109\/CVPR.2019.00091"},{"key":"1562_CR32","series-title":"Lecture notes in networks and systems","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1007\/978-3-030-95892-3_31","volume-title":"Intelligent autonomous systems 16","author":"D Paz","year":"2022","unstructured":"Paz, D., Zhang, H., Christensen, H.I.: TridentNet: a conditional generative model for dynamic trajectory generation. In: Ang, M.H., Jr., Asama, H., Lin, W., Foong, S. (eds.) Intelligent autonomous systems 16. Lecture notes in networks and systems, vol. 412, pp. 403\u2013416. Springer International Publishing, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-030-95892-3_31"},{"key":"1562_CR33","doi-asserted-by":"publisher","unstructured":"Wang, A., et al.: YOLOv10: real-time end-to-end object detection. arXiv:2405.14458 (2024). https:\/\/doi.org\/10.48550\/arXiv.2405.14458.","DOI":"10.48550\/arXiv.2405.14458"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01562-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-024-01562-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01562-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:16:17Z","timestamp":1732799777000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-024-01562-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,10]]},"references-count":33,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1562"],"URL":"https:\/\/doi.org\/10.1007\/s11554-024-01562-1","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,10]]},"assertion":[{"value":"25 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 October 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"184"}}