{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T22:12:57Z","timestamp":1777932777908,"version":"3.51.4"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T00:00:00Z","timestamp":1763683200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T00:00:00Z","timestamp":1763683200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["42374050"],"award-info":[{"award-number":["42374050"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["42174051"],"award-info":[{"award-number":["42174051"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s11554-025-01810-y","type":"journal-article","created":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T08:23:36Z","timestamp":1763713416000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["LUW-DETR: a lightweight underwater object detection based on DETR"],"prefix":"10.1007","volume":"23","author":[{"given":"Zhenxiong","family":"Liu","sequence":"first","affiliation":[]},{"given":"Jun","family":"Han","sequence":"additional","affiliation":[]},{"given":"Guojun","family":"Zhai","sequence":"additional","affiliation":[]},{"given":"Wenkui","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,21]]},"reference":[{"key":"1810_CR1","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1016\/j.neucom.2023.01.056","volume":"527","author":"S Xu","year":"2023","unstructured":"Xu, S., Zhang, M., Song, W., Mei, H., He, Q., Liotta, A.: A systematic review and analysis of deep learning-based underwater object detection. Neurocomputing 527, 204\u2013232 (2023)","journal-title":"Neurocomputing"},{"key":"1810_CR2","doi-asserted-by":"publisher","first-page":"2785","DOI":"10.1007\/s11831-020-09486-2","volume":"28","author":"L Yang","year":"2021","unstructured":"Yang, L., Liu, Y., Yu, H., Fang, X., Song, L., Li, D., Chen, Y.: Computer vision models in intelligent aquaculture with emphasis on fish detection and behavior analysis: a review. Arch. Comput. Methods Eng. 28, 2785\u20132816 (2021)","journal-title":"Arch. Comput. Methods Eng."},{"key":"1810_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.ecoinf.2022.101847","volume":"72","author":"A Al Muksit","year":"2022","unstructured":"Al Muksit, A., Hasan, F., Emon, M.F.H.B., Haque, M.R., Anwary, A.R., Shatabda, S.: YOLO-fish: a robust fish detection model to detect fish in realistic underwater environment. Ecol. Inform. 72, 101847 (2022)","journal-title":"Ecol. Inform."},{"key":"1810_CR4","doi-asserted-by":"publisher","first-page":"14057","DOI":"10.1007\/s11227-024-06020-0","volume":"80","author":"X Chen","year":"2024","unstructured":"Chen, X., Fan, C., Shi, J., Wang, H., Yao, H.: Underwater target detection and embedded deployment based on lightweight YOLO-GN. J. Supercomput. 80, 14057\u201314084 (2024)","journal-title":"J. Supercomput."},{"key":"1810_CR5","doi-asserted-by":"crossref","unstructured":"Xie, X., Cheng, G., Wang, J., Yao, X., Han, J.: Oriented R-CNN for object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3520\u20133529, Montreal, QC, Canada (2021)","DOI":"10.1109\/ICCV48922.2021.00350"},{"key":"1810_CR6","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, Santiago, Chile, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"1810_CR7","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: Delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6154\u20136162, Salt Lake City, UT, USA (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"1810_CR8","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39, 1137\u20131149 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1810_CR9","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: SSD: Single shot multibox detector. In: European Conference on Computer Vision, pp. 21\u201337. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1810_CR10","unstructured":"Ultralytics: YOLOv5. https:\/\/github.com\/ultralytics\/yolov5. Accessed 1 Nov 2021 (2021)"},{"key":"1810_CR11","unstructured":"Li, C., Li, L., Jiang, H., Weng, K., Geng, Y., Li, L., Wei, X.: YOLOv6: A single-stage object detection framework for industrial applications. arXiv preprint arXiv:2209.02976 (2022)"},{"key":"1810_CR12","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Bochkovskiy, A., Liao, H.Y.M.: YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"1810_CR13","unstructured":"Jocher, G., Chaurasia, A., Stoken, A., et al.: YOLOv8 release v8.1.0. https:\/\/github.com\/ultralytics\/ultralytics\/releases\/tag\/v8.1.0. Accessed 1 Jan 2024 (2024)"},{"key":"1810_CR14","unstructured":"Jocher, G., Qiu, J.: Ultralytics YOLO11: real-time object detection with lightweight architecture. In: Proceedings of the International Conference on Computer Vision and Machine Learning, pp. 452\u2013461, Tokyo, Japan (2024)"},{"key":"1810_CR15","first-page":"1616","volume":"9","author":"N Carion","year":"2022","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., & Zagoruyko, S. (2020, August). End-to-end object detection with transformers. In European conference on computer vision (pp. 213-229). Cham: Springer International Publishing.","journal-title":"European Conference on Computer Vision"},{"key":"1810_CR16","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: Detrs beat yolos on real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 16965\u201316974, Seattle, WA, USA (2024)","DOI":"10.1109\/CVPR52733.2024.01605"},{"key":"1810_CR17","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"key":"1810_CR18","doi-asserted-by":"publisher","first-page":"15171","DOI":"10.1109\/TPAMI.2023.3319634","volume":"45","author":"X Xie","year":"2023","unstructured":"Xie, X., Lang, C., Miao, S., Cheng, G., Li, K., Han, J.: Mutual-assistance learning for object detection. IEEE Trans. Pattern Anal. Mach. Intell. 45, 15171\u201315184 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1810_CR19","unstructured":"Li, Y., Yuan, G., Wen, Y., Hu, J., Evangelidis, G., Tulyakov, S., Wang, Y., Ren, J.: EfficientFormer: vision transformers at MobileNet speed. In: Advances in Neural Information Processing Systems, vol. 35, pp. 12934\u201312949 (2022)"},{"key":"1810_CR20","doi-asserted-by":"crossref","unstructured":"Liu, X., Peng, H., Zheng, N., Yang, Y., Hu, H., Yuan, Y.: EfficientViT: memory efficient vision transformer with cascaded group attention. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 17332\u201317341, Vancouver, BC, Canada (2023)","DOI":"10.1109\/CVPR52729.2023.01386"},{"key":"1810_CR21","doi-asserted-by":"crossref","unstructured":"Wang, A., Chen, H., Lin, Z., Han, J., Ding, G.: RepVit: revisiting mobile CNN from ViT perspective. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 15909\u201315920, Seattle, WA, USA (2024)","DOI":"10.1109\/CVPR52733.2024.01506"},{"key":"1810_CR22","doi-asserted-by":"crossref","unstructured":"Chen, J., Kao, S.-H., He, H., Zhuo, W., Wen, S., Lee, C.-H., Chan, S.-H.G.: Run, Don\u2019t Walk: chasing higher FLOPS for faster neural networks. arXiv preprint arXiv:2303.03667 (2023)","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"1810_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2024.3506630","volume":"63","author":"Y Xie","year":"2025","unstructured":"Xie, Y., Liu, S., Chen, H., Cao, S., Zhang, H., Feng, D., Wan, Q., Zhu, J., Zhu, Q.: Localization, balance, and affinity: a stronger multifaceted collaborative salient object detector in remote sensing images. IEEE Trans. Geosci. Remote Sens. 63, 1\u201317 (2025). https:\/\/doi.org\/10.1109\/TGRS.2024.3506630","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"issue":"1","key":"1810_CR24","doi-asserted-by":"publisher","first-page":"2346259","DOI":"10.1080\/17538947.2024.2346259","volume":"17","author":"J Zhu","year":"2024","unstructured":"Zhu, J., Zhang, J., Chen, H., Xie, Y., Gu, H., Lian, H.: A cross-view intelligent person search method based on multi-feature constraints. Int. J. Digit. Earth 17(1), 2346259 (2024)","journal-title":"Int. J. Digit. Earth"},{"key":"1810_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122018","volume":"238","author":"Y Liu","year":"2024","unstructured":"Liu, Y., An, D., Ren, Y., et al.: DP-FishNet: dual-path pyramid vision transformer-based underwater fish detection network. Expert Syst. Appl. 238, 122018 (2024)","journal-title":"Expert Syst. Appl."},{"key":"1810_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110222","volume":"149","author":"L Dai","year":"2024","unstructured":"Dai, L., Liu, H., Song, P., Liu, M.: A gated cross-domain collaborative network for underwater object detection. Pattern Recognit. 149, 110222 (2024)","journal-title":"Pattern Recognit."},{"key":"1810_CR27","doi-asserted-by":"publisher","first-page":"2291","DOI":"10.3390\/jmse12122291","volume":"12","author":"Z Li","year":"2024","unstructured":"Li, Z., Zheng, B., Chao, D., Zhu, W., Li, H., Duan, J., Zhang, X., Zhang, Z., Fu, W., Zhang, Y.: Underwater-YOLO: underwater object detection network with dilated deformable convolutions and dual-branch occlusion attention mechanism. J. Mar. Sci. Eng. 12, 2291 (2024)","journal-title":"J. Mar. Sci. Eng."},{"key":"1810_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2022.107522","volume":"204","author":"W Ji","year":"2023","unstructured":"Ji, W., Peng, J., Xu, B., Zhang, T.: Real-time detection of underwater river crab based on multi-scale pyramid fusion image enhancement and MobileCenterNet model. Comput. Electron. Agric. 204, 107522 (2023)","journal-title":"Comput. Electron. Agric."},{"key":"1810_CR29","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Liao, H.Y.M., Wu, Y.H., Chen, P.Y., Hsieh, J.W., Yeh, I.H.: CSPNet: a new backbone that can enhance learning capability of CNN. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 390\u2013391, Seattle, WA, USA (2020)","DOI":"10.1109\/CVPRW50498.2020.00203"},{"key":"1810_CR30","unstructured":"Pan, Z., Cai, J., Zhuang, B.: Fast vision transformers with HiLo attention. In: Advances in Neural Information Processing Systems, vol. 35, pp. 24985\u201324997 (2022)"},{"key":"1810_CR31","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778, Las Vegas, NV, USA (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1810_CR32","doi-asserted-by":"crossref","unstructured":"Chen, C., Zhang, Y., Lv, Q., Wei, S., Wang, X., Sun, X., Dong, J.: RRNet: a hybrid detector for object detection in drone-captured images. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 100\u2013108, Seoul, Korea (2019)","DOI":"10.1109\/ICCVW.2019.00018"},{"key":"1810_CR33","unstructured":"Han, Q., Fan, Z., Dai, Q., Sun, L., Cheng, M.-M., Liu, J., Wang, J.: On the connection between local attention and dynamic depth-wise convolution. In: Proceedings of the International Conference on Learning Representations (2022)"},{"key":"1810_CR34","doi-asserted-by":"crossref","unstructured":"Gheini, M., Ren, X., May, J.: Cross-attention is all you need: Adapting pretrained transformers for machine translation. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, Online and Punta Cana, Dominican Republic, pp. 1754\u20131765 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.132"},{"key":"1810_CR35","doi-asserted-by":"crossref","unstructured":"Liu, C., Li, H., Wang, S., Zhu, M., Wang, D., Fan, X., Wang, Z.: A dataset and benchmark of underwater object detection for robot picking. In: Proceedings of the IEEE International Conference on Multimedia and Expo Workshops, pp. 1\u20136, Shenzhen, China (2021)","DOI":"10.1109\/ICMEW53276.2021.9455997"},{"key":"1810_CR36","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1016\/j.neucom.2022.10.039","volume":"517","author":"C Fu","year":"2023","unstructured":"Fu, C., Liu, R., Fan, X., Chen, P., Fu, H., Yuan, W., Zhu, M., Luo, Z.: Rethinking general underwater object detection: datasets, challenges, and solutions. Neurocomputing 517, 243\u2013256 (2023)","journal-title":"Neurocomputing"},{"key":"1810_CR37","unstructured":"Song, Y., Zhou, Y., Qian, H., Du, X.: Rethinking performance gains in image dehazing networks. arXiv preprint arXiv:2209.11448 (2022)"},{"key":"1810_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zeng, H., Guo, S., Zhang, L.: Efficient long-range attention network for image super-resolution. In: Proceedings of the European Conference on Computer Vision, pp. 649\u2013667, Tel Aviv, Israel (2022)","DOI":"10.1007\/978-3-031-19790-1_39"},{"key":"1810_CR39","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, Y., Ge, Y., Zhao, S., Song, L., Yue, X., Shan, Y.: UniRepLKNet: a universal perception large-kernel ConvNet for audio video point cloud time-series and image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5513\u20135524, Seattle, WA, USA (2024)","DOI":"10.1109\/CVPR52733.2024.00527"},{"key":"1810_CR40","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, X., Han, J., Ding, G.: Diverse branch block: Building a convolution as an inception-like unit. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 10886\u201310895, Nashville, TN, USA (2021)","DOI":"10.1109\/CVPR46437.2021.01074"},{"key":"1810_CR41","doi-asserted-by":"crossref","unstructured":"Fan, Q., Huang, H., Chen, M., Liu, H., He, R.: RMT: retentive networks meet vision transformers. arXiv preprint arXiv:2309.11523 (2024)","DOI":"10.1109\/CVPR52733.2024.00539"},{"key":"1810_CR42","doi-asserted-by":"crossref","unstructured":"Zheng, M., Sun, L., Dong, J., Pan, J.: SMFANet: A lightweight self-modulation feature aggregation network for efficient image super-resolution. In: Proceedings of the European Conference on Computer Vision, pp. 567\u2013583, Milan, Italy (2024)","DOI":"10.1007\/978-3-031-72973-7_21"},{"key":"1810_CR43","doi-asserted-by":"crossref","unstructured":"Cai, X., Lai, Q., Wang, Y., Wang, W., Sun, Z., Yao, Y.: Poly kernel inception network for remote sensing detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 27706\u201327716, Seattle, WA, USA (2024)","DOI":"10.1109\/CVPR52733.2024.02617"},{"key":"1810_CR44","doi-asserted-by":"crossref","unstructured":"Qin, D., Leichner, C., Delakis, M., Fornoni, M., Luo, S., Yang, F., Wang, W., Banbury, C., Ye, C., Akin, B., Aggarwal, V., Zhu, T., Moro, D., Howard, A.: MobileNetV4\u2013universal models for the mobile ecosystem. arXiv preprint arXiv:2404.10518 (2024)","DOI":"10.1007\/978-3-031-73661-2_5"},{"key":"1810_CR45","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02247-9","author":"Y Li","year":"2024","unstructured":"Li, Y., Li, X., Dai, Y., Hou, Q., Liu, L., Liu, Y., Cheng, M.-M., Yang, J.: LSKNet: a foundation lightweight backbone for remote sensing. Int. J. Comput. Vis. (2024). https:\/\/doi.org\/10.1007\/s11263-024-02247-9","journal-title":"Int. J. Comput. Vis."},{"key":"1810_CR46","doi-asserted-by":"crossref","unstructured":"Huang, S., Lu, Z., Cun, X., Yu, Y., Zhou, X., Shen, X.: Deim: Detr with improved matching for fast convergence. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 15162\u201315171 (2025)","DOI":"10.1109\/CVPR52734.2025.01412"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01810-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-025-01810-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01810-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T16:49:58Z","timestamp":1770396598000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-025-01810-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,21]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["1810"],"URL":"https:\/\/doi.org\/10.1007\/s11554-025-01810-y","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,21]]},"assertion":[{"value":"16 August 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}],"article-number":"14"}}