{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T01:13:23Z","timestamp":1778116403185,"version":"3.51.4"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.eswa.2026.131710","type":"journal-article","created":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T16:55:44Z","timestamp":1771865744000},"page":"131710","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Towards multimodal underwater object detection: A bidirectional feature recomposition network and visual-sonar dataset"],"prefix":"10.1016","volume":"316","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-8875-8317","authenticated-orcid":false,"given":"Yujie","family":"Wu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7050-4346","authenticated-orcid":false,"given":"Wenling","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1567-1398","authenticated-orcid":false,"given":"Cong","family":"Lin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7751-4120","authenticated-orcid":false,"given":"Mingxin","family":"Hou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1383-6558","authenticated-orcid":false,"given":"Mingxin","family":"Liu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.131710_bib0001","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109868","article-title":"Underwater object classification combining SAS and transferred optical-to-SAS imagery","volume":"144","author":"Abu","year":"2023","journal-title":"Pattern Recognition"},{"issue":"2","key":"10.1016\/j.eswa.2026.131710_bib0002","doi-asserted-by":"crossref","first-page":"893","DOI":"10.1007\/s11160-025-09938-7","article-title":"Temporally-scheduled ROV-based monitoring to detect behavioural rhythms of deep-sea megafauna","volume":"35","author":"Chatzievangelou","year":"2025","journal-title":"Reviews in Fish Biology and Fisheries"},{"issue":"4","key":"10.1016\/j.eswa.2026.131710_bib0003","doi-asserted-by":"crossref","first-page":"970","DOI":"10.1002\/rob.22432","article-title":"Uamfdet: Acoustic-optical fusion for underwater multi-modal object detection","volume":"42","author":"Chen","year":"2025","journal-title":"Journal of Field Robotics"},{"key":"10.1016\/j.eswa.2026.131710_bib0004","unstructured":"Chen, Q., Su, X., Zhang, X., Wang, J., Chen, J., Shen, Y., Han, C., Chen, Z., Xu, W., Li, F., Zhang, S., Yao, K., Ding, E., Zhang, G., & Wang, J. (2024). Lw-detr: A transformer replacement to yolo for real-time detection. https:\/\/arxiv.org\/abs\/2406.03459."},{"key":"10.1016\/j.eswa.2026.131710_bib0005","series-title":"Pattern recognition","first-page":"236","article-title":"Deyolo: Dual-feature-enhancement yolo for cross-modality object detection","author":"Chen","year":"2025"},{"key":"10.1016\/j.eswa.2026.131710_bib0006","series-title":"Proceedings of the IEEE\/CVF winter conference on applications of computer vision (WACV)","first-page":"4432","article-title":"More than just attention: Improving cross-modal attentions with contrastive constraints for image-text matching","author":"Chen","year":"2023"},{"issue":"1","key":"10.1016\/j.eswa.2026.131710_bib0007","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1007\/s44295-025-00065-4","article-title":"Utnet: event-rgb multimodal fusion model for underwater transparent organism detection","volume":"3","author":"Guo","year":"2025","journal-title":"Intelligent Marine Technology and Systems"},{"key":"10.1016\/j.eswa.2026.131710_bib0008","first-page":"1","article-title":"Seg2sonar: A full-class sample synthesis method applied to underwater sonar image target detection, recognition, and segmentation tasks","volume":"62","author":"Huang","year":"2024","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"issue":"1","key":"10.1016\/j.eswa.2026.131710_bib0009","doi-asserted-by":"crossref","first-page":"5473","DOI":"10.1038\/s41598-025-89202-x","article-title":"MemoCMT: Multimodal emotion recognition using cross-modal transformer-based feature fusion","volume":"15","author":"Khan","year":"2025","journal-title":"Scientific Reports"},{"key":"10.1016\/j.eswa.2026.131710_bib0010","unstructured":"Khanam, R., & Hussain, M. (2024). Yolov11: An overview of the key architectural enhancements. https:\/\/arxiv.org\/abs\/2410.17725."},{"key":"10.1016\/j.eswa.2026.131710_bib0011","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102360","article-title":"Multimodal fusion-based spatiotemporal incremental learning for ocean environment perception under sparse observation","volume":"108","author":"Lei","year":"2024","journal-title":"Information Fusion"},{"key":"10.1016\/j.eswa.2026.131710_bib0012","first-page":"1","article-title":"Underwater image captioning with aquasketch-enhanced cross-scale information fusion","volume":"63","author":"Li","year":"2025","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"10.1016\/j.eswa.2026.131710_bib0013","doi-asserted-by":"crossref","first-page":"440","DOI":"10.1016\/j.isprsjprs.2024.12.002","article-title":"Underwater image captioning: Challenges, models, and datasets","volume":"220","author":"Li","year":"2025","journal-title":"ISPRS Journal of Photogrammetry and Remote Sensing"},{"issue":"22","key":"10.1016\/j.eswa.2026.131710_bib0014","article-title":"Side-scan sonar image generation under zero and few samples for underwater target detection","volume":"16","author":"Li","year":"2024","journal-title":"Remote Sensing"},{"issue":"6","key":"10.1016\/j.eswa.2026.131710_bib0015","doi-asserted-by":"crossref","first-page":"18573","DOI":"10.1007\/s11042-023-16071-9","article-title":"A survey of multi-source image fusion","volume":"83","author":"Li","year":"2024","journal-title":"Multimedia Tools and Applications"},{"key":"10.1016\/j.eswa.2026.131710_bib0016","doi-asserted-by":"crossref","first-page":"197448","DOI":"10.1109\/ACCESS.2020.3034275","article-title":"A hybrid framework for underwater image enhancement","volume":"8","author":"Li","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.eswa.2026.131710_bib0017","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.128914","article-title":"Discrete zeroing neural dynamic with noise tolerance for image deblurring","volume":"296","author":"Lin","year":"2026","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131710_bib0018","series-title":"Proceedings of 2023 china science and technology information resource management and service annual conference (COINFO 2023)","first-page":"15","article-title":"Intelligent intelligence perception technology and applications based on space-air-ground multi-modal data fusion","author":"Lin","year":"2024"},{"key":"10.1016\/j.eswa.2026.131710_bib0019","article-title":"Lfn-yolo: precision underwater small object detection via a lightweight reparameterized approach","volume":"Volume 11 - 2024","author":"Liu","year":"2025","journal-title":"Frontiers in Marine Science"},{"key":"10.1016\/j.eswa.2026.131710_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.ecoinf.2023.102401","article-title":"Ywnet: A convolutional block attention-based fusion deep learning method for complex underwater small target detection","volume":"79","author":"Liu","year":"2024","journal-title":"Ecological Informatics"},{"key":"10.1016\/j.eswa.2026.131710_bib0021","doi-asserted-by":"crossref","unstructured":"Liu, Z., Tang, H., Amini, A., Yang, X., Mao, H., Rus, D., & Han, S. (2024b). Bevfusion: Multi-task multi-sensor fusion with unified bird\u2019s-eye view representation. https:\/\/arxiv.org\/abs\/2205.13542.","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"10.1016\/j.eswa.2026.131710_bib0022","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.127194","article-title":"Cgcn-fmf:1d convolutional neural network based feature fusion and multi graph fusion for semi-supervised learning","volume":"277","author":"Peng","year":"2025","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131710_bib0023","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","first-page":"6070","article-title":"Dynamic snake convolution based on topological geometric constraints for tubular structure segmentation","author":"Qi","year":"2023"},{"key":"10.1016\/j.eswa.2026.131710_bib0024","unstructured":"Qingyun, F., Dapeng, H., & Zhaokui, W. (2022). Cross-modality fusion transformer for multispectral object detection. https:\/\/arxiv.org\/abs\/2111.00273."},{"key":"10.1016\/j.eswa.2026.131710_bib0025","doi-asserted-by":"crossref","first-page":"1497","DOI":"10.1109\/JSTARS.2020.3041316","article-title":"Yolors: Object detection in multimodal remote sensing imagery","volume":"14","author":"Sharma","year":"2021","journal-title":"IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing"},{"key":"10.1016\/j.eswa.2026.131710_bib0026","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109913","article-title":"Icafusion: Iterative cross-attention guided feature fusion for multispectral object detection","volume":"145","author":"Shen","year":"2024","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.eswa.2026.131710_bib0027","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.124958","article-title":"Multi-scale fusion and efficient feature extraction for enhanced sonar image object detection","volume":"256","author":"Shi","year":"2024","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131710_bib0028","series-title":"Data intelligence and cognitive informatics","first-page":"529","article-title":"A review on YOLOv8 and its advancements","author":"Sohan","year":"2024"},{"key":"10.1016\/j.eswa.2026.131710_bib0029","first-page":"1","article-title":"Rgb and optimal waveband image fusion for real-time underwater clear image acquisition","volume":"72","author":"Sun","year":"2023","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"10.1016\/j.eswa.2026.131710_bib0030","first-page":"1","article-title":"Drone-based RGB-infrared cross-modality vehicle detection via uncertainty-aware learning","author":"Sun","year":"2022","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.eswa.2026.131710_bib0031","unstructured":"Tian, Y., Ye, Q., & Doermann, D. (2025). Yolov12: Attention-centric real-time object detectors. https:\/\/arxiv.org\/abs\/2502.12524."},{"key":"10.1016\/j.eswa.2026.131710_bib0032","series-title":"Advances in neural information processing systems","first-page":"107984","article-title":"Yolov10: Real-time end-to-end object detection","volume":"vol. 37","author":"Wang","year":"2024"},{"key":"10.1016\/j.eswa.2026.131710_bib0033","series-title":"Computer vision \u2013 ECCV 2024","first-page":"1","article-title":"Yolov9: Learning what you want to learn using programmable gradient information","author":"Wang","year":"2025"},{"key":"10.1016\/j.eswa.2026.131710_bib0034","first-page":"1","article-title":"Large foundation model empowered discriminative underwater image enhancement","volume":"63","author":"Wang","year":"2025","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"10.1016\/j.eswa.2026.131710_bib0035","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.103693","article-title":"Watercyclediffusion: Visual-textual fusion empowered underwater image enhancement","volume":"127","author":"Wang","year":"2026","journal-title":"Information Fusion"},{"issue":"5","key":"10.1016\/j.eswa.2026.131710_bib0036","doi-asserted-by":"crossref","DOI":"10.3390\/rs15051303","article-title":"Multi-modal multi-stage underwater side-scan sonar target recognition based on synthetic images","volume":"15","author":"Wang","year":"2023","journal-title":"Remote Sensing"},{"key":"10.1016\/j.eswa.2026.131710_bib0037","series-title":"2022 international conference on image processing and media computing (ICIPMC)","first-page":"17","article-title":"Underwater object detection based on enhanced YOLO","author":"Wang","year":"2022"},{"key":"10.1016\/j.eswa.2026.131710_bib0038","series-title":"2025 44th chinese control conference (CCC)","first-page":"8259","article-title":"An underwater acoustic-optic paired dataset based on attention-aware multi-stroke style transfer","author":"Wang","year":"2025"},{"issue":"3","key":"10.1016\/j.eswa.2026.131710_bib0039","doi-asserted-by":"crossref","first-page":"976","DOI":"10.1109\/JOE.2024.3379481","article-title":"Side-scan sonar underwater target detection: Combining the diffusion model with an improved YOLOv7 model","volume":"49","author":"Wen","year":"2024","journal-title":"IEEE Journal of Oceanic Engineering"},{"key":"10.1016\/j.eswa.2026.131710_bib0040","doi-asserted-by":"crossref","DOI":"10.1016\/j.compbiomed.2022.106457","article-title":"Aggn: Attention-based glioma grading network with multi-scale feature extraction and multi-modal information fusion","volume":"152","author":"Wu","year":"2023","journal-title":"Computers in Biology and Medicine"},{"issue":"12","key":"10.1016\/j.eswa.2026.131710_bib0041","doi-asserted-by":"crossref","first-page":"14378","DOI":"10.1109\/TITS.2023.3307589","article-title":"Crossfuser: Multi-modal feature fusion for end-to-end autonomous driving under unseen weather conditions","volume":"24","author":"Wu","year":"2023","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"10.1016\/j.eswa.2026.131710_bib0042","doi-asserted-by":"crossref","unstructured":"Xu, C., Jin, Y., Ma, S., Qian, R., Fang, H., Liu, J., Liu, X., Ngai, E. C. H., Atlas, W. I., Connors, K. M., & Spoljaric, M. A. (2025). Exploring multimodal foundation AI and expert-in-the-loop for sustainable management of wild salmon fisheries in indigenous rivers. https:\/\/arxiv.org\/abs\/2505.06637.","DOI":"10.24963\/ijcai.2025\/1106"},{"issue":"14","key":"10.1016\/j.eswa.2026.131710_bib0043","doi-asserted-by":"crossref","first-page":"10361","DOI":"10.1007\/s00521-023-08239-z","article-title":"Cross-modality complementary information fusion for multispectral pedestrian detection","volume":"35","author":"Yan","year":"2023","journal-title":"Neural Computing and Applications"},{"key":"10.1016\/j.eswa.2026.131710_bib0044","first-page":"1","article-title":"A lightweight underwater target detection network for forward-looking sonar images","volume":"73","author":"Yang","year":"2024","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"issue":"3","key":"10.1016\/j.eswa.2026.131710_bib0045","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1007\/s11802-023-5296-z","article-title":"Ugc-yolo: Underwater environment object detection based on yolo with a global context block","volume":"22","author":"Yang","year":"2023","journal-title":"Journal of Ocean University of China"},{"issue":"2","key":"10.1016\/j.eswa.2026.131710_bib0046","doi-asserted-by":"crossref","first-page":"919","DOI":"10.1109\/JOE.2025.3529121","article-title":"Ao-uod: A novel paradigm for underwater object detection using acousto-optic fusion","volume":"50","author":"Yu","year":"2025","journal-title":"IEEE Journal of Oceanic Engineering"},{"issue":"1","key":"10.1016\/j.eswa.2026.131710_bib0047","article-title":"Sonar image target detection based on deep learning","volume":"2022","author":"Yu","year":"2022","journal-title":"Mathematical Problems in Engineering"},{"key":"10.1016\/j.eswa.2026.131710_bib0048","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110714","article-title":"Yolo-facev2: A scale and occlusion aware face detector","volume":"155","author":"Yu","year":"2024","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.eswa.2026.131710_bib0049","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1016\/j.inffus.2021.06.008","article-title":"Image fusion meets deep learning: A survey and perspective","volume":"76","author":"Zhang","year":"2021","journal-title":"Information Fusion"},{"key":"10.1016\/j.eswa.2026.131710_bib0050","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1016\/j.inffus.2021.06.008","article-title":"Image fusion meets deep learning: A survey and perspective","volume":"76","author":"Zhang","year":"2021","journal-title":"Information Fusion"},{"issue":"3","key":"10.1016\/j.eswa.2026.131710_bib0051","doi-asserted-by":"crossref","DOI":"10.3390\/app14031095","article-title":"Efficient small-object detection in underwater images using the enhanced YOLOv8 network","volume":"14","author":"Zhang","year":"2024","journal-title":"Applied Sciences"},{"key":"10.1016\/j.eswa.2026.131710_bib0052","first-page":"1","article-title":"Underwater sonar target detection based on improved scEMA-YOLOv8","volume":"21","author":"Zheng","year":"2024","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"10.1016\/j.eswa.2026.131710_bib0053","unstructured":"zyc (2025). Flir dataset. https:\/\/universe.roboflow.com\/zyc-zuleq\/flir-pl1yt. visited on 2025-09-10 https:\/\/universe.roboflow.com\/zyc-zuleq\/flir-pl1yt."}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095741742600624X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095741742600624X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T00:49:12Z","timestamp":1778114952000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S095741742600624X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":53,"alternative-id":["S095741742600624X"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131710","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Towards multimodal underwater object detection: A bidirectional feature recomposition network and visual-sonar dataset","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131710","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"131710"}}