{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,19]],"date-time":"2026-04-19T06:40:33Z","timestamp":1776580833168,"version":"3.51.2"},"reference-count":54,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52371350"],"award-info":[{"award-number":["52371350"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002855","name":"Ministry of Science and Technology of the People's Republic of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002855","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFC2809104"],"award-info":[{"award-number":["2023YFC2809104"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.eswa.2026.131683","type":"journal-article","created":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T07:32:09Z","timestamp":1771054329000},"page":"131683","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["UPinst: Real time instance segmentation of underwater optical images"],"prefix":"10.1016","volume":"314","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6336-8932","authenticated-orcid":false,"given":"Jiahao","family":"Wan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0100-6290","authenticated-orcid":false,"given":"Rui","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1485-5873","authenticated-orcid":false,"given":"Bo","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2969-2894","authenticated-orcid":false,"given":"Ye","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1181-415X","authenticated-orcid":false,"given":"Shengyuan","family":"Luo","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.131683_bib0001","unstructured":"Bochkovskiy, A., Wang, C.-Y., & Liao, H.-Y. M. (2020). YOLOv4: Optimal speed and accuracy of object detection. arXiv: 2004.10934."},{"key":"10.1016\/j.eswa.2026.131683_bib0002","series-title":"Computer vision \u2013 ECCV 2020","first-page":"583","article-title":"TIDE: A general toolbox for identifying object detection errors","author":"Bolya","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0003","series-title":"2019\u202fIEEE\/CVF International conference on computer vision (ICCV)","first-page":"9157","article-title":"YOLACT: Real-time instance segmentation","author":"Bolya","year":"2019"},{"issue":"2","key":"10.1016\/j.eswa.2026.131683_bib0004","doi-asserted-by":"crossref","first-page":"1108","DOI":"10.1109\/TPAMI.2020.3014297","article-title":"YOLACT++: Better real-time instance segmentation","volume":"44","author":"Bolya","year":"2022","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.eswa.2026.131683_bib0005","series-title":"International conference on learning representations (ICLR)","article-title":"How attentive are graph attention networks?","author":"Brody","year":"2022"},{"key":"10.1016\/j.eswa.2026.131683_bib0006","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"27653","article-title":"Poly kernel inception network for remote sensing detection","author":"Cai","year":"2024"},{"key":"10.1016\/j.eswa.2026.131683_bib0007","series-title":"2018\u202fIEEE\/CVF Conference on computer vision and pattern recognition","first-page":"6154","article-title":"Cascade R-CNN: Delving into high quality object detection","author":"Cai","year":"2018"},{"key":"10.1016\/j.eswa.2026.131683_bib0008","series-title":"Computer vision \u2013 ECCV 2020","first-page":"1","article-title":"SipMask: Spatial information preservation for fast image and video instance segmentation","author":"Cao","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0009","series-title":"2020\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"8573","article-title":"BlendMask: Top-down meets bottom-up for instance segmentation","author":"Chen","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0010","series-title":"2019\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"4974","article-title":"Hybrid task cascade for instance segmentation","author":"Chen","year":"2019"},{"key":"10.1016\/j.eswa.2026.131683_bib0011","unstructured":"Chen, K., Wang, J., Pang, J., Cao, Y., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Xu, J. et al. (2019b). MMDetection: Open MMLab detection toolbox and benchmark. arXiv: 1906.07155,."},{"key":"10.1016\/j.eswa.2026.131683_bib0012","series-title":"2022\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"1290","article-title":"Masked-attention mask transformer for universal image segmentation","author":"Cheng","year":"2022"},{"key":"10.1016\/j.eswa.2026.131683_bib0013","series-title":"Proceedings of the 33rd ACM international conference on multimedia","first-page":"343","article-title":"UIS-Mamba: Exploring Mamba for underwater instance segmentation via dynamic tree scan and hidden state weaken","author":"Cong","year":"2025"},{"key":"10.1016\/j.eswa.2026.131683_bib0014","series-title":"2017\u202fIEEE International conference on computer vision (ICCV)","first-page":"764","article-title":"Deformable convolutional networks","author":"Dai","year":"2017"},{"key":"10.1016\/j.eswa.2026.131683_bib0015","series-title":"International conference on learning representations (ICLR)","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"10.1016\/j.eswa.2026.131683_bib0016","unstructured":"Ge, Z., Liu, S., Wang, F., Li, Z., & Sun, J. (2021). YOLOX: Exceeding YOLO series in 2021. arXiv: 2107.08430,."},{"key":"10.1016\/j.eswa.2026.131683_bib0017","unstructured":"Goodfellow, I. J., Shlens, J., & Szegedy, C. (2014). Explaining and harnessing adversarial examples. arXiv: 1412.6572,."},{"issue":"2","key":"10.1016\/j.eswa.2026.131683_bib0018","doi-asserted-by":"crossref","first-page":"386","DOI":"10.1109\/TPAMI.2018.2844175","article-title":"Mask R-CNN","volume":"42","author":"He","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"8","key":"10.1016\/j.eswa.2026.131683_bib0019","doi-asserted-by":"crossref","first-page":"2011","DOI":"10.1109\/TPAMI.2019.2913372","article-title":"Squeeze-and-excitation networks","volume":"42","author":"Hu","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.eswa.2026.131683_bib0020","series-title":"2018\u202fIEEE International conference on big data (Big data)","first-page":"2503","article-title":"YOLO-LITE: A real-time object detection algorithm optimized for non-GPU computers","author":"Huang","year":"2018"},{"key":"10.1016\/j.eswa.2026.131683_bib0021","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2024.112000","article-title":"Underwater image enhancement via cross-wise transformer network focusing on pre-post differences","volume":"164","author":"Huang","year":"2024","journal-title":"Applied Soft Computing"},{"key":"10.1016\/j.eswa.2026.131683_bib0022","series-title":"Advances in neural information processing systems (neurIPS)","first-page":"2017","article-title":"Spatial transformer networks","volume":"vol. 28","author":"Jaderberg","year":"2015"},{"key":"10.1016\/j.eswa.2026.131683_bib0023","series-title":"2023\u202fIEEE\/CVF International conference on computer vision (ICCV)","first-page":"1305","article-title":"WaterMASK: Instance segmentation for underwater imagery","author":"Lian","year":"2023"},{"key":"10.1016\/j.eswa.2026.131683_bib0024","unstructured":"Lian, S., Zhang, Z., Li, H., Li, W., Yang, L. T., Kwong, S., & Cong, R. (2024). Diving into underwater: Segment anything model guided underwater salient instance segmentation and a large-scale dataset. arXiv: 2406.06039,."},{"key":"10.1016\/j.eswa.2026.131683_bib0025","series-title":"2017\u202fIEEE Conference on computer vision and pattern recognition (CVPR)","first-page":"2117","article-title":"Feature pyramid networks for object detection","author":"Lin","year":"2017"},{"key":"10.1016\/j.eswa.2026.131683_bib0026","series-title":"Computer vision \u2013 ECCV 2014","first-page":"740","article-title":"Microsoft COCO: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.eswa.2026.131683_bib0027","series-title":"2021\u202fIEEE International conference on multimedia and expo workshops (ICMEW)","first-page":"1","article-title":"A dataset and benchmark of underwater object detection for robot picking","author":"Liu","year":"2021"},{"key":"10.1016\/j.eswa.2026.131683_bib0028","doi-asserted-by":"crossref","first-page":"3695","DOI":"10.1109\/TIP.2020.2964518","article-title":"Connecting image denoising and high-level vision tasks via deep learning","volume":"29","author":"Liu","year":"2020","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.eswa.2026.131683_bib0029","series-title":"Introduction to graph neural networks","first-page":"39","article-title":"Graph attention networks","author":"Liu","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0030","unstructured":"Lyu, C., Zhang, W., Huang, H., Zhou, Y., Wang, Y., Liu, Y., Zhang, S., & Chen, K. (2022). RTMDet: An empirical study of designing real-time object detectors. arXiv: 2212.07784,."},{"key":"10.1016\/j.eswa.2026.131683_bib0031","series-title":"2021\u202fIEEE Winter conference on applications of computer vision (WACV)","first-page":"3139","article-title":"Rotate to attend: Convolutional triplet attention module","author":"Misra","year":"2021"},{"key":"10.1016\/j.eswa.2026.131683_bib0032","series-title":"2021 British machine vision conference (BMVC)","article-title":"Mask-aware IoU for anchor assignment in real-time instance segmentation","author":"Oksuz","year":"2021"},{"key":"10.1016\/j.eswa.2026.131683_bib0033","series-title":"Advances in neural information processing systems (neurIPS)","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume":"vol. 32","author":"Paszke","year":"2019"},{"key":"10.1016\/j.eswa.2026.131683_bib0034","series-title":"2016\u202fIEEE Conference on computer vision and pattern recognition (CVPR)","first-page":"779","article-title":"You only look once: Unified, real-time object detection","author":"Redmon","year":"2016"},{"key":"10.1016\/j.eswa.2026.131683_bib0035","unstructured":"Redmon, J., & Farhadi, A. (2018). YOLOv3: An incremental improvement. arXiv: 1804.02767,."},{"key":"10.1016\/j.eswa.2026.131683_bib0036","doi-asserted-by":"crossref","first-page":"336","DOI":"10.1007\/s11263-019-01228-7","article-title":"Grad-CAM: Visual explanations from deep networks via gradient-based localization","volume":"128","author":"Selvaraju","year":"2020","journal-title":"International Journal of Computer Vision"},{"key":"10.1016\/j.eswa.2026.131683_bib0037","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1016\/j.neucom.2022.01.017","article-title":"Global mask R-CNN for marine ship instance segmentation","volume":"480","author":"Sun","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.eswa.2026.131683_bib0038","series-title":"2020\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"10781","article-title":"EfficientDet: Scalable and efficient object detection","author":"Tan","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0039","series-title":"2024\u202fIEEE International conference on robotics and automation (ICRA)","first-page":"10034","article-title":"HIC-YOLOv5: Improved YOLOv5 for small object detection","author":"Tang","year":"2024"},{"key":"10.1016\/j.eswa.2026.131683_bib0040","series-title":"Computer vision \u2013 ECCV 2020","first-page":"282","article-title":"Conditional convolutions for instance segmentation","author":"Tian","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0041","series-title":"2020\u202fIEEE\/CVF Conference on computer vision and pattern recognition workshops (CVPRW)","first-page":"1571","article-title":"CSPNet: A new backbone that can enhance learning capability of CNN","author":"Wang","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0042","series-title":"2017\u202fIEEE Conference on computer vision and pattern recognition (CVPR)","first-page":"3156","article-title":"Residual attention network for image classification","author":"Wang","year":"2017"},{"key":"10.1016\/j.eswa.2026.131683_bib0043","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.125318","article-title":"Automated fish counting system based on instance segmentation in aquaculture","volume":"259","author":"Wang","year":"2025","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131683_bib0044","series-title":"2020\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"11534","article-title":"ECA-Net: Efficient channel attention for deep convolutional neural networks","author":"Wang","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0045","series-title":"2023\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"14408","article-title":"InternImage: Exploring large-scale vision foundation models with deformable convolutions","author":"Wang","year":"2023"},{"key":"10.1016\/j.eswa.2026.131683_bib0046","series-title":"Computer vision \u2013 ECCV 2020","first-page":"649","article-title":"SOLO: Segmenting objects by locations","author":"Wang","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0047","series-title":"2020\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"17721","article-title":"SOLOv2: Dynamic and fast instance segmentation","author":"Wang","year":"2020"},{"key":"10.1016\/j.eswa.2026.131683_bib0048","series-title":"2020\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"9313","article-title":"CenterMask: Single shot instance segmentation with point representation","author":"Wang","year":"2020"},{"issue":"6","key":"10.1016\/j.eswa.2026.131683_bib0049","doi-asserted-by":"crossref","first-page":"1983","DOI":"10.1007\/s11554-020-01007-5","article-title":"Joint multi-task cascade for instance segmentation","volume":"17","author":"Wen","year":"2020","journal-title":"Journal of Real-Time Image Processing"},{"key":"10.1016\/j.eswa.2026.131683_bib0050","series-title":"Computer vision \u2013 ECCV 2018","first-page":"3","article-title":"CBAM: Convolutional block attention module","author":"Woo","year":"2018"},{"key":"10.1016\/j.eswa.2026.131683_bib0051","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.127488","article-title":"Coordinate-aware mask R-CNN with group normalization: A underwater marine animal instance segmentation framework","volume":"583","author":"Yi","year":"2024","journal-title":"Neurocomputing"},{"issue":"4","key":"10.1016\/j.eswa.2026.131683_bib0052","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1007\/s11554-024-01479-9","article-title":"AM YOLO: Adaptive multi-scale YOLO for ship instance segmentation","volume":"21","author":"Yuan","year":"2024","journal-title":"Journal of Real-Time Image Processing"},{"key":"10.1016\/j.eswa.2026.131683_bib0053","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2024.111291","article-title":"YOLO-based marine organism detection using two-terminal attention mechanism and difficult-sample resampling","volume":"153","author":"Zhou","year":"2024","journal-title":"Applied Soft Computing"},{"key":"10.1016\/j.eswa.2026.131683_bib0054","series-title":"2019\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"9308","article-title":"Deformable ConvNets v2: More deformable, better results","author":"Zhu","year":"2019"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426005968?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426005968?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,19]],"date-time":"2026-04-19T05:44:57Z","timestamp":1776577497000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426005968"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":54,"alternative-id":["S0957417426005968"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131683","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"UPinst: Real time instance segmentation of underwater optical images","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131683","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"131683"}}