{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T19:04:26Z","timestamp":1775070266648,"version":"3.50.1"},"reference-count":167,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2027,3,28]],"date-time":"2027-03-28T00:00:00Z","timestamp":1806192000000},"content-version":"am","delay-in-days":300,"URL":"http:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01 CA240808"],"award-info":[{"award-number":["R01 CA240808"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01 CA258987"],"award-info":[{"award-number":["R01 CA258987"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01 EB034691"],"award-info":[{"award-number":["R01 EB034691"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01 CA280135"],"award-info":[{"award-number":["R01 CA280135"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neucom.2026.133439","type":"journal-article","created":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T07:52:34Z","timestamp":1774338754000},"page":"133439","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Segment anything for video: A comprehensive review of video object segmentation and tracking from past to future"],"prefix":"10.1016","volume":"682","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1431-7191","authenticated-orcid":false,"given":"Guoping","family":"Xu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7361-2927","authenticated-orcid":false,"given":"Jayaram K.","family":"Udupa","sequence":"additional","affiliation":[]},{"given":"Yajun","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Hua-Chieh","family":"Shao","sequence":"additional","affiliation":[]},{"given":"Songlin","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0935-3999","authenticated-orcid":false,"given":"Wei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"You","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133439_bib1","article-title":"Imagenet classification with deep convolutional neural networks","volume":"25","author":"Krizhevsky","year":"2012","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"6","key":"10.1016\/j.neucom.2026.133439_bib2","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster R-CNN: towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"11","key":"10.1016\/j.neucom.2026.133439_bib3","doi-asserted-by":"crossref","first-page":"7118","DOI":"10.1002\/mp.15854","article-title":"Combining natural and artificial intelligence for robust automatic anatomy segmentation: Application in neck and thorax auto-contouring","volume":"49","author":"Udupa","year":"2022","journal-title":"Med Phys."},{"issue":"4","key":"10.1016\/j.neucom.2026.133439_bib4","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs","volume":"40","author":"Chen","year":"2018","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133439_bib5","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.109890","article-title":"Development of residual learning in deep neural networks for computer vision: a survey","volume":"142","author":"Xu","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.neucom.2026.133439_bib6","first-page":"1725","article-title":"Large-scale video classification with convolutional neural networks","author":"Karpathy","year":"2014","journal-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib7","first-page":"640","article-title":"Xmem: Long-term video object segmentation with an atkinson-shiffrin memory model","author":"Cheng","year":"2022","journal-title":"Eur. Conf. Comput. Vis."},{"issue":"12","key":"10.1016\/j.neucom.2026.133439_bib8","doi-asserted-by":"crossref","first-page":"4338","DOI":"10.1109\/TPAMI.2020.3005434","article-title":"Deep learning for 3d point clouds: A survey","volume":"43","author":"Guo","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133439_bib9","doi-asserted-by":"crossref","unstructured":"F. Milletari, N. Navab, and S.-A. Ahmadi, \"V-net: Fully convolutional neural networks for volumetric medical image segmentation.\" pp. 565-571.","DOI":"10.1109\/3DV.2016.79"},{"key":"10.1016\/j.neucom.2026.133439_bib10","doi-asserted-by":"crossref","first-page":"7889","DOI":"10.1109\/TIP.2021.3108405","article-title":"An unified recurrent video object segmentation framework for various surveillance environments","volume":"30","author":"Patil","year":"2021","journal-title":"IEEE Trans. Image Process"},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib11","doi-asserted-by":"crossref","first-page":"457","DOI":"10.1007\/s10462-022-10176-7","article-title":"Deep learning for video object segmentation: a review","volume":"56","author":"Gao","year":"2023","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.neucom.2026.133439_bib12","unstructured":"J. Zhu, Y. Qi, and J. Wu, \u201cMedical sam 2: Segment medical images as video via segment anything model 2,\u201d arXiv preprint arXiv:2408.00874, 2024."},{"issue":"4","key":"10.1016\/j.neucom.2026.133439_bib13","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3391743","article-title":"Video object segmentation and tracking: a survey","volume":"11","author":"Yao","year":"2020","journal-title":"Acm Trans. Intell. Syst. Technol."},{"key":"10.1016\/j.neucom.2026.133439_bib14","first-page":"9226","article-title":"Video Object Segmentation Using Space-Time Memory Networks","author":"Oh","year":"2019","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis. (ICCV)"},{"key":"10.1016\/j.neucom.2026.133439_bib15","first-page":"11781","article-title":"Rethinking space-time networks with improved memory coverage for efficient video object segmentation","volume":"34","author":"Cheng","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst. 34 (Neurips 2021)"},{"key":"10.1016\/j.neucom.2026.133439_bib16","first-page":"12889","article-title":"Hierarchical memory matching network for video object segmentation","author":"Seong","year":"2021","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib17","first-page":"661","article-title":"Video object segmentation with episodic graph memory networks","author":"Lu","year":"2020","journal-title":"Proc. Eur. Conf. Comput. Vis. (ECCV"},{"key":"10.1016\/j.neucom.2026.133439_bib18","first-page":"1877","article-title":"\u201cLanguage models are few-shot learners,\u201d","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst. 33 Neurips 2020"},{"key":"10.1016\/j.neucom.2026.133439_bib19","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst. 35 (Neurips 2022)"},{"key":"10.1016\/j.neucom.2026.133439_bib20","first-page":"4015","article-title":"Segment Anything","author":"Kirillov","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis. (ICCV)"},{"key":"10.1016\/j.neucom.2026.133439_bib21","unstructured":"J. Yang, M. Gao, Z. Li, S. Gao, F. Wang, and F. Zheng, \u201cTrack anything: Segment anything meets videos,\u201d arXiv preprint arXiv:2304.11968, 2023."},{"key":"10.1016\/j.neucom.2026.133439_bib22","unstructured":"Y. Cheng, L. Li, Y. Xu, X. Li, Z. Yang, W. Wang, and Y. Yang, \u201cSegment and track anything,\u201d arXiv preprint arXiv:2305.06558, 2023."},{"key":"10.1016\/j.neucom.2026.133439_bib23","doi-asserted-by":"crossref","first-page":"9302","DOI":"10.1109\/WACV61041.2025.00901","article-title":"Segment anything meets point tracking","author":"Raji\u010d","year":"2025","journal-title":"2025 IEEE\/CVF Winter Conf. Appl. Comput. Vis. (WACV)"},{"key":"10.1016\/j.neucom.2026.133439_bib24","unstructured":"J. Zhu, Z. Chen, Z. Hao, S. Chang, L. Zhang, D. Wang, H. Lu, B. Luo, J.-Y. He, and J.-P. Lan, \u201cTracking anything in high quality,\u201d arXiv preprint arXiv:2307.13974, 2023."},{"key":"10.1016\/j.neucom.2026.133439_bib25","article-title":"\u201cSAM 2: Segment Anything in Images and Videos,\u201d","author":"Ravi","year":"2025","journal-title":"Thirteen. Int. Conf. Learn. Represent."},{"issue":"6","key":"10.1016\/j.neucom.2026.133439_bib26","doi-asserted-by":"crossref","first-page":"7099","DOI":"10.1109\/TPAMI.2022.3225573","article-title":"A survey on deep learning technique for video segmentation","volume":"45","author":"Zhou","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"12","key":"10.1016\/j.neucom.2026.133439_bib27","doi-asserted-by":"crossref","first-page":"10138","DOI":"10.1109\/TPAMI.2024.3434373","article-title":"Transformer-Based Visual Segmentation: A Survey","volume":"46","author":"Li","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133439_bib28","unstructured":"C. Zhang, Y. Cui, W. Lin, G. Huang, Y. Rong, L. Liu, and S. Shan, \u201cSegment anything for videos: A systematic survey,\u201d arXiv preprint arXiv:2408.08315, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib29","unstructured":"T. Zhou, W. Xia, F. Zhang, B. Chang, W. Wang, Y. Yuan, E. Konukoglu, and D. Cremers, \u201cImage segmentation in foundation model era: A survey,\u201d arXiv preprint arXiv:2408.12957, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib30","unstructured":"Z. Jiaxing, and T. Hao, \u201cSAM2 for Image and Video Segmentation: A Comprehensive Survey,\u201d arXiv preprint arXiv:2503.12781, 2025."},{"key":"10.1016\/j.neucom.2026.133439_bib31","first-page":"6949","article-title":"A transductive approach for video object segmentation","author":"Zhang","year":"2020","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib32","first-page":"1296","article-title":"Swiftnet: Real-time video object segmentation","author":"Wang","year":"2021","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib33","first-page":"9481","article-title":"Feelvos: Fast end-to-end embedding learning for video object segmentation","author":"Voigtlaender","year":"2019","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib34","first-page":"332","article-title":"Collaborative video object segmentation by foreground-background integration","author":"Yang","year":"2020","journal-title":"Eur. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib35","first-page":"2491","article-title":"Associating Objects with Transformers for Video Object Segmentation","volume":"34","author":"Yang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst. 34 (Neurips 2021)"},{"key":"10.1016\/j.neucom.2026.133439_bib36","first-page":"213","article-title":"End-to-end object detection with transformers","author":"Carion","year":"2020","journal-title":"Eur. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib37","unstructured":"X. Zhu, W. Su, L. Lu, B. Li, X. Wang, and J. Dai, \"Deformable DETR: Deformable Transformers for End-to-End Object Detection.\""},{"key":"10.1016\/j.neucom.2026.133439_bib38","first-page":"17864","article-title":"Per-pixel classification is not all you need for semantic segmentation","volume":"34","author":"Cheng","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133439_bib39","first-page":"1290","article-title":"Masked-attention mask transformer for universal image segmentation","author":"Cheng","year":"2022","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib40","unstructured":"B. Cheng, A. Choudhuri, I. Misra, A. Kirillov, R. Girdhar, and A.G. Schwing, \u201cMask2former for video instance segmentation,\u201d arXiv preprint arXiv:2112.10764, 2021."},{"key":"10.1016\/j.neucom.2026.133439_bib41","first-page":"13914","article-title":"Tubeformer-deeplab: Video mask transformer","author":"Kim","year":"2022","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib42","doi-asserted-by":"crossref","first-page":"23109","DOI":"10.52202\/068431-1679","article-title":"VITA: Video Instance Segmentation via Object Token Association","volume":"35","author":"Heo","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst. 35 (Neurips 2022)"},{"key":"10.1016\/j.neucom.2026.133439_bib43","first-page":"18847","article-title":"Video k-net: A simple, strong, and unified baseline for video segmentation","author":"Li","year":"2022","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib44","first-page":"8741","article-title":"End-to-end video instance segmentation with transformers","author":"Wang","year":"2021","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib45","first-page":"27948","article-title":"Omg-seg: Is one model good enough for all segmentation?","author":"Li","year":"2024","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib46","article-title":"arXiv preprint","author":"Carion","year":"2025","journal-title":"Sam. 3 Segm. anything Concepts"},{"key":"10.1016\/j.neucom.2026.133439_bib47","first-page":"16000","article-title":"Masked autoencoders are scalable vision learners","author":"He","year":"2022","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib48","first-page":"29441","article-title":"Hiera: A hierarchical vision transformer without the bells-and-whistles","author":"Ryali","year":"2023","journal-title":"Int. Conf. Mach. Learn."},{"key":"10.1016\/j.neucom.2026.133439_bib49","first-page":"2663","article-title":"Learning video object segmentation from static images","author":"Perazzi","year":"2017","journal-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib50","unstructured":"T. Zhou, W. Luo, Q. Ye, Z. Shi, and J. Chen, \u201cSam-pd: How far can sam take us in tracking and segmenting anything in videos by prompt denoising,\u201d arXiv preprint arXiv:2403.04194, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib51","first-page":"1316","article-title":"Tracking anything with decoupled video segmentation","author":"Cheng","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib52","first-page":"9622","article-title":"MemSAM: taming segment anything model for echocardiography video segmentation","author":"Deng","year":"2024","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib53","doi-asserted-by":"crossref","first-page":"36324","DOI":"10.52202\/068431-2632","article-title":"Decoupling Features in Hierarchical Propagation for Video Object Segmentation","volume":"35","author":"Yang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst. 35 (Neurips 2022)"},{"key":"10.1016\/j.neucom.2026.133439_bib54","first-page":"18602","article-title":"Rmem: Restricted memory banks improve video object segmentation","author":"Zhou","year":"2024","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib55","first-page":"3417","article-title":"SAM-I2V: Upgrading SAM to Support Promptable Video Segmentation with Less than 0.2% Training Cost","author":"Mei","year":"2025","journal-title":"Proc. Comput. Vis. Pattern Recognit. Conf."},{"key":"10.1016\/j.neucom.2026.133439_bib56","article-title":"MaskTrack: Auto-Labeling and Stable Tracking for Video Object Segmentation","author":"Chen","year":"2024","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.neucom.2026.133439_bib57","unstructured":"Z. Yan, W. Sun, R. Zhou, Z. Yuan, K. Zhang, Y. Li, T. Liu, Q. Li, X. Li, L. HearXiv preprint \u201cBiomedical sam 2: Segment anything in biomedical images and videos,\u201d arXiv preprint arXiv:2408.03286, 2024 arXiv:2408.032862024."},{"key":"10.1016\/j.neucom.2026.133439_bib58","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2025.103547","article-title":"Medical SAM adapter: adapting segment anything model for medical image segmentation","volume":"102","author":"Wu","year":"2025","journal-title":"Med Image Anal."},{"key":"10.1016\/j.neucom.2026.133439_bib59","first-page":"3367","article-title":"Sam-adapter: Adapting segment anything in underperformed scenes","author":"Chen","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib60","doi-asserted-by":"crossref","first-page":"4791","DOI":"10.1109\/JSTARS.2025.3532690","article-title":"ClassWise-SAM-Adapter: Parameter-Efficient Fine-Tuning Adapts Segment Anything to SAR Domain for Semantic Segmentation","volume":"18","author":"Pu","year":"2025","journal-title":"Ieee J. Sel. Top. Appl. Earth Obs. Remote Sens."},{"key":"10.1016\/j.neucom.2026.133439_bib61","unstructured":"J. Cheng, J. Ye, Z. Deng, J. Chen, T. Li, H. Wang, Y. Su, Z. Huang, J. Chen, and L. Jiang, \u201cSam-med2d,\u201d arXiv preprint arXiv:2308.16184, 2023."},{"key":"10.1016\/j.neucom.2026.133439_bib62","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2024.103310","article-title":"MA-SAM: Modality-agnostic SAM adaptation for 3D medical image segmentation","volume":"98","author":"Chen","year":"2024","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.neucom.2026.133439_bib63","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2024.103324","article-title":"3DSAM-adapter: Holistic adaptation of SAM from 2D to 3D for promptable tumor segmentation","volume":"98","author":"Gong","year":"2024","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.neucom.2026.133439_bib64","unstructured":"C. Zhou, K. Ning, Q. Shen, S. Zhou, Z. Yu, and H. Wang, \u201cSam-sp: Self-prompting makes sam great again,\u201d arXiv preprint arXiv:2408.12364, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib65","doi-asserted-by":"crossref","unstructured":"K. Zhang, and D. Liu, \u201cCustomized segment anything model for medical image segmentation,\u201d arXiv preprint arXiv:2304.13785, 2023.","DOI":"10.2139\/ssrn.4495221"},{"key":"10.1016\/j.neucom.2026.133439_bib66","article-title":"When SAM Meets Sonar Images","volume":"21","author":"Wang","year":"2024","journal-title":"Ieee Geosci. Remote Sens. Lett."},{"issue":"12","key":"10.1016\/j.neucom.2026.133439_bib67","doi-asserted-by":"crossref","first-page":"8954","DOI":"10.1109\/JBHI.2025.3540306","article-title":"MediViSTA: Medical Video Segmentation Via Temporal Fusion SAM Adaptation for Echocardiography","volume":"29","author":"Kim","year":"2025","journal-title":"IEEE J. Biomed. Health Inf."},{"key":"10.1016\/j.neucom.2026.133439_bib68","article-title":"BLO-SAM: bi-level optimization based finetuning of the segment anything model for overfitting-preventing semantic segmentation","author":"Zhang","year":"2024","journal-title":"Forty-first Int. Conf. Mach. Learn."},{"issue":"4","key":"10.1016\/j.neucom.2026.133439_bib69","doi-asserted-by":"crossref","DOI":"10.1088\/2632-2153\/ae13d1","article-title":"Depthwise-Dilated Convolutional Adapters for Medical Object Tracking and Segmentation Using the Segment Anything Model 2","volume":"6","author":"Xu","year":"2025","journal-title":"Mach. Learn Sci. Technol."},{"key":"10.1016\/j.neucom.2026.133439_bib70","doi-asserted-by":"crossref","unstructured":"A.W. Harley, Z. Fang, and K. Fragkiadaki, \"Particle video revisited: Tracking through occlusions using point trajectories.\" pp. 59-75.","DOI":"10.1007\/978-3-031-20047-2_4"},{"key":"10.1016\/j.neucom.2026.133439_bib71","first-page":"10061","article-title":"Tapir: Tracking any point with per-frame initialization and temporal refinement","author":"Doersch","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib72","first-page":"18","article-title":"Cotracker: It is better to track together","author":"Karaev","year":"2024","journal-title":"Eur. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib73","first-page":"6013","article-title":"Cotracker3: Simpler and better point tracking by pseudo-labelling real videos","author":"Karaev","year":"2025","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib74","doi-asserted-by":"crossref","first-page":"970","DOI":"10.1109\/TIP.2026.3651835","article-title":"SAMURAI: Motion-Aware Memory for Training-Free Visual Object Tracking With SAM 2","volume":"35","author":"Yang","year":"2026","journal-title":"IEEE Trans. Image Process"},{"key":"10.1016\/j.neucom.2026.133439_bib75","first-page":"11302","article-title":"One-prompt to segment all medical images","author":"Wu","year":"2024","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib76","first-page":"1164","article-title":"Epicflow: Edge-preserving interpolation of correspondences for optical flow","author":"Revaud","year":"2015","journal-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib77","first-page":"4015","article-title":"Flow fields: Dense correspondence fields for highly accurate large displacement optical flow estimation","author":"Bailer","year":"2015","journal-title":"Proc. IEEE Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib78","first-page":"580","article-title":"Convolutional oriented boundaries","author":"Maninis","year":"2016","journal-title":"Proc. Eur. Conf. Comput. Vis. (ECCV"},{"issue":"9","key":"10.1016\/j.neucom.2026.133439_bib79","first-page":"4701","article-title":"Collaborative video object segmentation by multi-scale foreground-background integration","volume":"44","author":"Yang","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"9","key":"10.1016\/j.neucom.2026.133439_bib80","doi-asserted-by":"crossref","first-page":"6247","DOI":"10.1109\/TPAMI.2024.3383592","article-title":"Scalable Video Object Segmentation With Identification Mechanism","volume":"46","author":"Yang","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133439_bib81","first-page":"90","article-title":"Video object segmentation with joint re-identification and attention-aware mask propagation","author":"Li","year":"2018","journal-title":"Proc. Eur. Conf. Comput. Vis. (ECCV"},{"key":"10.1016\/j.neucom.2026.133439_bib82","first-page":"4481","article-title":"Learning video object segmentation with visual memory","author":"Tokmakov","year":"2017","journal-title":"Proc. IEEE Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib83","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"9080","article-title":"Reinforcement cutting-agent learning for video object segmentation","author":"Han","year":"2018"},{"key":"10.1016\/j.neucom.2026.133439_bib84","article-title":"DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection","author":"Zhang","year":"2023","journal-title":"Elev. Int. Conf. Learn. Represent."},{"key":"10.1016\/j.neucom.2026.133439_bib85","first-page":"162","article-title":"Moving object segmentation: All you need is sam (and flow)","author":"Xie","year":"2024","journal-title":"Proc. Asian Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib86","first-page":"2592","article-title":"Comprehensive multimodal segmentation in medical imaging: Combining yolov8 with sam and hq-sam models","author":"Pandey","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib87","series-title":"Psychology of learning and motivation","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1016\/S0079-7421(08)60422-3","article-title":"\"Human memory: A proposed system and its control processes","author":"Atkinson","year":"1968"},{"key":"10.1016\/j.neucom.2026.133439_bib88","first-page":"29914","article-title":"Segment Anything in High Quality","volume":"36","author":"Ke","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst. 36 (Neurips 2023)"},{"key":"10.1016\/j.neucom.2026.133439_bib89","first-page":"635","article-title":"Xmem++: Production-level video segmentation from few annotated frames","author":"Bekuzarov","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib90","first-page":"4015","article-title":"Segment anything","author":"Kirillov","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib91","unstructured":"J. Zhu, A. Hamdi, Y. Qi, Y. Jin, and J. Wu, \u201cMedical sam 2: Segment medical images as video via segment anything model 2,\u201d arXiv preprint arXiv:2408.00874, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib92","article-title":"Surgical SAM 2: Real-time Segment Anything in Surgical Video by Efficient Frame Pruning","author":"Liu","year":"2024","journal-title":"Adv. Med. Found. Model. Explain. Robust. Secur. Beyond"},{"key":"10.1016\/j.neucom.2026.133439_bib93","first-page":"13614","article-title":"Sam2long: Enhancing sam 2 for long video segmentation with a training-free memory tree","author":"Ding","year":"2025","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib94","unstructured":"S. Ding, R. Qian, X. Dong, P. Zhang, Y. Zang, Y. Cao, Y. Guo, D. Lin, and J. Wang, \u201cSam2long: Enhancing sam 2 for long video segmentation with a training-free memory tree,\u201d arXiv preprint arXiv:2410.16268, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib95","unstructured":"Q. Yang, Y. Yao, M. Cui, and L. Bo, \u201cMoSAM: Motion-Guided Segment Anything Model with Spatial-Temporal Memory Selection,\u201d arXiv preprint arXiv:2505.00739, 2025."},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib96","doi-asserted-by":"crossref","DOI":"10.1088\/3049-477X\/ae291a","article-title":"TSMS-SAM2: multi-scale temporal sampling augmentation and memory-splitting pruning for promptable video object segmentation and tracking in surgical scenarios","volume":"2","author":"Xu","year":"2026","journal-title":"Mach. Learn. Health"},{"key":"10.1016\/j.neucom.2026.133439_bib97","unstructured":"N. Houlsby, A. Giurgiu, S. Jastrzebski, B. Morrone, Q. De Laroussilhe, A. Gesmundo, M. Attariyan, and S. Gelly, \"Parameter-efficient transfer learning for NLP.\" pp. 2790-2799."},{"key":"10.1016\/j.neucom.2026.133439_bib98","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu","year":"2022"},{"key":"10.1016\/j.neucom.2026.133439_bib99","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112909","article-title":"Trans-SAM: Transfer Segment Anything Model to medical image segmentation with Parameter-Efficient Fine-Tuning","volume":"310","author":"Wu","year":"2025","journal-title":"Knowl. -Based Syst."},{"key":"10.1016\/j.neucom.2026.133439_bib100","unstructured":"Z. Tu, L. Gu, X. Wang, and B. Jiang, \u201cUltrasound sam adapter: Adapting sam for breast lesion segmentation in ultrasound images,\u201d arXiv preprint arXiv:2404.14837, 2024."},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib101","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1016\/j.ghm.2024.01.001","article-title":"Tunnel SAM adapter: Adapting segment anything model for tunnel water leakage inspection","volume":"2","author":"Chen","year":"2024","journal-title":"Geohazard Mech."},{"issue":"4","key":"10.1016\/j.neucom.2026.133439_bib102","doi-asserted-by":"crossref","DOI":"10.1088\/2632-2153\/ae13d1","article-title":"Depthwise-Dilated Convolutional Adapters for Medical Object Tracking and Segmentation Using the Segment Anything Model 2","volume":"6","author":"Xu","year":"2025","journal-title":"Mach. Learn Sci. Technol."},{"key":"10.1016\/j.neucom.2026.133439_bib103","article-title":"SAM2-Adapter: Evaluating & Adapting Segment Anything 2 in Downstream Tasks: Camouflage, Shadow, Medical Image Segmentation, and More","author":"Chen","year":"2025","journal-title":"ICLR 2025 Workshop Found. Models Wild"},{"key":"10.1016\/j.neucom.2026.133439_bib104","article-title":"BLO-SAM: bi-level optimization based finetuning of the segment anything model for overfitting-preventing semantic segmentation","author":"Zhang","year":"2024","journal-title":"Forty-first International Conference on Machine Learning"},{"key":"10.1016\/j.neucom.2026.133439_bib105","unstructured":"C. Zhu, B. Xiao, L. Shi, S. Xu, and X. Zheng, \u201cCustomize Segment Anything Model for Multi-Modal Semantic Segmentation with Mixture of LoRA Experts,\u201d arXiv preprint arXiv:2412.04220, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib106","doi-asserted-by":"crossref","unstructured":"X. Chen, C. Wang, H. Ning, M. Zhang, M. Shen, and S. Li, \"Sam-octa2: Layer sequence octa segmentation with fine-tuned segment anything model 2.\" pp. 1-5.","DOI":"10.1109\/ICASSP49660.2025.10888853"},{"key":"10.1016\/j.neucom.2026.133439_bib107","doi-asserted-by":"crossref","unstructured":"A. Iltaf, R.M. Ahmed, Z. Zhang, B. Li, and S. Zhou, \u201cVesselSAM: Leveraging SAM for Aortic Vessel Segmentation with LoRA and Atrous Attention,\u201d arXiv preprint arXiv:2502.18185, 2025.","DOI":"10.36227\/techrxiv.174234663.37696124\/v1"},{"key":"10.1016\/j.neucom.2026.133439_bib108","first-page":"1","article-title":"Vehicle Localization During GPS Outages With Extended Kalman Filter and Deep Learning","volume":"70","author":"Liu","year":"2021","journal-title":"Ieee Trans. Instrum. Meas."},{"issue":"9","key":"10.1016\/j.neucom.2026.133439_bib109","doi-asserted-by":"crossref","first-page":"4275","DOI":"10.1109\/TIV.2023.3287790","article-title":"A novel method for land vehicle positioning: Invariant kalman filters and deep-learning-based radar speed estimation","volume":"8","author":"de Araujo","year":"2023","journal-title":"IEEE Trans. Intell. Veh."},{"key":"10.1016\/j.neucom.2026.133439_bib110","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.107861","article-title":"Optical flow and scene flow estimation: A survey","volume":"114","author":"Zhai","year":"2021","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib111","first-page":"19855","article-title":"Pointodyssey: A large-scale synthetic dataset for long-term point tracking","author":"Zheng","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib112","doi-asserted-by":"crossref","first-page":"9302","DOI":"10.1109\/WACV61041.2025.00901","article-title":"Segment anything meets point tracking","author":"Rajic","year":"2025","journal-title":"2025 IEEE\/CVF Winter Conf. Appl. Comput. Vis. (WACV)"},{"key":"10.1016\/j.neucom.2026.133439_bib113","first-page":"3406","article-title":"Segment Any Motion in Videos","author":"Huang","year":"2025","journal-title":"Proc. Comput. Vis. Pattern Recognit. Conf."},{"issue":"2","key":"10.1016\/j.neucom.2026.133439_bib114","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1007\/s11263-011-0512-5","article-title":"Motion Coherent Tracking Using Multi-label MRF Optimization","volume":"100","author":"Tsai","year":"2012","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib115","first-page":"2192","article-title":"Video segmentation by tracking many figure-ground segments","author":"Li","year":"2013","journal-title":"Proc. IEEE Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib116","first-page":"724","article-title":"A benchmark dataset and evaluation methodology for video object segmentation","author":"Perazzi","year":"2016","journal-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib117","unstructured":"J. Pont-Tuset, F. Perazzi, S. Caelles, P. Arbel\u00e1ez, A. Sorkine-Hornung, and L. Van Gool, \u201cThe 2017 davis challenge on video object segmentation,\u201d arXiv preprint arXiv:1704.00675, 2017."},{"key":"10.1016\/j.neucom.2026.133439_bib118","first-page":"13480","article-title":"Lvos: A benchmark for long-term video object segmentation","author":"Hong","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib119","article-title":"Lvos: A benchmark for large-scale long-term video object segmentation","author":"Hong","year":"2025","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133439_bib120","first-page":"585","article-title":"Youtube-vos: Sequence-to-sequence video object segmentation","author":"Xu","year":"2018","journal-title":"Proc. Eur. Conf. Comput. Vis. (ECCV"},{"key":"10.1016\/j.neucom.2026.133439_bib121","first-page":"20224","article-title":"MOSE: A new dataset for video object segmentation in complex scenes","author":"Ding","year":"2023","journal-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib122","unstructured":"M. Allan, A. Shvets, T. Kurmann, Z. Zhang, R. Duggal, Y.-H. Su, N. Rieke, I. Laina, N. Kalavakonda, and S. Bodenstedt, \u201c2017 robotic instrument segmentation challenge,\u201d arXiv preprint arXiv:1902.06426, 2019."},{"key":"10.1016\/j.neucom.2026.133439_bib123","unstructured":"M. Allan, S. Kondo, S. Bodenstedt, S. Leger, R. Kadkhodamohammadi, I. Luengo, F. Fuentes, E. Flouty, A. Mohammed, and M. Pedersen, \u201c2018 robotic scene segmentation challenge,\u201d arXiv preprint arXiv:2001.11190, 2020."},{"issue":"9","key":"10.1016\/j.neucom.2026.133439_bib124","doi-asserted-by":"crossref","first-page":"2198","DOI":"10.1109\/TMI.2019.2900516","article-title":"Deep Learning for Segmentation Using an Open Large-Scale Dataset in 2D Echocardiography","volume":"38","author":"Leclerc","year":"2019","journal-title":"IEEE Trans. Med Imaging"},{"issue":"7802","key":"10.1016\/j.neucom.2026.133439_bib125","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1038\/s41586-020-2145-8","article-title":"Video-based AI for beat-to-beat assessment of cardiac function","volume":"580","author":"Ouyang","year":"2020","journal-title":"Nature"},{"issue":"7","key":"10.1016\/j.neucom.2026.133439_bib126","doi-asserted-by":"crossref","DOI":"10.1002\/mp.17964","article-title":"TrackRAD2025 challenge dataset: real-time tumor tracking for MRI-guided radiotherapy","volume":"52","author":"Wang","year":"2025","journal-title":"Med Phys."},{"key":"10.1016\/j.neucom.2026.133439_bib127","unstructured":"A. Murali, D. Alapatt, P. Mascagni, A. Vardazaryan, A. Garcia, N. Okamoto, G. Costamagna, D. Mutter, J. Marescaux, and B. Dallemagne, \u201cThe endoscapes dataset for surgical scene segmentation, object detection, and critical view of safety assessment: Official splits and benchmark,\u201d arXiv preprint arXiv:2312.12429, 2023."},{"issue":"6","key":"10.1016\/j.neucom.2026.133439_bib128","doi-asserted-by":"crossref","first-page":"531","DOI":"10.1007\/s11633-022-1371-y","article-title":"Video Polyp Segmentation: A Deep Learning Perspective","volume":"19","author":"Ji","year":"2022","journal-title":"Mach. Intell. Res."},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib129","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1038\/s41597-023-01981-y","article-title":"A multi-centre polyp detection and segmentation dataset for generalisability assessment","volume":"10","author":"Ali","year":"2023","journal-title":"Sci. Data"},{"issue":"7","key":"10.1016\/j.neucom.2026.133439_bib130","doi-asserted-by":"crossref","first-page":"1010","DOI":"10.1038\/s41592-023-01879-y","article-title":"\u201cThe Cell Tracking Challenge: 10 years of objective benchmarking,\u201d","volume":"20","author":"Maska","year":"2023","journal-title":"Nat. Methods"},{"key":"10.1016\/j.neucom.2026.133439_bib131","first-page":"595","article-title":"Isinet: an instance-based approach for surgical instrument segmentation","author":"Gonz\u00e1lez","year":"2020","journal-title":"Int. Conf. Med. Image Comput. Comput. -Assist. Interv."},{"key":"10.1016\/j.neucom.2026.133439_bib132","first-page":"6191","article-title":"From forks to forceps: A new framework for instance segmentation of surgical instruments","author":"Baby","year":"2023","journal-title":"Proc. IEEE\/CVF Winter Conf. Appl. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib133","first-page":"6890","article-title":"Surgicalsam: Efficient class promptable surgical instrument segmentation","author":"Yue","year":"2024","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.neucom.2026.133439_bib134","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102615","article-title":"\u201cTransMorph: Transformer for unsupervised medical image registration,\u201d","volume":"82","author":"Chen","year":"2022","journal-title":"Med Image Anal."},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib135","doi-asserted-by":"crossref","first-page":"654","DOI":"10.1038\/s41467-024-44824-z","article-title":"Segment anything in medical images","volume":"15","author":"Ma","year":"2024","journal-title":"Nat. Commun."},{"key":"10.1016\/j.neucom.2026.133439_bib136","first-page":"715","article-title":"Pyramid dilated deeper convlstm for video salient object detection","author":"Song","year":"2018","journal-title":"Proc. Eur. Conf. Comput. Vis. (ECCV"},{"key":"10.1016\/j.neucom.2026.133439_bib137","unstructured":"C. Liao, X. Zheng, Y. Lyu, H. Xue, Y. Cao, J. Wang, K. Yang, and X. Hu, \u201cMemorysam: Memorize modalities and semantics with segment anything model 2 for multi-modal semantic segmentation,\u201d arXiv preprint arXiv:2503.06700, 2025."},{"key":"10.1016\/j.neucom.2026.133439_bib138","unstructured":"H. Yuan, X. Li, T. Zhang, Z. Huang, S. Xu, S. Ji, Y. Tong, L. Qi, J. Feng, and M.-H. Yang, \u201cSa2VA: Marrying SAM2 with LLaVA for Dense Grounded Understanding of Images and Videos,\u201d arXiv preprint arXiv:2501.04001, 2025."},{"key":"10.1016\/j.neucom.2026.133439_bib139","article-title":"Refsam: Efficiently adapting segmenting anything model for referring video object segmentation","author":"Li","year":"2025","journal-title":"Neural Netw."},{"issue":"140","key":"10.1016\/j.neucom.2026.133439_bib140","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2026.133439_bib141","first-page":"38","article-title":"Grounding dino: Marrying dino with grounded pre-training for open-set object detection","author":"Liu","year":"2024","journal-title":"Eur. Conf. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib142","first-page":"3395","article-title":"Samwise: Infusing wisdom in sam2 for text-driven video segmentation","author":"Cuttano","year":"2025","journal-title":"Proc. Comput. Vis. Pattern Recognit. Conf."},{"key":"10.1016\/j.neucom.2026.133439_bib143","first-page":"28932","article-title":"SAM2-LOVE: Segment Anything Model 2 in Language-aided Audio-Visual Scenes","author":"Wang","year":"2025","journal-title":"Proc. Comput. Vis. Pattern Recognit. Conf."},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib144","doi-asserted-by":"crossref","first-page":"566","DOI":"10.1038\/s41746-025-01964-w","article-title":"Large-vocabulary segmentation for medical images with text prompts","volume":"8","author":"Zhao","year":"2025","journal-title":"NPJ Digit Med"},{"key":"10.1016\/j.neucom.2026.133439_bib145","first-page":"1","article-title":"Pa-sam: Prompt adapter sam for high-quality image segmentation","volume":"2024","author":"Xie","year":"2024","journal-title":"IEEE Int. Conf. Multimed. Expo. (ICME)"},{"key":"10.1016\/j.neucom.2026.133439_bib146","first-page":"24255","article-title":"A distractor-aware memory for visual object tracking with sam2","author":"Videnovic","year":"2025","journal-title":"Proc. Comput. Vis. Pattern Recognit. Conf."},{"key":"10.1016\/j.neucom.2026.133439_bib147","unstructured":"X. Zhao, Y. Pang, S. Chang, Y. Zhao, L. Zhang, H. Lu, G.E. Fakhri, and X. Liu, \u201cInspiring the next generation of segment anything models: Comprehensively evaluate sam and sam 2 with diverse prompts towards context-dependent concepts under different scenes,\u201d arXiv preprint arXiv:2412.01240, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib148","first-page":"2194","article-title":"Gazegnn: A gaze-guided graph neural network for chest x-ray classification","author":"Wang","year":"2024","journal-title":"Proc. IEEE\/CVF Winter Conf. Appl. Comput. Vis."},{"key":"10.1016\/j.neucom.2026.133439_bib149","first-page":"3167","article-title":"Eyes Tell the Truth: GazeVal Highlights Shortcomings of Generative AI in Medical Imaging","author":"Wong","year":"2025","journal-title":"Proc. Comput. Vis. Pattern Recognit. Conf."},{"key":"10.1016\/j.neucom.2026.133439_bib150","doi-asserted-by":"crossref","DOI":"10.1109\/TMI.2025.3627954","article-title":"Accelerating Volumetric Medical Image Annotation via Short-Long Memory SAM 2","author":"Chen","year":"2025","journal-title":"IEEE Trans. Med Imaging"},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib151","doi-asserted-by":"crossref","DOI":"10.1049\/htl2.12111","article-title":"Augmenting efficient real-time surgical instrument segmentation in video with point tracking and Segment Anything","volume":"12","author":"Wu","year":"2025","journal-title":"Health Technol. Lett."},{"key":"10.1016\/j.neucom.2026.133439_bib152","unstructured":"C. Zhang, D. Han, Y. Qiao, J.U. Kim, S.-H. Bae, S. Lee, and C.S. Hong, \u201cFaster segment anything: Towards lightweight sam for mobile applications,\u201d arXiv preprint arXiv:2306.14289, 2023."},{"key":"10.1016\/j.neucom.2026.133439_bib153","first-page":"16111","article-title":"Efficientsam: Leveraged masked image pretraining for efficient segment anything","author":"Xiong","year":"2024","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133439_bib154","unstructured":"Y. Song, B. Pu, P. Wang, H. Jiang, D. Dong, Y. Cao, and Y. Shen, \u201cSam-lightening: A lightweight segment anything model with dilated flash attention to achieve 30 times acceleration,\u201d arXiv preprint arXiv:2403.09195, 2024."},{"key":"10.1016\/j.neucom.2026.133439_bib155","unstructured":"A. Wang, H. Chen, Z. Lin, J. Han, and G. Ding, \u201cRepvit-sam: Towards real-time segmenting anything,\u201d arXiv preprint arXiv:2312.05760, 2023."},{"key":"10.1016\/j.neucom.2026.133439_bib156","unstructured":"B. Varadarajan, B. Soran, F. Iandola, X. Xiang, Y. Xiong, L. Wu, C. Zhu, R. Krishnamoorthi, and V. Chandra, \u201cSqueezeSAM: User friendly mobile interactive segmentation,\u201d arXiv preprint arXiv:2312.06736, 2023."},{"key":"10.1016\/j.neucom.2026.133439_bib157","doi-asserted-by":"crossref","first-page":"13832","DOI":"10.1109\/CVPR52734.2025.01291","article-title":"EdgeTAM: On-Device Track Anything Model","author":"Zhou","year":"2025","journal-title":"2025 Ieee\/Cvf Conf. Comput. Vis. Pattern Recognit. (Cvpr"},{"issue":"10","key":"10.1016\/j.neucom.2026.133439_bib158","doi-asserted-by":"crossref","first-page":"7406","DOI":"10.1007\/s11263-025-02539-8","article-title":"On Efficient Variants of Segment Anything Model: A Survey: X. Sun et al","volume":"133","author":"Sun","year":"2025","journal-title":"Int. J. Comput. Vis."},{"issue":"10","key":"10.1016\/j.neucom.2026.133439_bib159","doi-asserted-by":"crossref","first-page":"2838","DOI":"10.1038\/s41591-024-03113-4","article-title":"The limits of fair medical imaging AI in real-world generalization","volume":"30","author":"Yang","year":"2024","journal-title":"Nat. Med"},{"issue":"6","key":"10.1016\/j.neucom.2026.133439_bib160","doi-asserted-by":"crossref","DOI":"10.1148\/ryai.2021210097","article-title":"Toward Generalizability in the Deployment of Artificial Intelligence in Radiology: Role of Computation Stress Testing to Overcome Underspecification","volume":"3","author":"Eche","year":"2021","journal-title":"Radio. Artif. Intell."},{"issue":"3","key":"10.1016\/j.neucom.2026.133439_bib161","doi-asserted-by":"crossref","first-page":"642","DOI":"10.1016\/j.media.2010.03.005","article-title":"A review of 3D\/2D registration methods for image-guided interventions","volume":"16","author":"Markelj","year":"2012","journal-title":"Med Image Anal."},{"issue":"18","key":"10.1016\/j.neucom.2026.133439_bib162","doi-asserted-by":"crossref","first-page":"5097","DOI":"10.3390\/s20185097","article-title":"\u201c3D Deep Learning on Medical Images: A Review,\u201d","volume":"20","author":"Singh","year":"2020","journal-title":"Sens. (Basel)"},{"issue":"2","key":"10.1016\/j.neucom.2026.133439_bib163","doi-asserted-by":"crossref","first-page":"274","DOI":"10.1016\/j.radonc.2011.07.031","article-title":"Monitoring tumor motion by real time 2D\/3D registration during radiotherapy","volume":"102","author":"Gendrin","year":"2012","journal-title":"Radio. Oncol."},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib164","doi-asserted-by":"crossref","first-page":"154","DOI":"10.1186\/s13014-022-02123-1","article-title":"CyberKnife Xsight versus fiducial-based target-tracking: a novel 3D dosimetric comparison in a dynamic phantom","volume":"17","author":"Klein","year":"2022","journal-title":"Radiat. Oncol."},{"issue":"2","key":"10.1016\/j.neucom.2026.133439_bib165","doi-asserted-by":"crossref","first-page":"e63","DOI":"10.1016\/j.prro.2017.10.014","article-title":"Clinical log data analysis for assessing the accuracy of the CyberKnife fiducial-free lung tumor tracking system","volume":"8","author":"Nakayama","year":"2018","journal-title":"Pr. Radiat. Oncol."},{"issue":"1","key":"10.1016\/j.neucom.2026.133439_bib166","doi-asserted-by":"crossref","first-page":"166","DOI":"10.1038\/s41592-024-02499-w","article-title":"A foundation model for joint segmentation, detection and recognition of biomedical objects across nine modalities","volume":"22","author":"Zhao","year":"2025","journal-title":"Nat. Methods"},{"issue":"5","key":"10.1016\/j.neucom.2026.133439_bib167","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pcbi.1013071","article-title":"Cell-TRACTR: A transformer-based model for end-to-end segmentation and tracking of cells","volume":"21","author":"O\u2019Connor","year":"2025","journal-title":"PLOS Comput. Biol."}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226008362?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226008362?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:51:47Z","timestamp":1775065907000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226008362"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":167,"alternative-id":["S0925231226008362"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133439","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Segment anything for video: A comprehensive review of video object segmentation and tracking from past to future","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133439","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"133439"}}