{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T18:55:34Z","timestamp":1763751334894,"version":"3.45.0"},"reference-count":45,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62206039","62406053","62476048"],"award-info":[{"award-number":["62206039","62406053","62476048"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["DUT24RC(3)025","DUT24YG119","ZX20240867"],"award-info":[{"award-number":["DUT24RC(3)025","DUT24YG119","ZX20240867"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Lett."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/lsp.2025.3628789","type":"journal-article","created":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T18:38:58Z","timestamp":1762281538000},"page":"4349-4353","source":"Crossref","is-referenced-by-count":0,"title":["RVMamba: Selective Text-Vision Mamba for Referring Video Object Segmentation"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-6859-8987","authenticated-orcid":false,"given":"Zhenyu","family":"Chen","sequence":"first","affiliation":[{"name":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5431-1654","authenticated-orcid":false,"given":"Jiawen","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4648-4437","authenticated-orcid":false,"given":"Lu","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3321-6759","authenticated-orcid":false,"given":"Ping","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunzhi","family":"Zhuge","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6668-9758","authenticated-orcid":false,"given":"Huchuan","family":"Lu","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6111-340X","authenticated-orcid":false,"given":"You","family":"He","sequence":"additional","affiliation":[{"name":"ShenZhen International Graduate School, Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.565"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110553"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/s21051919"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3072970"},{"article-title":"Towards human-embodied visual intelligence","year":"2024","author":"Han","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00493"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00492"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3129418"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2025.3535345"},{"key":"ref11","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","volume-title":"Proc. 1st Conf. Lang. Model.","author":"Gu","year":"2023"},{"key":"ref12","first-page":"103031","article-title":"Vmamba: Visual state space model","volume-title":"Proc. NeurIPS","author":"Liu","year":"2024"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2025.3559425"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2025.3563123"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2025.3547861"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2025.3582672"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3607615"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00864"},{"article-title":"Simplified mamba with disentangled dependency encoding for long-term time series forecasting","year":"2024","author":"Weng","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.129308"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00624"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00404"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3079993"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00091"},{"key":"ref26","first-page":"26425","article-title":"SOC: Semantic-assisted object cluster for referring video object segmentation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Luo","year":"2024"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2025.3618119"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2025.3557689"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58555-6_13"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01471"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01234"},{"key":"ref32","first-page":"22236","article-title":"R $^{2}$ vos: Robust referring video object segmentation via relational multimodal cycle consistency","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Li","year":"2022"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72633-0_6"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01418"},{"article-title":"Mail: A unified mask-image-language trimodal network for referring image segmentation","year":"2021","author":"Li","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01139"},{"key":"ref37","first-page":"19652","article-title":"Referring transformer: A one-step approach to multi-task visual grounding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Li","year":"2021"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01762"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3217852"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2025.3565964"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72684-2_7"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00542"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.396"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"article-title":"Roberta: A robustly optimized BERT pretraining approach","year":"2019","author":"Liu","key":"ref45"}],"container-title":["IEEE Signal Processing Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/97\/10802935\/11224720.pdf?arnumber=11224720","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T18:46:49Z","timestamp":1763750809000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11224720\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/lsp.2025.3628789","relation":{},"ISSN":["1070-9908","1558-2361"],"issn-type":[{"type":"print","value":"1070-9908"},{"type":"electronic","value":"1558-2361"}],"subject":[],"published":{"date-parts":[[2025]]}}}