{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T13:51:47Z","timestamp":1774965107231,"version":"3.50.1"},"reference-count":29,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276028"],"award-info":[{"award-number":["62276028"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20167"],"award-info":[{"award-number":["U20A20167"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/lra.2025.3568612","type":"journal-article","created":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T18:02:54Z","timestamp":1746813774000},"page":"6592-6599","source":"Crossref","is-referenced-by-count":2,"title":["MambaSlip: A Novel Multimodal Large Language Model for Real-Time Robotic Slip Detection"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8271-2980","authenticated-orcid":false,"given":"Shaohua","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8111-1501","authenticated-orcid":false,"given":"Haoze","family":"Li","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6547-5150","authenticated-orcid":false,"given":"Bingyi","family":"Mao","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9085-9633","authenticated-orcid":false,"given":"Fengda","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7683-2776","authenticated-orcid":false,"given":"Wenbai","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Information Science and Technology University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3437-4265","authenticated-orcid":false,"given":"Guowei","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Information Science and Technology University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1228-2757","authenticated-orcid":false,"given":"Peiliang","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref2","first-page":"540","article-title":"VoxPoser: Composable 3D value maps for robotic manipulation with language models","volume-title":"Proc. 7th Conf. Robot Learn.","author":"Huang","year":"2023"},{"key":"ref3","first-page":"2165","article-title":"RT-2: Vision-language-action models transfer web knowledge to robotic control","volume-title":"Proc. 7th Conf. Robot Learn.","author":"Zitkovich","year":"2023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3281153"},{"key":"ref5","article-title":"Ovis: Structural embedding alignment for multimodal large language model","author":"Lu","year":"2024"},{"key":"ref6","first-page":"416","article-title":"Real-world robot learning with masked visual pre-training","volume-title":"Proc. 6th Conf. Robot Learn.","author":"Radosavovic","year":"2022"},{"key":"ref7","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023"},{"key":"ref8","article-title":"RoboMamba: Multimodal state space model for efficient robot reasoning and manipulation","author":"Liu","year":"2024"},{"key":"ref9","article-title":"GraspMamba: A Mamba-based language-driven grasp detection framework with hierarchical feature learning","author":"Nguyen","year":"2024"},{"key":"ref10","first-page":"128940","article-title":"An image is worth 32 tokens for reconstruction and generation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu","year":"2024"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460495"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811589"},{"key":"ref13","article-title":"Visuo-tactile-based slip detection using a multi-scale temporal convolution network","author":"Gao","year":"2023"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2024.3400789"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICICM59499.2023.10365811"},{"key":"ref16","first-page":"34139","article-title":"Banana: Banach fixed-point network for pointcloud segmentation with inter-part equivariance","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Deng","year":"2023"},{"key":"ref17","first-page":"8469","article-title":"PaLM-E: An embodied multimodal language model","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Driess","year":"2023"},{"key":"ref18","article-title":"Vision-language foundation models as effective robot imitators","author":"Li","year":"2023"},{"key":"ref19","article-title":"Chameleon: Mixed-modal early-fusion foundation models","author":"Team","year":"2024"},{"key":"ref20","first-page":"121670","article-title":"SimVG: A simple framework for visual grounding with decoupled multi-modal fusion","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Dai","year":"2024"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.816"},{"key":"ref23","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu","year":"2023"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33131"},{"key":"ref25","article-title":"A survey of mamba","author":"Qu","year":"2024"},{"key":"ref26","article-title":"Image first or text first? Optimising the sequencing of modalities in large language model prompting and reasoning tasks","author":"Wardle","year":"2024"},{"key":"ref27","article-title":"Multi-modal generative AI: Multi-modal LLM, diffusion and beyond","author":"Chen","year":"2024"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CAC63892.2024.10865385"},{"key":"ref29","article-title":"HMT-grasp: A hybrid Mamba-transformer approach for robot grasping in cluttered environments","author":"Xiong","year":"2024"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11008675\/10994851.pdf?arnumber=10994851","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T04:33:57Z","timestamp":1747974837000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10994851\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":29,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3568612","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}