{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T08:20:44Z","timestamp":1766132444021,"version":"3.48.0"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100003012","name":"Impact Fund","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100003012","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11246890","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"414-419","source":"Crossref","is-referenced-by-count":0,"title":["CapsDT: Diffusion-Transformer for Capsule Robot Manipulation"],"prefix":"10.1109","author":[{"given":"Xiting","family":"He","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong (CUHK),Department of Electronic Engineering,Hong Kong,China"}]},{"given":"Mingwu","family":"Su","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong (CUHK),Department of Electronic Engineering,Hong Kong,China"}]},{"given":"Xinqi","family":"Jiang","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong (CUHK),Department of Electronic Engineering,Hong Kong,China"}]},{"given":"Long","family":"Bai","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong (CUHK),Department of Electronic Engineering,Hong Kong,China"}]},{"given":"Hongliang","family":"Ren","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong (CUHK),Department of Electronic Engineering,Hong Kong,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2023.107412"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/35013140"},{"article-title":"Rdt-1b: a diffusion foundation model for bimanual manipulation","year":"2024","author":"Liu","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2025.3530791"},{"article-title":"Endovla: Dual-phase vision-language-action model for autonomous tracking in endoscopy","year":"2025","author":"Ng","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1111\/den.13270"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1053\/j.gastro.2011.02.031"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s10151-022-02577-1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1002\/aisy.202370022"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2022.832208"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2024.3435035"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TMRB.2021.3123407"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.tige.2022.10.001"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914558006"},{"article-title":"Deepseek-v2: A strong, economical, and efficient mixture-of-experts language model","year":"2024","author":"Liu","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1049\/iet-ipr.2018.6479"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-025-1562-4"},{"article-title":"Octo: An open-source generalist robot policy","year":"2024","author":"Team","key":"ref19"},{"article-title":"Openvla: An open-source vision-language-action model","year":"2024","author":"Kim","key":"ref20"},{"article-title":"Llama 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref21"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11246890.pdf?arnumber=11246890","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T08:18:11Z","timestamp":1766132291000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11246890\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11246890","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}